gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
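
/* For illustration only -- a hypothetical example, not output produced
   verbatim by this pass. Given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   strength reduction can replace the multiplication hidden in the address
   computation a + i * sizeof (*a) by a pointer that is advanced by
   sizeof (*a) in each iteration, giving roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   after which the original counter i may be eliminated if it has no
   other uses. */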
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94
95 /* The infinite cost. */
96 #define INFTY 10000000
97
98 /* The expected number of loop iterations. TODO -- use profiling instead of
99 this. */
100 #define AVG_LOOP_NITER(LOOP) 5
101
102
103 /* Representation of the induction variable. */
104 struct iv
105 {
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to which the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The id of the use, if have_use_for is true. */
113 };
114
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
117 {
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 };
125
126 /* Types of uses. */
127 enum use_type
128 {
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
132 };
133
134 /* The candidate - cost pair. */
135 struct cost_pair
136 {
137 struct iv_cand *cand; /* The candidate. */
138 unsigned cost; /* The cost. */
139 bitmap depends_on; /* The list of invariants that have to be
140 preserved. */
141 tree value; /* For final value elimination, the expression for
142 the final value of the iv. For iv elimination,
143 the new bound to compare with. */
144 };
145
146 /* A use of an induction variable. */
147 struct iv_use
148 {
149 unsigned id; /* The id of the use. */
150 enum use_type type; /* Type of the use. */
151 struct iv *iv; /* The induction variable it is based on. */
152 tree stmt; /* Statement in which it occurs. */
153 tree *op_p; /* The place where it occurs. */
154 bitmap related_cands; /* The set of "related" iv candidates, plus the common
155 important ones. */
156
157 unsigned n_map_members; /* Number of candidates in the cost_map list. */
158 struct cost_pair *cost_map;
159 /* The costs with respect to the iv candidates. */
160
161 struct iv_cand *selected;
162 /* The selected candidate. */
163 };
164
165 /* The position where the iv is computed. */
166 enum iv_position
167 {
168 IP_NORMAL, /* At the end, just before the exit condition. */
169 IP_END, /* At the end of the latch block. */
170 IP_ORIGINAL /* The original biv. */
171 };
172
173 /* The induction variable candidate. */
174 struct iv_cand
175 {
176 unsigned id; /* The number of the candidate. */
177 bool important; /* Whether this is an "important" candidate, i.e. such
178 that it should be considered by all uses. */
179 enum iv_position pos; /* Where it is computed. */
180 tree incremented_at; /* For original biv, the statement where it is
181 incremented. */
182 tree var_before; /* The variable used for it before increment. */
183 tree var_after; /* The variable used for it after increment. */
184 struct iv *iv; /* The value of the candidate. NULL for
185 "pseudocandidate" used to indicate the possibility
186 to replace the final value of an iv by direct
187 computation of the value. */
188 unsigned cost; /* Cost of the candidate. */
189 bitmap depends_on; /* The list of invariants that are used in step of the
190 biv. */
191 };
192
193 /* The data used by the induction variable optimizations. */
194
195 typedef struct iv_use *iv_use_p;
196 DEF_VEC_P(iv_use_p);
197 DEF_VEC_ALLOC_P(iv_use_p,heap);
198
199 typedef struct iv_cand *iv_cand_p;
200 DEF_VEC_P(iv_cand_p);
201 DEF_VEC_ALLOC_P(iv_cand_p,heap);
202
203 struct ivopts_data
204 {
205 /* The currently optimized loop. */
206 struct loop *current_loop;
207
208 /* Number of registers used in it. */
209 unsigned regs_used;
210
211 /* Numbers of iterations for all exits of the current loop. */
212 struct pointer_map_t *niters;
213
214 /* The size of version_info array allocated. */
215 unsigned version_info_size;
216
217 /* The array of information for the ssa names. */
218 struct version_info *version_info;
219
220 /* The bitmap of indices in version_info whose value was changed. */
221 bitmap relevant;
222
223 /* The maximum invariant id. */
224 unsigned max_inv_id;
225
226 /* The uses of induction variables. */
227 VEC(iv_use_p,heap) *iv_uses;
228
229 /* The candidates. */
230 VEC(iv_cand_p,heap) *iv_candidates;
231
232 /* A bitmap of important candidates. */
233 bitmap important_candidates;
234
235 /* Whether to consider just related and important candidates when replacing a
236 use. */
237 bool consider_all_candidates;
238 };
239
240 /* An assignment of iv candidates to uses. */
241
242 struct iv_ca
243 {
244 /* The number of uses covered by the assignment. */
245 unsigned upto;
246
247 /* Number of uses that cannot be expressed by the candidates in the set. */
248 unsigned bad_uses;
249
250 /* Candidate assigned to a use, together with the related costs. */
251 struct cost_pair **cand_for_use;
252
253 /* Number of times each candidate is used. */
254 unsigned *n_cand_uses;
255
256 /* The candidates used. */
257 bitmap cands;
258
259 /* The number of candidates in the set. */
260 unsigned n_cands;
261
262 /* Total number of registers needed. */
263 unsigned n_regs;
264
265 /* Total cost of expressing uses. */
266 unsigned cand_use_cost;
267
268 /* Total cost of candidates. */
269 unsigned cand_cost;
270
271 /* Number of times each invariant is used. */
272 unsigned *n_invariant_uses;
273
274 /* Total cost of the assignment. */
275 unsigned cost;
276 };
277
278 /* Difference of two iv candidate assignments. */
279
280 struct iv_ca_delta
281 {
282 /* Changed use. */
283 struct iv_use *use;
284
285 /* An old assignment (for rollback purposes). */
286 struct cost_pair *old_cp;
287
288 /* A new assignment. */
289 struct cost_pair *new_cp;
290
291 /* Next change in the list. */
292 struct iv_ca_delta *next_change;
293 };
294
295 /* Bound on number of candidates below which all candidates are considered. */
296
297 #define CONSIDER_ALL_CANDIDATES_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
299
300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
301 optimizing such a loop would help, and it would take ages). */
302
303 #define MAX_CONSIDERED_USES \
304 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
305
306 /* If there are at most this number of ivs in the set, always try removing
307 unnecessary ivs from the set. */
308
309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
310 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
311
312 /* The list of trees for which the decl_rtl field must be reset is stored
313 here. */
314
315 static VEC(tree,heap) *decl_rtl_to_reset;
316
317 /* Number of uses recorded in DATA. */
318
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
321 {
322 return VEC_length (iv_use_p, data->iv_uses);
323 }
324
325 /* Ith use recorded in DATA. */
326
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
329 {
330 return VEC_index (iv_use_p, data->iv_uses, i);
331 }
332
333 /* Number of candidates recorded in DATA. */
334
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
337 {
338 return VEC_length (iv_cand_p, data->iv_candidates);
339 }
340
341 /* Ith candidate recorded in DATA. */
342
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
345 {
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
347 }
348
349 /* The single loop exit if it dominates the latch, NULL otherwise. */
350
351 edge
352 single_dom_exit (struct loop *loop)
353 {
354 edge exit = single_exit (loop);
355
356 if (!exit)
357 return NULL;
358
359 if (!just_once_each_iteration_p (loop, exit->src))
360 return NULL;
361
362 return exit;
363 }
364
365 /* Dumps information about the induction variable IV to FILE. */
366
367 extern void dump_iv (FILE *, struct iv *);
368 void
369 dump_iv (FILE *file, struct iv *iv)
370 {
371 if (iv->ssa_name)
372 {
373 fprintf (file, "ssa name ");
374 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
375 fprintf (file, "\n");
376 }
377
378 fprintf (file, " type ");
379 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
380 fprintf (file, "\n");
381
382 if (iv->step)
383 {
384 fprintf (file, " base ");
385 print_generic_expr (file, iv->base, TDF_SLIM);
386 fprintf (file, "\n");
387
388 fprintf (file, " step ");
389 print_generic_expr (file, iv->step, TDF_SLIM);
390 fprintf (file, "\n");
391 }
392 else
393 {
394 fprintf (file, " invariant ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
397 }
398
399 if (iv->base_object)
400 {
401 fprintf (file, " base object ");
402 print_generic_expr (file, iv->base_object, TDF_SLIM);
403 fprintf (file, "\n");
404 }
405
406 if (iv->biv_p)
407 fprintf (file, " is a biv\n");
408 }
409
410 /* Dumps information about the USE to FILE. */
411
412 extern void dump_use (FILE *, struct iv_use *);
413 void
414 dump_use (FILE *file, struct iv_use *use)
415 {
416 fprintf (file, "use %d\n", use->id);
417
418 switch (use->type)
419 {
420 case USE_NONLINEAR_EXPR:
421 fprintf (file, " generic\n");
422 break;
423
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
427
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
431
432 default:
433 gcc_unreachable ();
434 }
435
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
439
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
444
445 dump_iv (file, use->iv);
446
447 if (use->related_cands)
448 {
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
451 }
452 }
453
454 /* Dumps information about the uses to FILE. */
455
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
459 {
460 unsigned i;
461 struct iv_use *use;
462
463 for (i = 0; i < n_iv_uses (data); i++)
464 {
465 use = iv_use (data, i);
466
467 dump_use (file, use);
468 fprintf (file, "\n");
469 }
470 }
471
472 /* Dumps information about induction variable candidate CAND to FILE. */
473
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
477 {
478 struct iv *iv = cand->iv;
479
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
482
483 if (cand->depends_on)
484 {
485 fprintf (file, " depends on ");
486 dump_bitmap (file, cand->depends_on);
487 }
488
489 if (!iv)
490 {
491 fprintf (file, " final value replacement\n");
492 return;
493 }
494
495 switch (cand->pos)
496 {
497 case IP_NORMAL:
498 fprintf (file, " incremented before exit test\n");
499 break;
500
501 case IP_END:
502 fprintf (file, " incremented at end\n");
503 break;
504
505 case IP_ORIGINAL:
506 fprintf (file, " original biv\n");
507 break;
508 }
509
510 dump_iv (file, iv);
511 }
512
513 /* Returns the info for ssa version VER. */
514
515 static inline struct version_info *
516 ver_info (struct ivopts_data *data, unsigned ver)
517 {
518 return data->version_info + ver;
519 }
520
521 /* Returns the info for ssa name NAME. */
522
523 static inline struct version_info *
524 name_info (struct ivopts_data *data, tree name)
525 {
526 return ver_info (data, SSA_NAME_VERSION (name));
527 }
528
529 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
530 emitted in LOOP. */
531
532 static bool
533 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
534 {
535 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
536
537 gcc_assert (bb);
538
539 if (sbb == loop->latch)
540 return true;
541
542 if (sbb != bb)
543 return false;
544
545 return stmt == last_stmt (bb);
546 }
547
548 /* Returns true if STMT is after the place where the original induction
549 variable CAND is incremented. */
550
551 static bool
552 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 {
554 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
555 basic_block stmt_bb = bb_for_stmt (stmt);
556 block_stmt_iterator bsi;
557
558 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
559 return false;
560
561 if (stmt_bb != cand_bb)
562 return true;
563
564 /* Scan the block from the end, since the original ivs are usually
565 incremented at the end of the loop body. */
566 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 {
568 if (bsi_stmt (bsi) == cand->incremented_at)
569 return false;
570 if (bsi_stmt (bsi) == stmt)
571 return true;
572 }
573 }
574
575 /* Returns true if STMT is after the place where the induction variable
576 CAND is incremented in LOOP. */
577
578 static bool
579 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
580 {
581 switch (cand->pos)
582 {
583 case IP_END:
584 return false;
585
586 case IP_NORMAL:
587 return stmt_after_ip_normal_pos (loop, stmt);
588
589 case IP_ORIGINAL:
590 return stmt_after_ip_original_pos (cand, stmt);
591
592 default:
593 gcc_unreachable ();
594 }
595 }
596
597 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
598
599 static bool
600 abnormal_ssa_name_p (tree exp)
601 {
602 if (!exp)
603 return false;
604
605 if (TREE_CODE (exp) != SSA_NAME)
606 return false;
607
608 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
609 }
610
611 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
612 abnormal phi node. Callback for for_each_index. */
613
614 static bool
615 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
616 void *data ATTRIBUTE_UNUSED)
617 {
618 if (TREE_CODE (base) == ARRAY_REF)
619 {
620 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
621 return false;
622 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
623 return false;
624 }
625
626 return !abnormal_ssa_name_p (*index);
627 }
628
629 /* Returns true if EXPR contains an ssa name that occurs in an
630 abnormal phi node. */
631
632 bool
633 contains_abnormal_ssa_name_p (tree expr)
634 {
635 enum tree_code code;
636 enum tree_code_class class;
637
638 if (!expr)
639 return false;
640
641 code = TREE_CODE (expr);
642 class = TREE_CODE_CLASS (code);
643
644 if (code == SSA_NAME)
645 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
646
647 if (code == INTEGER_CST
648 || is_gimple_min_invariant (expr))
649 return false;
650
651 if (code == ADDR_EXPR)
652 return !for_each_index (&TREE_OPERAND (expr, 0),
653 idx_contains_abnormal_ssa_name_p,
654 NULL);
655
656 switch (class)
657 {
658 case tcc_binary:
659 case tcc_comparison:
660 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
661 return true;
662
663 /* Fallthru. */
664 case tcc_unary:
665 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
666 return true;
667
668 break;
669
670 default:
671 gcc_unreachable ();
672 }
673
674 return false;
675 }
676
677 /* Returns tree describing number of iterations determined from
678 EXIT of DATA->current_loop, or NULL if something goes wrong. */
679
680 static tree
681 niter_for_exit (struct ivopts_data *data, edge exit)
682 {
683 struct tree_niter_desc desc;
684 tree niter;
685 void **slot;
686
687 if (!data->niters)
688 {
689 data->niters = pointer_map_create ();
690 slot = NULL;
691 }
692 else
693 slot = pointer_map_contains (data->niters, exit);
694
695 if (!slot)
696 {
697 /* Try to determine number of iterations. We must know it
698 unconditionally (i.e., without possibility of # of iterations
699 being zero). Also, we cannot safely work with ssa names that
700 appear in phi nodes on abnormal edges, so that we do not create
701 overlapping life ranges for them (PR 27283). */
702 if (number_of_iterations_exit (data->current_loop,
703 exit, &desc, true)
704 && integer_zerop (desc.may_be_zero)
705 && !contains_abnormal_ssa_name_p (desc.niter))
706 niter = desc.niter;
707 else
708 niter = NULL_TREE;
709
710 *pointer_map_insert (data->niters, exit) = niter;
711 }
712 else
713 niter = *slot;
714
715 return niter;
716 }
717
718 /* Returns tree describing number of iterations determined from
719 single dominating exit of DATA->current_loop, or NULL if something
720 goes wrong. */
721
722 static tree
723 niter_for_single_dom_exit (struct ivopts_data *data)
724 {
725 edge exit = single_dom_exit (data->current_loop);
726
727 if (!exit)
728 return NULL;
729
730 return niter_for_exit (data, exit);
731 }
732
733 /* Initializes data structures used by the iv optimization pass, stored
734 in DATA. */
735
736 static void
737 tree_ssa_iv_optimize_init (struct ivopts_data *data)
738 {
739 data->version_info_size = 2 * num_ssa_names;
740 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
741 data->relevant = BITMAP_ALLOC (NULL);
742 data->important_candidates = BITMAP_ALLOC (NULL);
743 data->max_inv_id = 0;
744 data->niters = NULL;
745 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
746 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
747 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
748 }
749
750 /* Returns a memory object to which EXPR points. In case we are able to
751 determine that it does not point to any such object, NULL is returned. */
752
753 static tree
754 determine_base_object (tree expr)
755 {
756 enum tree_code code = TREE_CODE (expr);
757 tree base, obj, op0, op1;
758
759 /* If this is a pointer casted to any type, we need to determine
760 the base object for the pointer; so handle conversions before
761 throwing away non-pointer expressions. */
762 if (TREE_CODE (expr) == NOP_EXPR
763 || TREE_CODE (expr) == CONVERT_EXPR)
764 return determine_base_object (TREE_OPERAND (expr, 0));
765
766 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
767 return NULL_TREE;
768
769 switch (code)
770 {
771 case INTEGER_CST:
772 return NULL_TREE;
773
774 case ADDR_EXPR:
775 obj = TREE_OPERAND (expr, 0);
776 base = get_base_address (obj);
777
778 if (!base)
779 return expr;
780
781 if (TREE_CODE (base) == INDIRECT_REF)
782 return determine_base_object (TREE_OPERAND (base, 0));
783
784 return fold_convert (ptr_type_node,
785 build_fold_addr_expr (base));
786
787 case PLUS_EXPR:
788 case MINUS_EXPR:
789 op0 = determine_base_object (TREE_OPERAND (expr, 0));
790 op1 = determine_base_object (TREE_OPERAND (expr, 1));
791
792 if (!op1)
793 return op0;
794
795 if (!op0)
796 return (code == PLUS_EXPR
797 ? op1
798 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
799
800 return fold_build2 (code, ptr_type_node, op0, op1);
801
802 default:
803 return fold_convert (ptr_type_node, expr);
804 }
805 }
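
/* For example (illustrative): for EXPR == &a[i] the base object is the
   address of the array A; for EXPR == p + 4 with pointer P it is P itself;
   and for an integer constant it is NULL_TREE. */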
806
807 /* Allocates an induction variable with the given initial value BASE and
808 step STEP. */
809
810 static struct iv *
811 alloc_iv (tree base, tree step)
812 {
813 struct iv *iv = XCNEW (struct iv);
814 gcc_assert (step != NULL_TREE);
815
816 iv->base = base;
817 iv->base_object = determine_base_object (base);
818 iv->step = step;
819 iv->biv_p = false;
820 iv->have_use_for = false;
821 iv->use_id = 0;
822 iv->ssa_name = NULL_TREE;
823
824 return iv;
825 }
826
827 /* Sets BASE and STEP for induction variable IV. */
828
829 static void
830 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
831 {
832 struct version_info *info = name_info (data, iv);
833
834 gcc_assert (!info->iv);
835
836 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
837 info->iv = alloc_iv (base, step);
838 info->iv->ssa_name = iv;
839 }
840
841 /* Finds induction variable declaration for VAR. */
842
843 static struct iv *
844 get_iv (struct ivopts_data *data, tree var)
845 {
846 basic_block bb;
847 tree type = TREE_TYPE (var);
848
849 if (!POINTER_TYPE_P (type)
850 && !INTEGRAL_TYPE_P (type))
851 return NULL;
852
853 if (!name_info (data, var)->iv)
854 {
855 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
856
857 if (!bb
858 || !flow_bb_inside_loop_p (data->current_loop, bb))
859 set_iv (data, var, var, build_int_cst (type, 0));
860 }
861
862 return name_info (data, var)->iv;
863 }
864
865 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
866 not define a simple affine biv with nonzero step. */
867
868 static tree
869 determine_biv_step (tree phi)
870 {
871 struct loop *loop = bb_for_stmt (phi)->loop_father;
872 tree name = PHI_RESULT (phi);
873 affine_iv iv;
874
875 if (!is_gimple_reg (name))
876 return NULL_TREE;
877
878 if (!simple_iv (loop, phi, name, &iv, true))
879 return NULL_TREE;
880
881 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
882 }
883
884 /* Finds basic ivs. */
885
886 static bool
887 find_bivs (struct ivopts_data *data)
888 {
889 tree phi, step, type, base;
890 bool found = false;
891 struct loop *loop = data->current_loop;
892
893 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
894 {
895 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
896 continue;
897
898 step = determine_biv_step (phi);
899 if (!step)
900 continue;
901
902 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
903 base = expand_simple_operations (base);
904 if (contains_abnormal_ssa_name_p (base)
905 || contains_abnormal_ssa_name_p (step))
906 continue;
907
908 type = TREE_TYPE (PHI_RESULT (phi));
909 base = fold_convert (type, base);
910 if (step)
911 step = fold_convert (type, step);
912
913 set_iv (data, PHI_RESULT (phi), base, step);
914 found = true;
915 }
916
917 return found;
918 }
919
920 /* Marks basic ivs. */
921
922 static void
923 mark_bivs (struct ivopts_data *data)
924 {
925 tree phi, var;
926 struct iv *iv, *incr_iv;
927 struct loop *loop = data->current_loop;
928 basic_block incr_bb;
929
930 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
931 {
932 iv = get_iv (data, PHI_RESULT (phi));
933 if (!iv)
934 continue;
935
936 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
937 incr_iv = get_iv (data, var);
938 if (!incr_iv)
939 continue;
940
941 /* If the increment is in a subloop, ignore it. */
942 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
943 if (incr_bb->loop_father != data->current_loop
944 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
945 continue;
946
947 iv->biv_p = true;
948 incr_iv->biv_p = true;
949 }
950 }
951
952 /* Checks whether STMT defines a linear induction variable and stores its
953 parameters to IV. */
954
955 static bool
956 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
957 {
958 tree lhs;
959 struct loop *loop = data->current_loop;
960
961 iv->base = NULL_TREE;
962 iv->step = NULL_TREE;
963
964 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
965 return false;
966
967 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
968 if (TREE_CODE (lhs) != SSA_NAME)
969 return false;
970
971 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
972 return false;
973 iv->base = expand_simple_operations (iv->base);
974
975 if (contains_abnormal_ssa_name_p (iv->base)
976 || contains_abnormal_ssa_name_p (iv->step))
977 return false;
978
979 return true;
980 }
981
982 /* Finds general ivs in statement STMT. */
983
984 static void
985 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
986 {
987 affine_iv iv;
988
989 if (!find_givs_in_stmt_scev (data, stmt, &iv))
990 return;
991
992 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
993 }
994
995 /* Finds general ivs in basic block BB. */
996
997 static void
998 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
999 {
1000 block_stmt_iterator bsi;
1001
1002 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1003 find_givs_in_stmt (data, bsi_stmt (bsi));
1004 }
1005
1006 /* Finds general ivs. */
1007
1008 static void
1009 find_givs (struct ivopts_data *data)
1010 {
1011 struct loop *loop = data->current_loop;
1012 basic_block *body = get_loop_body_in_dom_order (loop);
1013 unsigned i;
1014
1015 for (i = 0; i < loop->num_nodes; i++)
1016 find_givs_in_bb (data, body[i]);
1017 free (body);
1018 }
1019
1020 /* For each ssa name defined in LOOP, determines whether it is an induction
1021 variable and if so, its initial value and step. */
1022
1023 static bool
1024 find_induction_variables (struct ivopts_data *data)
1025 {
1026 unsigned i;
1027 bitmap_iterator bi;
1028
1029 if (!find_bivs (data))
1030 return false;
1031
1032 find_givs (data);
1033 mark_bivs (data);
1034
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1036 {
1037 tree niter = niter_for_single_dom_exit (data);
1038
1039 if (niter)
1040 {
1041 fprintf (dump_file, " number of iterations ");
1042 print_generic_expr (dump_file, niter, TDF_SLIM);
1043 fprintf (dump_file, "\n\n");
1044 }
1045
1046 fprintf (dump_file, "Induction variables:\n\n");
1047
1048 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1049 {
1050 if (ver_info (data, i)->iv)
1051 dump_iv (dump_file, ver_info (data, i)->iv);
1052 }
1053 }
1054
1055 return true;
1056 }
1057
1058 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1059
1060 static struct iv_use *
1061 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1062 tree stmt, enum use_type use_type)
1063 {
1064 struct iv_use *use = XCNEW (struct iv_use);
1065
1066 use->id = n_iv_uses (data);
1067 use->type = use_type;
1068 use->iv = iv;
1069 use->stmt = stmt;
1070 use->op_p = use_p;
1071 use->related_cands = BITMAP_ALLOC (NULL);
1072
1073 /* Reset the ssa name so that it is not shown in the dumps, in case it
1074 was not reset by the caller. */
1075 iv->ssa_name = NULL_TREE;
1076
1077 if (dump_file && (dump_flags & TDF_DETAILS))
1078 dump_use (dump_file, use);
1079
1080 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1081
1082 return use;
1083 }
1084
1085 /* Checks whether OP is a loop-level invariant and if so, records it.
1086 NONLINEAR_USE is true if the invariant is used in a way we do not
1087 handle specially. */
1088
1089 static void
1090 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1091 {
1092 basic_block bb;
1093 struct version_info *info;
1094
1095 if (TREE_CODE (op) != SSA_NAME
1096 || !is_gimple_reg (op))
1097 return;
1098
1099 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1100 if (bb
1101 && flow_bb_inside_loop_p (data->current_loop, bb))
1102 return;
1103
1104 info = name_info (data, op);
1105 info->name = op;
1106 info->has_nonlin_use |= nonlinear_use;
1107 if (!info->inv_id)
1108 info->inv_id = ++data->max_inv_id;
1109 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1110 }
1111
1112 /* Checks whether the use OP is interesting and if so, records it. */
1113
1114 static struct iv_use *
1115 find_interesting_uses_op (struct ivopts_data *data, tree op)
1116 {
1117 struct iv *iv;
1118 struct iv *civ;
1119 tree stmt;
1120 struct iv_use *use;
1121
1122 if (TREE_CODE (op) != SSA_NAME)
1123 return NULL;
1124
1125 iv = get_iv (data, op);
1126 if (!iv)
1127 return NULL;
1128
1129 if (iv->have_use_for)
1130 {
1131 use = iv_use (data, iv->use_id);
1132
1133 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1134 return use;
1135 }
1136
1137 if (integer_zerop (iv->step))
1138 {
1139 record_invariant (data, op, true);
1140 return NULL;
1141 }
1142 iv->have_use_for = true;
1143
1144 civ = XNEW (struct iv);
1145 *civ = *iv;
1146
1147 stmt = SSA_NAME_DEF_STMT (op);
1148 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1149 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1150
1151 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1152 iv->use_id = use->id;
1153
1154 return use;
1155 }
1156
1157 /* Given a condition *COND_P, checks whether it is a compare of an induction
1158 variable and an invariant. If this is the case, CONTROL_VAR is set
1159 to location of the iv, BOUND to the location of the invariant,
1160 IV_VAR and IV_BOUND are set to the corresponding induction variable
1161 descriptions, and true is returned. If this is not the case,
1162 CONTROL_VAR and BOUND are set to the arguments of the condition and
1163 false is returned. */
1164
1165 static bool
1166 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1167 tree **control_var, tree **bound,
1168 struct iv **iv_var, struct iv **iv_bound)
1169 {
1170 /* The nodes returned when COND has just one operand. Note that you should
1171 not modify anything in BOUND or IV_BOUND because of this. */
1172 static struct iv const_iv;
1173 static tree zero;
1174 tree cond = *cond_p;
1175 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1176 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1177 bool ret = false;
1178
1179 zero = integer_zero_node;
1180 const_iv.step = integer_zero_node;
1181
1182 if (TREE_CODE (cond) == SSA_NAME)
1183 {
1184 op0 = cond_p;
1185 iv0 = get_iv (data, cond);
1186 ret = (iv0 && !integer_zerop (iv0->step));
1187 goto end;
1188 }
1189
1190 if (!COMPARISON_CLASS_P (cond))
1191 {
1192 op0 = cond_p;
1193 goto end;
1194 }
1195
1196 op0 = &TREE_OPERAND (cond, 0);
1197 op1 = &TREE_OPERAND (cond, 1);
1198 if (TREE_CODE (*op0) == SSA_NAME)
1199 iv0 = get_iv (data, *op0);
1200 if (TREE_CODE (*op1) == SSA_NAME)
1201 iv1 = get_iv (data, *op1);
1202
1203 /* Exactly one of the compared values must be an iv, and the other one must
1204 be an invariant. */
1205 if (!iv0 || !iv1)
1206 goto end;
1207
1208 if (integer_zerop (iv0->step))
1209 {
1210 /* Control variable may be on the other side. */
1211 tmp_op = op0; op0 = op1; op1 = tmp_op;
1212 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1213 }
1214 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1215
1216 end:
1217 if (control_var)
1218 *control_var = op0;
1219 if (iv_var)
1220 *iv_var = iv0;
1221 if (bound)
1222 *bound = op1;
1223 if (iv_bound)
1224 *iv_bound = iv1;
1225
1226 return ret;
1227 }
1228
1229 /* Checks whether the condition *COND_P in STMT is interesting
1230 and if so, records it. */
1231
1232 static void
1233 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1234 {
1235 tree *var_p, *bound_p;
1236 struct iv *var_iv, *civ;
1237
1238 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1239 {
1240 find_interesting_uses_op (data, *var_p);
1241 find_interesting_uses_op (data, *bound_p);
1242 return;
1243 }
1244
1245 civ = XNEW (struct iv);
1246 *civ = *var_iv;
1247 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1248 }
1249
1250 /* Returns true if expression EXPR is obviously invariant in LOOP,
1251 i.e. if all its operands are defined outside of the LOOP. */
1252
1253 bool
1254 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1255 {
1256 basic_block def_bb;
1257 unsigned i, len;
1258
1259 if (is_gimple_min_invariant (expr))
1260 return true;
1261
1262 if (TREE_CODE (expr) == SSA_NAME)
1263 {
1264 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1265 if (def_bb
1266 && flow_bb_inside_loop_p (loop, def_bb))
1267 return false;
1268
1269 return true;
1270 }
1271
1272 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1273 return false;
1274
1275 len = TREE_OPERAND_LENGTH (expr);
1276 for (i = 0; i < len; i++)
1277 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1278 return false;
1279
1280 return true;
1281 }
1282
1283 /* Accumulates the steps of indices into DATA and replaces their values with the
1284 initial ones. Returns false when the value of the index cannot be determined.
1285 Callback for for_each_index. */
1286
1287 struct ifs_ivopts_data
1288 {
1289 struct ivopts_data *ivopts_data;
1290 tree stmt;
1291 tree step;
1292 };
1293
1294 static bool
1295 idx_find_step (tree base, tree *idx, void *data)
1296 {
1297 struct ifs_ivopts_data *dta = data;
1298 struct iv *iv;
1299 tree step, iv_base, iv_step, lbound, off;
1300 struct loop *loop = dta->ivopts_data->current_loop;
1301
1302 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1303 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1304 return false;
1305
1306 /* If base is a component ref, require that the offset of the reference
1307 be invariant. */
1308 if (TREE_CODE (base) == COMPONENT_REF)
1309 {
1310 off = component_ref_field_offset (base);
1311 return expr_invariant_in_loop_p (loop, off);
1312 }
1313
1314 /* If base is an array, first check whether we will be able to move the
1315 reference out of the loop (in order to take its address in strength
1316 reduction). In order for this to work we need both lower bound
1317 and step to be loop invariants. */
1318 if (TREE_CODE (base) == ARRAY_REF)
1319 {
1320 step = array_ref_element_size (base);
1321 lbound = array_ref_low_bound (base);
1322
1323 if (!expr_invariant_in_loop_p (loop, step)
1324 || !expr_invariant_in_loop_p (loop, lbound))
1325 return false;
1326 }
1327
1328 if (TREE_CODE (*idx) != SSA_NAME)
1329 return true;
1330
1331 iv = get_iv (dta->ivopts_data, *idx);
1332 if (!iv)
1333 return false;
1334
1335 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1336 *&x[0], which is not folded and does not trigger the
1337 ARRAY_REF path below. */
1338 *idx = iv->base;
1339
1340 if (integer_zerop (iv->step))
1341 return true;
1342
1343 if (TREE_CODE (base) == ARRAY_REF)
1344 {
1345 step = array_ref_element_size (base);
1346
1347 /* We only handle addresses whose step is an integer constant. */
1348 if (TREE_CODE (step) != INTEGER_CST)
1349 return false;
1350 }
1351 else
1352 /* The step for pointer arithmetic is already 1 byte. */
1353 step = build_int_cst (sizetype, 1);
1354
1355 iv_base = iv->base;
1356 iv_step = iv->step;
1357 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1358 sizetype, &iv_base, &iv_step, dta->stmt,
1359 false))
1360 {
1361 /* The index might wrap. */
1362 return false;
1363 }
1364
1365 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1366 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1367
1368 return true;
1369 }
1370
1371 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1372 object is passed to it in DATA. */
1373
1374 static bool
1375 idx_record_use (tree base, tree *idx,
1376 void *data)
1377 {
1378 find_interesting_uses_op (data, *idx);
1379 if (TREE_CODE (base) == ARRAY_REF)
1380 {
1381 find_interesting_uses_op (data, array_ref_element_size (base));
1382 find_interesting_uses_op (data, array_ref_low_bound (base));
1383 }
1384 return true;
1385 }
1386
1387 /* Returns true if memory reference REF may be unaligned. */
1388
1389 static bool
1390 may_be_unaligned_p (tree ref)
1391 {
1392 tree base;
1393 tree base_type;
1394 HOST_WIDE_INT bitsize;
1395 HOST_WIDE_INT bitpos;
1396 tree toffset;
1397 enum machine_mode mode;
1398 int unsignedp, volatilep;
1399 unsigned base_align;
1400
1401 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1402 thus they are not misaligned. */
1403 if (TREE_CODE (ref) == TARGET_MEM_REF)
1404 return false;
1405
1406 /* The test below is basically a copy of what expr.c:normal_inner_ref
1407 does to check whether the object must be loaded by parts when
1408 STRICT_ALIGNMENT is true. */
1409 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1410 &unsignedp, &volatilep, true);
1411 base_type = TREE_TYPE (base);
1412 base_align = TYPE_ALIGN (base_type);
1413
1414 if (mode != BLKmode
1415 && (base_align < GET_MODE_ALIGNMENT (mode)
1416 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1417 || bitpos % BITS_PER_UNIT != 0))
1418 return true;
1419
1420 return false;
1421 }
1422
1423 /* Return true if EXPR may be non-addressable. */
1424
1425 static bool
1426 may_be_nonaddressable_p (tree expr)
1427 {
1428 switch (TREE_CODE (expr))
1429 {
1430 case COMPONENT_REF:
1431 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1432 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1433
1434 case ARRAY_REF:
1435 case ARRAY_RANGE_REF:
1436 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1437
1438 case VIEW_CONVERT_EXPR:
1439 /* This kind of view-conversion may wrap non-addressable objects
1440 and make them look addressable. After some processing the
1441 non-addressability may be uncovered again, causing ADDR_EXPRs
1442 of inappropriate objects to be built. */
1443 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1444 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1445
1446 default:
1447 break;
1448 }
1449
1450 return false;
1451 }
1452
1453 /* Finds addresses in *OP_P inside STMT. */
1454
1455 static void
1456 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1457 {
1458 tree base = *op_p, step = build_int_cst (sizetype, 0);
1459 struct iv *civ;
1460 struct ifs_ivopts_data ifs_ivopts_data;
1461
1462 /* Do not play with volatile memory references. A bit too conservative,
1463 perhaps, but safe. */
1464 if (stmt_ann (stmt)->has_volatile_ops)
1465 goto fail;
1466
1467 /* Ignore bitfields for now. Not really something terribly complicated
1468 to handle. TODO. */
1469 if (TREE_CODE (base) == BIT_FIELD_REF)
1470 goto fail;
1471
1472 if (may_be_nonaddressable_p (base))
1473 goto fail;
1474
1475 if (STRICT_ALIGNMENT
1476 && may_be_unaligned_p (base))
1477 goto fail;
1478
1479 base = unshare_expr (base);
1480
1481 if (TREE_CODE (base) == TARGET_MEM_REF)
1482 {
1483 tree type = build_pointer_type (TREE_TYPE (base));
1484 tree astep;
1485
1486 if (TMR_BASE (base)
1487 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1488 {
1489 civ = get_iv (data, TMR_BASE (base));
1490 if (!civ)
1491 goto fail;
1492
1493 TMR_BASE (base) = civ->base;
1494 step = civ->step;
1495 }
1496 if (TMR_INDEX (base)
1497 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1498 {
1499 civ = get_iv (data, TMR_INDEX (base));
1500 if (!civ)
1501 goto fail;
1502
1503 TMR_INDEX (base) = civ->base;
1504 astep = civ->step;
1505
1506 if (astep)
1507 {
1508 if (TMR_STEP (base))
1509 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1510
1511 step = fold_build2 (PLUS_EXPR, type, step, astep);
1512 }
1513 }
1514
1515 if (integer_zerop (step))
1516 goto fail;
1517 base = tree_mem_ref_addr (type, base);
1518 }
1519 else
1520 {
1521 ifs_ivopts_data.ivopts_data = data;
1522 ifs_ivopts_data.stmt = stmt;
1523 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1524 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1525 || integer_zerop (ifs_ivopts_data.step))
1526 goto fail;
1527 step = ifs_ivopts_data.step;
1528
1529 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1530 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1531
1532 base = build_fold_addr_expr (base);
1533
1534 /* Substituting bases of IVs into the base expression might
1535 have caused folding opportunities. */
1536 if (TREE_CODE (base) == ADDR_EXPR)
1537 {
1538 tree *ref = &TREE_OPERAND (base, 0);
1539 while (handled_component_p (*ref))
1540 ref = &TREE_OPERAND (*ref, 0);
1541 if (TREE_CODE (*ref) == INDIRECT_REF)
1542 *ref = fold_indirect_ref (*ref);
1543 }
1544 }
1545
1546 civ = alloc_iv (base, step);
1547 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1548 return;
1549
1550 fail:
1551 for_each_index (op_p, idx_record_use, data);
1552 }
1553
1554 /* Finds and records invariants used in STMT. */
1555
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1558 {
1559 ssa_op_iter iter;
1560 use_operand_p use_p;
1561 tree op;
1562
1563 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1564 {
1565 op = USE_FROM_PTR (use_p);
1566 record_invariant (data, op, false);
1567 }
1568 }
1569
1570 /* Finds interesting uses of induction variables in the statement STMT. */
1571
1572 static void
1573 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1574 {
1575 struct iv *iv;
1576 tree op, lhs, rhs;
1577 ssa_op_iter iter;
1578 use_operand_p use_p;
1579
1580 find_invariants_stmt (data, stmt);
1581
1582 if (TREE_CODE (stmt) == COND_EXPR)
1583 {
1584 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1585 return;
1586 }
1587
1588 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1589 {
1590 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1591 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1592
1593 if (TREE_CODE (lhs) == SSA_NAME)
1594 {
1595 /* If the statement defines an induction variable, the uses are not
1596 interesting by themselves. */
1597
1598 iv = get_iv (data, lhs);
1599
1600 if (iv && !integer_zerop (iv->step))
1601 return;
1602 }
1603
1604 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1605 {
1606 case tcc_comparison:
1607 find_interesting_uses_cond (data, stmt,
1608 &GIMPLE_STMT_OPERAND (stmt, 1));
1609 return;
1610
1611 case tcc_reference:
1612 find_interesting_uses_address (data, stmt,
1613 &GIMPLE_STMT_OPERAND (stmt, 1));
1614 if (REFERENCE_CLASS_P (lhs))
1615 find_interesting_uses_address (data, stmt,
1616 &GIMPLE_STMT_OPERAND (stmt, 0));
1617 return;
1618
1619 default: ;
1620 }
1621
1622 if (REFERENCE_CLASS_P (lhs)
1623 && is_gimple_val (rhs))
1624 {
1625 find_interesting_uses_address (data, stmt,
1626 &GIMPLE_STMT_OPERAND (stmt, 0));
1627 find_interesting_uses_op (data, rhs);
1628 return;
1629 }
1630
1631 /* TODO -- we should also handle address uses of type
1632
1633 memory = call (whatever);
1634
1635 and
1636
1637 call (memory). */
1638 }
1639
1640 if (TREE_CODE (stmt) == PHI_NODE
1641 && bb_for_stmt (stmt) == data->current_loop->header)
1642 {
1643 lhs = PHI_RESULT (stmt);
1644 iv = get_iv (data, lhs);
1645
1646 if (iv && !integer_zerop (iv->step))
1647 return;
1648 }
1649
1650 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1651 {
1652 op = USE_FROM_PTR (use_p);
1653
1654 if (TREE_CODE (op) != SSA_NAME)
1655 continue;
1656
1657 iv = get_iv (data, op);
1658 if (!iv)
1659 continue;
1660
1661 find_interesting_uses_op (data, op);
1662 }
1663 }
1664
1665 /* Finds interesting uses of induction variables outside of loops
1666 on loop exit edge EXIT. */
1667
1668 static void
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1670 {
1671 tree phi, def;
1672
1673 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1674 {
1675 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676 if (is_gimple_reg (def))
1677 find_interesting_uses_op (data, def);
1678 }
1679 }
1680
1681 /* Finds uses of the induction variables that are interesting. */
1682
1683 static void
1684 find_interesting_uses (struct ivopts_data *data)
1685 {
1686 basic_block bb;
1687 block_stmt_iterator bsi;
1688 tree phi;
1689 basic_block *body = get_loop_body (data->current_loop);
1690 unsigned i;
1691 struct version_info *info;
1692 edge e;
1693
1694 if (dump_file && (dump_flags & TDF_DETAILS))
1695 fprintf (dump_file, "Uses:\n\n");
1696
1697 for (i = 0; i < data->current_loop->num_nodes; i++)
1698 {
1699 edge_iterator ei;
1700 bb = body[i];
1701
1702 FOR_EACH_EDGE (e, ei, bb->succs)
1703 if (e->dest != EXIT_BLOCK_PTR
1704 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705 find_interesting_uses_outside (data, e);
1706
1707 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708 find_interesting_uses_stmt (data, phi);
1709 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1711 }
1712
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1714 {
1715 bitmap_iterator bi;
1716
1717 fprintf (dump_file, "\n");
1718
1719 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1720 {
1721 info = ver_info (data, i);
1722 if (info->inv_id)
1723 {
1724 fprintf (dump_file, " ");
1725 print_generic_expr (dump_file, info->name, TDF_SLIM);
1726 fprintf (dump_file, " is invariant (%d)%s\n",
1727 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1728 }
1729 }
1730
1731 fprintf (dump_file, "\n");
1732 }
1733
1734 free (body);
1735 }
1736
1737 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1738 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1739 we are at the top-level of the processed address. */
1740
1741 static tree
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743 unsigned HOST_WIDE_INT *offset)
1744 {
1745 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746 enum tree_code code;
1747 tree type, orig_type = TREE_TYPE (expr);
1748 unsigned HOST_WIDE_INT off0, off1, st;
1749 tree orig_expr = expr;
1750
1751 STRIP_NOPS (expr);
1752
1753 type = TREE_TYPE (expr);
1754 code = TREE_CODE (expr);
1755 *offset = 0;
1756
1757 switch (code)
1758 {
1759 case INTEGER_CST:
1760 if (!cst_and_fits_in_hwi (expr)
1761 || integer_zerop (expr))
1762 return orig_expr;
1763
1764 *offset = int_cst_value (expr);
1765 return build_int_cst (orig_type, 0);
1766
1767 case PLUS_EXPR:
1768 case MINUS_EXPR:
1769 op0 = TREE_OPERAND (expr, 0);
1770 op1 = TREE_OPERAND (expr, 1);
1771
1772 op0 = strip_offset_1 (op0, false, false, &off0);
1773 op1 = strip_offset_1 (op1, false, false, &off1);
1774
1775 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776 if (op0 == TREE_OPERAND (expr, 0)
1777 && op1 == TREE_OPERAND (expr, 1))
1778 return orig_expr;
1779
1780 if (integer_zerop (op1))
1781 expr = op0;
1782 else if (integer_zerop (op0))
1783 {
1784 if (code == PLUS_EXPR)
1785 expr = op1;
1786 else
1787 expr = fold_build1 (NEGATE_EXPR, type, op1);
1788 }
1789 else
1790 expr = fold_build2 (code, type, op0, op1);
1791
1792 return fold_convert (orig_type, expr);
1793
1794 case ARRAY_REF:
1795 if (!inside_addr)
1796 return orig_expr;
1797
1798 step = array_ref_element_size (expr);
1799 if (!cst_and_fits_in_hwi (step))
1800 break;
1801
1802 st = int_cst_value (step);
1803 op1 = TREE_OPERAND (expr, 1);
1804 op1 = strip_offset_1 (op1, false, false, &off1);
1805 *offset = off1 * st;
1806
1807 if (top_compref
1808 && integer_zerop (op1))
1809 {
1810 /* Strip the component reference completely. */
1811 op0 = TREE_OPERAND (expr, 0);
1812 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1813 *offset += off0;
1814 return op0;
1815 }
1816 break;
1817
1818 case COMPONENT_REF:
1819 if (!inside_addr)
1820 return orig_expr;
1821
1822 tmp = component_ref_field_offset (expr);
1823 if (top_compref
1824 && cst_and_fits_in_hwi (tmp))
1825 {
1826 /* Strip the component reference completely. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829 *offset = off0 + int_cst_value (tmp);
1830 return op0;
1831 }
1832 break;
1833
1834 case ADDR_EXPR:
1835 op0 = TREE_OPERAND (expr, 0);
1836 op0 = strip_offset_1 (op0, true, true, &off0);
1837 *offset += off0;
1838
1839 if (op0 == TREE_OPERAND (expr, 0))
1840 return orig_expr;
1841
1842 expr = build_fold_addr_expr (op0);
1843 return fold_convert (orig_type, expr);
1844
1845 case INDIRECT_REF:
1846 inside_addr = false;
1847 break;
1848
1849 default:
1850 return orig_expr;
1851 }
1852
1853 /* Default handling of expressions for which we want to recurse into
1854 the first operand. */
1855 op0 = TREE_OPERAND (expr, 0);
1856 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1857 *offset += off0;
1858
1859 if (op0 == TREE_OPERAND (expr, 0)
1860 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1861 return orig_expr;
1862
1863 expr = copy_node (expr);
1864 TREE_OPERAND (expr, 0) = op0;
1865 if (op1)
1866 TREE_OPERAND (expr, 1) = op1;
1867
1868 /* Inside an address, we might strip the top-level component references,
1869 thus changing the type of the expression. Handling of ADDR_EXPR
1870 will fix that. */
1871 expr = fold_convert (orig_type, expr);
1872
1873 return expr;
1874 }
1875
1876 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1877
1878 static tree
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1880 {
1881 return strip_offset_1 (expr, false, false, offset);
1882 }
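
/* For example (illustrative): for EXPR == &a[2].f, where the element size
   of A and the offset of field F are compile-time constants, the result is
   the address of A itself, with the combined constant byte offset stored
   in *OFFSET. */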
1883
1884 /* Returns a variant of TYPE that can be used as a base for different uses.
1885 We return an unsigned type with the same precision, which avoids problems
1886 with overflows. */
1887
1888 static tree
1889 generic_type_for (tree type)
1890 {
1891 if (POINTER_TYPE_P (type))
1892 return unsigned_type_for (type);
1893
1894 if (TYPE_UNSIGNED (type))
1895 return type;
1896
1897 return unsigned_type_for (type);
1898 }
1899
1900 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1901 the bitmap in which we should store them. */
1902
1903 static struct ivopts_data *fd_ivopts_data;
1904 static tree
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1906 {
1907 bitmap *depends_on = data;
1908 struct version_info *info;
1909
1910 if (TREE_CODE (*expr_p) != SSA_NAME)
1911 return NULL_TREE;
1912 info = name_info (fd_ivopts_data, *expr_p);
1913
1914 if (!info->inv_id || info->has_nonlin_use)
1915 return NULL_TREE;
1916
1917 if (!*depends_on)
1918 *depends_on = BITMAP_ALLOC (NULL);
1919 bitmap_set_bit (*depends_on, info->inv_id);
1920
1921 return NULL_TREE;
1922 }
1923
1924 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
1925 position to POS. If USE is not NULL, the candidate is set as related to
1926 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1927 replacement of the final value of the iv by a direct computation. */
1928
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931 tree base, tree step, bool important, enum iv_position pos,
1932 struct iv_use *use, tree incremented_at)
1933 {
1934 unsigned i;
1935 struct iv_cand *cand = NULL;
1936 tree type, orig_type;
1937
1938 if (base)
1939 {
1940 orig_type = TREE_TYPE (base);
1941 type = generic_type_for (orig_type);
1942 if (type != orig_type)
1943 {
1944 base = fold_convert (type, base);
1945 step = fold_convert (type, step);
1946 }
1947 }
1948
1949 for (i = 0; i < n_iv_cands (data); i++)
1950 {
1951 cand = iv_cand (data, i);
1952
1953 if (cand->pos != pos)
1954 continue;
1955
1956 if (cand->incremented_at != incremented_at)
1957 continue;
1958
1959 if (!cand->iv)
1960 {
1961 if (!base && !step)
1962 break;
1963
1964 continue;
1965 }
1966
1967 if (!base && !step)
1968 continue;
1969
1970 if (operand_equal_p (base, cand->iv->base, 0)
1971 && operand_equal_p (step, cand->iv->step, 0))
1972 break;
1973 }
1974
1975 if (i == n_iv_cands (data))
1976 {
1977 cand = XCNEW (struct iv_cand);
1978 cand->id = i;
1979
1980 if (!base && !step)
1981 cand->iv = NULL;
1982 else
1983 cand->iv = alloc_iv (base, step);
1984
1985 cand->pos = pos;
1986 if (pos != IP_ORIGINAL && cand->iv)
1987 {
1988 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1989 cand->var_after = cand->var_before;
1990 }
1991 cand->important = important;
1992 cand->incremented_at = incremented_at;
1993 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1994
1995 if (step
1996 && TREE_CODE (step) != INTEGER_CST)
1997 {
1998 fd_ivopts_data = data;
1999 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2000 }
2001
2002 if (dump_file && (dump_flags & TDF_DETAILS))
2003 dump_cand (dump_file, cand);
2004 }
2005
2006 if (important && !cand->important)
2007 {
2008 cand->important = true;
2009 if (dump_file && (dump_flags & TDF_DETAILS))
2010 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2011 }
2012
2013 if (use)
2014 {
2015 bitmap_set_bit (use->related_cands, i);
2016 if (dump_file && (dump_flags & TDF_DETAILS))
2017 fprintf (dump_file, "Candidate %d is related to use %d\n",
2018 cand->id, use->id);
2019 }
2020
2021 return cand;
2022 }
2023
2024 /* Returns true if incrementing the induction variable at the end of the LOOP
2025 is allowed.
2026
2027 The purpose is to avoid splitting latch edge with a biv increment, thus
2028 creating a jump, possibly confusing other optimization passes and leaving
2029 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2030 is not available (so we do not have a better alternative), or if the latch
2031 edge is already nonempty. */
2032
2033 static bool
2034 allow_ip_end_pos_p (struct loop *loop)
2035 {
2036 if (!ip_normal_pos (loop))
2037 return true;
2038
2039 if (!empty_block_p (ip_end_pos (loop)))
2040 return true;
2041
2042 return false;
2043 }
2044
2045 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
2046 position to POS. If USE is not NULL, the candidate is set as related to
2047 it. The candidate computation is scheduled on all available positions. */
2048
2049 static void
2050 add_candidate (struct ivopts_data *data,
2051 tree base, tree step, bool important, struct iv_use *use)
2052 {
2053 if (ip_normal_pos (data->current_loop))
2054 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2055 if (ip_end_pos (data->current_loop)
2056 && allow_ip_end_pos_p (data->current_loop))
2057 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2058 }
2059
2060 /* Add a standard "0 + 1 * iteration" iv candidate for a
2061 type with SIZE bits. */
2062
2063 static void
2064 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2065 unsigned int size)
2066 {
2067 tree type = lang_hooks.types.type_for_size (size, true);
2068 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2069 true, NULL);
2070 }
2071
2072 /* Adds standard iv candidates. */
2073
2074 static void
2075 add_standard_iv_candidates (struct ivopts_data *data)
2076 {
2077 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2078
2079 /* The same for a double-integer type if it is still fast enough. */
2080 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2081 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2082 }
2083
2084
2085 /* Adds candidates based on the old induction variable IV. */
2086
2087 static void
2088 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2089 {
2090 tree phi, def;
2091 struct iv_cand *cand;
2092
2093 add_candidate (data, iv->base, iv->step, true, NULL);
2094
2095 /* The same, but with initial value zero. */
2096 add_candidate (data,
2097 build_int_cst (TREE_TYPE (iv->base), 0),
2098 iv->step, true, NULL);
2099
2100 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2101 if (TREE_CODE (phi) == PHI_NODE)
2102 {
2103 /* Additionally record the possibility of leaving the original iv
2104 untouched. */
2105 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2106 cand = add_candidate_1 (data,
2107 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2108 SSA_NAME_DEF_STMT (def));
2109 cand->var_before = iv->ssa_name;
2110 cand->var_after = def;
2111 }
2112 }
2113
2114 /* Adds candidates based on the old induction variables. */
2115
2116 static void
2117 add_old_ivs_candidates (struct ivopts_data *data)
2118 {
2119 unsigned i;
2120 struct iv *iv;
2121 bitmap_iterator bi;
2122
2123 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2124 {
2125 iv = ver_info (data, i)->iv;
2126 if (iv && iv->biv_p && !integer_zerop (iv->step))
2127 add_old_iv_candidates (data, iv);
2128 }
2129 }
2130
2131 /* Adds candidates based on the value of the induction variable IV and USE. */
2132
2133 static void
2134 add_iv_value_candidates (struct ivopts_data *data,
2135 struct iv *iv, struct iv_use *use)
2136 {
2137 unsigned HOST_WIDE_INT offset;
2138 tree base;
2139
2140 add_candidate (data, iv->base, iv->step, false, use);
2141
2142 /* The same, but with initial value zero. Make such a variable important,
2143 since it is generic enough so that possibly many uses may be based
2144 on it. */
2145 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2146 iv->step, true, use);
2147
2148 /* Third, try removing the constant offset. */
2149 base = strip_offset (iv->base, &offset);
2150 if (offset)
2151 add_candidate (data, base, iv->step, false, use);
2152 }
2153
2154 /* Adds candidates based on the uses. */
2155
2156 static void
2157 add_derived_ivs_candidates (struct ivopts_data *data)
2158 {
2159 unsigned i;
2160
2161 for (i = 0; i < n_iv_uses (data); i++)
2162 {
2163 struct iv_use *use = iv_use (data, i);
2164
2165 if (!use)
2166 continue;
2167
2168 switch (use->type)
2169 {
2170 case USE_NONLINEAR_EXPR:
2171 case USE_COMPARE:
2172 case USE_ADDRESS:
2173 /* Just add the ivs based on the value of the iv used here. */
2174 add_iv_value_candidates (data, use->iv, use);
2175 break;
2176
2177 default:
2178 gcc_unreachable ();
2179 }
2180 }
2181 }
2182
2183 /* Record important candidates and add them to related_cands bitmaps
2184 if needed. */
2185
2186 static void
2187 record_important_candidates (struct ivopts_data *data)
2188 {
2189 unsigned i;
2190 struct iv_use *use;
2191
2192 for (i = 0; i < n_iv_cands (data); i++)
2193 {
2194 struct iv_cand *cand = iv_cand (data, i);
2195
2196 if (cand->important)
2197 bitmap_set_bit (data->important_candidates, i);
2198 }
2199
2200 data->consider_all_candidates = (n_iv_cands (data)
2201 <= CONSIDER_ALL_CANDIDATES_BOUND);
2202
2203 if (data->consider_all_candidates)
2204 {
2205 /* We will not need "related_cands" bitmaps in this case,
2206 so release them to decrease peak memory consumption. */
2207 for (i = 0; i < n_iv_uses (data); i++)
2208 {
2209 use = iv_use (data, i);
2210 BITMAP_FREE (use->related_cands);
2211 }
2212 }
2213 else
2214 {
2215 /* Add important candidates to the related_cands bitmaps. */
2216 for (i = 0; i < n_iv_uses (data); i++)
2217 bitmap_ior_into (iv_use (data, i)->related_cands,
2218 data->important_candidates);
2219 }
2220 }
2221
2222 /* Finds the candidates for the induction variables. */
2223
2224 static void
2225 find_iv_candidates (struct ivopts_data *data)
2226 {
2227 /* Add commonly used ivs. */
2228 add_standard_iv_candidates (data);
2229
2230 /* Add old induction variables. */
2231 add_old_ivs_candidates (data);
2232
2233 /* Add induction variables derived from uses. */
2234 add_derived_ivs_candidates (data);
2235
2236 /* Record the important candidates. */
2237 record_important_candidates (data);
2238 }
2239
2240 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2241 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2242 we allocate a small open-addressed hash table for every use. */
2243
2244 static void
2245 alloc_use_cost_map (struct ivopts_data *data)
2246 {
2247 unsigned i, size, s, j;
2248
2249 for (i = 0; i < n_iv_uses (data); i++)
2250 {
2251 struct iv_use *use = iv_use (data, i);
2252 bitmap_iterator bi;
2253
2254 if (data->consider_all_candidates)
2255 size = n_iv_cands (data);
2256 else
2257 {
2258 s = 0;
2259 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2260 {
2261 s++;
2262 }
2263
2264 /* Round up to the nearest power of two, so that computing the modulus by it is fast. */
2265 for (size = 1; size < s; size <<= 1)
2266 continue;
2267 }
2268
2269 use->n_map_members = size;
2270 use->cost_map = XCNEWVEC (struct cost_pair, size);
2271 }
2272 }
2273
2274 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2275 on invariants DEPENDS_ON and that the value used in expressing it
2276 is VALUE. */
2277
2278 static void
2279 set_use_iv_cost (struct ivopts_data *data,
2280 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2281 bitmap depends_on, tree value)
2282 {
2283 unsigned i, s;
2284
2285 if (cost == INFTY)
2286 {
2287 BITMAP_FREE (depends_on);
2288 return;
2289 }
2290
2291 if (data->consider_all_candidates)
2292 {
2293 use->cost_map[cand->id].cand = cand;
2294 use->cost_map[cand->id].cost = cost;
2295 use->cost_map[cand->id].depends_on = depends_on;
2296 use->cost_map[cand->id].value = value;
2297 return;
2298 }
2299
2300 /* n_map_members is a power of two, so this computes modulo. */
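/* E.g., with n_map_members == 8 and cand->id == 13, probing starts at
slot 13 & 7 == 5 and wraps around to slot 0 if slots 5..7 are taken. */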
2301 s = cand->id & (use->n_map_members - 1);
2302 for (i = s; i < use->n_map_members; i++)
2303 if (!use->cost_map[i].cand)
2304 goto found;
2305 for (i = 0; i < s; i++)
2306 if (!use->cost_map[i].cand)
2307 goto found;
2308
2309 gcc_unreachable ();
2310
2311 found:
2312 use->cost_map[i].cand = cand;
2313 use->cost_map[i].cost = cost;
2314 use->cost_map[i].depends_on = depends_on;
2315 use->cost_map[i].value = value;
2316 }
2317
2318 /* Gets cost of (USE, CANDIDATE) pair. */
2319
2320 static struct cost_pair *
2321 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2322 struct iv_cand *cand)
2323 {
2324 unsigned i, s;
2325 struct cost_pair *ret;
2326
2327 if (!cand)
2328 return NULL;
2329
2330 if (data->consider_all_candidates)
2331 {
2332 ret = use->cost_map + cand->id;
2333 if (!ret->cand)
2334 return NULL;
2335
2336 return ret;
2337 }
2338
2339 /* n_map_members is a power of two, so this computes modulo. */
2340 s = cand->id & (use->n_map_members - 1);
2341 for (i = s; i < use->n_map_members; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2344
2345 for (i = 0; i < s; i++)
2346 if (use->cost_map[i].cand == cand)
2347 return use->cost_map + i;
2348
2349 return NULL;
2350 }
2351
2352 /* Returns an estimate of the cost of computing SEQ. */
2353
2354 static unsigned
2355 seq_cost (rtx seq)
2356 {
2357 unsigned cost = 0;
2358 rtx set;
2359
2360 for (; seq; seq = NEXT_INSN (seq))
2361 {
2362 set = single_set (seq);
2363 if (set)
2364 cost += rtx_cost (set, SET);
2365 else
2366 cost++;
2367 }
2368
2369 return cost;
2370 }
2371
2372 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2373 static rtx
2374 produce_memory_decl_rtl (tree obj, int *regno)
2375 {
2376 rtx x;
2377
2378 gcc_assert (obj);
2379 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2380 {
2381 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2382 x = gen_rtx_SYMBOL_REF (Pmode, name);
2383 }
2384 else
2385 x = gen_raw_REG (Pmode, (*regno)++);
2386
2387 return gen_rtx_MEM (DECL_MODE (obj), x);
2388 }
2389
2390 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2391 walk_tree. DATA contains the actual fake register number. */
2392
2393 static tree
2394 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2395 {
2396 tree obj = NULL_TREE;
2397 rtx x = NULL_RTX;
2398 int *regno = data;
2399
2400 switch (TREE_CODE (*expr_p))
2401 {
2402 case ADDR_EXPR:
2403 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2404 handled_component_p (*expr_p);
2405 expr_p = &TREE_OPERAND (*expr_p, 0))
2406 continue;
2407 obj = *expr_p;
2408 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2409 x = produce_memory_decl_rtl (obj, regno);
2410 break;
2411
2412 case SSA_NAME:
2413 *ws = 0;
2414 obj = SSA_NAME_VAR (*expr_p);
2415 if (!DECL_RTL_SET_P (obj))
2416 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2417 break;
2418
2419 case VAR_DECL:
2420 case PARM_DECL:
2421 case RESULT_DECL:
2422 *ws = 0;
2423 obj = *expr_p;
2424
2425 if (DECL_RTL_SET_P (obj))
2426 break;
2427
2428 if (DECL_MODE (obj) == BLKmode)
2429 x = produce_memory_decl_rtl (obj, regno);
2430 else
2431 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2432
2433 break;
2434
2435 default:
2436 break;
2437 }
2438
2439 if (x)
2440 {
2441 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2442 SET_DECL_RTL (obj, x);
2443 }
2444
2445 return NULL_TREE;
2446 }
2447
2448 /* Determines cost of the computation of EXPR. */
2449
2450 static unsigned
2451 computation_cost (tree expr)
2452 {
2453 rtx seq, rslt;
2454 tree type = TREE_TYPE (expr);
2455 unsigned cost;
2456 /* Avoid using hard regs in ways which may be unsupported. */
2457 int regno = LAST_VIRTUAL_REGISTER + 1;
2458
2459 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2460 start_sequence ();
2461 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2462 seq = get_insns ();
2463 end_sequence ();
2464
2465 cost = seq_cost (seq);
2466 if (MEM_P (rslt))
2467 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2468
2469 return cost;
2470 }
2471
2472 /* Returns variable containing the value of candidate CAND at statement AT. */
2473
2474 static tree
2475 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2476 {
2477 if (stmt_after_increment (loop, cand, stmt))
2478 return cand->var_after;
2479 else
2480 return cand->var_before;
2481 }
2482
2483 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2484 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
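/* For instance, assuming a 64-bit HOST_WIDE_INT, a 32-bit T takes the bit
from TREE_INT_CST_LOW, while a 128-bit T takes bit 63 of TREE_INT_CST_HIGH. */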
2485
2486 int
2487 tree_int_cst_sign_bit (tree t)
2488 {
2489 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2490 unsigned HOST_WIDE_INT w;
2491
2492 if (bitno < HOST_BITS_PER_WIDE_INT)
2493 w = TREE_INT_CST_LOW (t);
2494 else
2495 {
2496 w = TREE_INT_CST_HIGH (t);
2497 bitno -= HOST_BITS_PER_WIDE_INT;
2498 }
2499
2500 return (w >> bitno) & 1;
2501 }
2502
2503 /* If we can prove that TOP = cst * BOT for some constant cst,
2504 store cst to MUL and return true. Otherwise return false.
2505 The returned value is always sign-extended, regardless of the
2506 signedness of TOP and BOT. */
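/* For example, TOP = i * 6 - i * 2 with BOT = i yields *MUL = 4, and
TOP = 12 with BOT = -4 yields *MUL = -3; TOP = i + 1 with BOT = i fails,
since 1 is not a constant multiple of i. */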
2507
2508 static bool
2509 constant_multiple_of (tree top, tree bot, double_int *mul)
2510 {
2511 tree mby;
2512 enum tree_code code;
2513 double_int res, p0, p1;
2514 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2515
2516 STRIP_NOPS (top);
2517 STRIP_NOPS (bot);
2518
2519 if (operand_equal_p (top, bot, 0))
2520 {
2521 *mul = double_int_one;
2522 return true;
2523 }
2524
2525 code = TREE_CODE (top);
2526 switch (code)
2527 {
2528 case MULT_EXPR:
2529 mby = TREE_OPERAND (top, 1);
2530 if (TREE_CODE (mby) != INTEGER_CST)
2531 return false;
2532
2533 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2534 return false;
2535
2536 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2537 precision);
2538 return true;
2539
2540 case PLUS_EXPR:
2541 case MINUS_EXPR:
2542 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2543 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2544 return false;
2545
2546 if (code == MINUS_EXPR)
2547 p1 = double_int_neg (p1);
2548 *mul = double_int_sext (double_int_add (p0, p1), precision);
2549 return true;
2550
2551 case INTEGER_CST:
2552 if (TREE_CODE (bot) != INTEGER_CST)
2553 return false;
2554
2555 p0 = double_int_sext (tree_to_double_int (top), precision);
2556 p1 = double_int_sext (tree_to_double_int (bot), precision);
2557 if (double_int_zero_p (p1))
2558 return false;
2559 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2560 precision);
2561 return double_int_zero_p (res);
2562
2563 default:
2564 return false;
2565 }
2566 }
2567
2568 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2569 same precision, which is at least as wide as the precision of TYPE, stores
2570 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2571 type of A and B. */
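/* E.g., if A is (short) SA and B is (short) SB for some 32-bit operands
SA and SB, *A becomes SA, *B becomes SB, and the 32-bit type is
returned. */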
2572
2573 static tree
2574 determine_common_wider_type (tree *a, tree *b)
2575 {
2576 tree wider_type = NULL;
2577 tree suba, subb;
2578 tree atype = TREE_TYPE (*a);
2579
2580 if ((TREE_CODE (*a) == NOP_EXPR
2581 || TREE_CODE (*a) == CONVERT_EXPR))
2582 {
2583 suba = TREE_OPERAND (*a, 0);
2584 wider_type = TREE_TYPE (suba);
2585 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2586 return atype;
2587 }
2588 else
2589 return atype;
2590
2591 if ((TREE_CODE (*b) == NOP_EXPR
2592 || TREE_CODE (*b) == CONVERT_EXPR))
2593 {
2594 subb = TREE_OPERAND (*b, 0);
2595 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2596 return atype;
2597 }
2598 else
2599 return atype;
2600
2601 *a = suba;
2602 *b = subb;
2603 return wider_type;
2604 }
2605
2606 /* Determines the expression by which USE is expressed from induction variable
2607 CAND at statement AT in LOOP. The expression is stored in a decomposed
2608 form into AFF. Returns false if USE cannot be expressed using CAND. */
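/* For instance, if USE has base &a[0] and step 4 while CAND has base 0 and
step 1, the ratio is 4 and the result is the combination &a[0] + 4 * var. */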
2609
2610 static bool
2611 get_computation_aff (struct loop *loop,
2612 struct iv_use *use, struct iv_cand *cand, tree at,
2613 struct affine_tree_combination *aff)
2614 {
2615 tree ubase = use->iv->base;
2616 tree ustep = use->iv->step;
2617 tree cbase = cand->iv->base;
2618 tree cstep = cand->iv->step, cstep_common;
2619 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2620 tree common_type, var;
2621 tree uutype;
2622 aff_tree cbase_aff, var_aff;
2623 double_int rat;
2624
2625 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2626 {
2627 /* We do not have enough precision to express the values of the use. */
2628 return false;
2629 }
2630
2631 var = var_at_stmt (loop, cand, at);
2632 uutype = unsigned_type_for (utype);
2633
2634 /* If the conversion is not a noop, perform it. */
2635 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2636 {
2637 cstep = fold_convert (uutype, cstep);
2638 cbase = fold_convert (uutype, cbase);
2639 var = fold_convert (uutype, var);
2640 }
2641
2642 if (!constant_multiple_of (ustep, cstep, &rat))
2643 return false;
2644
2645 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2646 type, we achieve better folding by computing their difference in this
2647 wider type, and cast the result to UUTYPE. We do not need to worry about
2648 overflows, as all the arithmetic will in the end be performed in UUTYPE
2649 anyway. */
2650 common_type = determine_common_wider_type (&ubase, &cbase);
2651
2652 /* use = ubase - ratio * cbase + ratio * var. */
2653 tree_to_aff_combination (ubase, common_type, aff);
2654 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2655 tree_to_aff_combination (var, uutype, &var_aff);
2656
2657 /* We need to shift the value if we are after the increment. */
2658 if (stmt_after_increment (loop, cand, at))
2659 {
2660 aff_tree cstep_aff;
2661
2662 if (common_type != uutype)
2663 cstep_common = fold_convert (common_type, cstep);
2664 else
2665 cstep_common = cstep;
2666
2667 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2668 aff_combination_add (&cbase_aff, &cstep_aff);
2669 }
2670
2671 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2672 aff_combination_add (aff, &cbase_aff);
2673 if (common_type != uutype)
2674 aff_combination_convert (aff, uutype);
2675
2676 aff_combination_scale (&var_aff, rat);
2677 aff_combination_add (aff, &var_aff);
2678
2679 return true;
2680 }
2681
2682 /* Determines the expression by which USE is expressed from induction variable
2683 CAND at statement AT in LOOP. The computation is unshared. */
2684
2685 static tree
2686 get_computation_at (struct loop *loop,
2687 struct iv_use *use, struct iv_cand *cand, tree at)
2688 {
2689 aff_tree aff;
2690 tree type = TREE_TYPE (use->iv->base);
2691
2692 if (!get_computation_aff (loop, use, cand, at, &aff))
2693 return NULL_TREE;
2694 unshare_aff_combination (&aff);
2695 return fold_convert (type, aff_combination_to_tree (&aff));
2696 }
2697
2698 /* Determines the expression by which USE is expressed from induction variable
2699 CAND in LOOP. The computation is unshared. */
2700
2701 static tree
2702 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2703 {
2704 return get_computation_at (loop, use, cand, use->stmt);
2705 }
2706
2707 /* Returns cost of addition in MODE. */
2708
2709 static unsigned
2710 add_cost (enum machine_mode mode)
2711 {
2712 static unsigned costs[NUM_MACHINE_MODES];
2713 rtx seq;
2714 unsigned cost;
2715
2716 if (costs[mode])
2717 return costs[mode];
2718
2719 start_sequence ();
2720 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2721 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2722 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2723 NULL_RTX);
2724 seq = get_insns ();
2725 end_sequence ();
2726
2727 cost = seq_cost (seq);
2728 if (!cost)
2729 cost = 1;
2730
2731 costs[mode] = cost;
2732
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "Addition in %s costs %d\n",
2735 GET_MODE_NAME (mode), cost);
2736 return cost;
2737 }
2738
2739 /* Entry in a hashtable of already known costs for multiplication. */
2740 struct mbc_entry
2741 {
2742 HOST_WIDE_INT cst; /* The constant to multiply by. */
2743 enum machine_mode mode; /* The mode of the multiplication. */
2744 unsigned cost; /* The cost. */
2745 };
2746
2747 /* Computes the hash value for ENTRY. */
2748
2749 static hashval_t
2750 mbc_entry_hash (const void *entry)
2751 {
2752 const struct mbc_entry *e = entry;
2753
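/* Mix the mode and the low bits of the constant; the multipliers 57 and
877 appear to be arbitrary mixing values. */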
2754 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2755 }
2756
2757 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2758
2759 static int
2760 mbc_entry_eq (const void *entry1, const void *entry2)
2761 {
2762 const struct mbc_entry *e1 = entry1;
2763 const struct mbc_entry *e2 = entry2;
2764
2765 return (e1->mode == e2->mode
2766 && e1->cst == e2->cst);
2767 }
2768
2769 /* Returns cost of multiplication by constant CST in MODE. */
2770
2771 unsigned
2772 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2773 {
2774 static htab_t costs;
2775 struct mbc_entry **cached, act;
2776 rtx seq;
2777 unsigned cost;
2778
2779 if (!costs)
2780 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2781
2782 act.mode = mode;
2783 act.cst = cst;
2784 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2785 if (*cached)
2786 return (*cached)->cost;
2787
2788 *cached = XNEW (struct mbc_entry);
2789 (*cached)->mode = mode;
2790 (*cached)->cst = cst;
2791
2792 start_sequence ();
2793 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2794 gen_int_mode (cst, mode), NULL_RTX, 0);
2795 seq = get_insns ();
2796 end_sequence ();
2797
2798 cost = seq_cost (seq);
2799
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2802 (int) cst, GET_MODE_NAME (mode), cost);
2803
2804 (*cached)->cost = cost;
2805
2806 return cost;
2807 }
2808
2809 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2810 validity for a memory reference accessing memory of mode MODE. */
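/* E.g., on targets with scaled indexed addressing such as i386, only the
ratios 1, 2, 4 and 8 are typically representable in an address. */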
2811
2812 bool
2813 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2814 {
2815 #define MAX_RATIO 128
2816 static sbitmap valid_mult[MAX_MACHINE_MODE];
2817
2818 if (!valid_mult[mode])
2819 {
2820 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2821 rtx addr;
2822 HOST_WIDE_INT i;
2823
2824 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2825 sbitmap_zero (valid_mult[mode]);
2826 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2827 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2828 {
2829 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2830 if (memory_address_p (mode, addr))
2831 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2832 }
2833
2834 if (dump_file && (dump_flags & TDF_DETAILS))
2835 {
2836 fprintf (dump_file, " allowed multipliers:");
2837 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2838 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2839 fprintf (dump_file, " %d", (int) i);
2840 fprintf (dump_file, "\n");
2841 fprintf (dump_file, "\n");
2842 }
2843 }
2844
2845 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2846 return false;
2847
2848 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2849 }
2850
2851 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2852 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2853 variable is omitted. Compute the cost for a memory reference that accesses
2854 a memory location of mode MEM_MODE.
2855
2856 TODO -- there must be some better way. This is all quite crude. */
2857
2858 static unsigned
2859 get_address_cost (bool symbol_present, bool var_present,
2860 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2861 enum machine_mode mem_mode)
2862 {
2863 static bool initialized[MAX_MACHINE_MODE];
2864 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2865 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2866 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2867 unsigned cost, acost;
2868 bool offset_p, ratio_p;
2869 HOST_WIDE_INT s_offset;
2870 unsigned HOST_WIDE_INT mask;
2871 unsigned bits;
2872
2873 if (!initialized[mem_mode])
2874 {
2875 HOST_WIDE_INT i;
2876 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2877 int old_cse_not_expected;
2878 unsigned sym_p, var_p, off_p, rat_p, add_c;
2879 rtx seq, addr, base;
2880 rtx reg0, reg1;
2881
2882 initialized[mem_mode] = true;
2883
2884 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2885
2886 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2887 for (i = start; i <= 1 << 20; i <<= 1)
2888 {
2889 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2890 if (!memory_address_p (mem_mode, addr))
2891 break;
2892 }
2893 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2894 off[mem_mode] = max_offset[mem_mode];
2895
2896 for (i = start; i <= 1 << 20; i <<= 1)
2897 {
2898 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2899 if (!memory_address_p (mem_mode, addr))
2900 break;
2901 }
2902 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2903
2904 if (dump_file && (dump_flags & TDF_DETAILS))
2905 {
2906 fprintf (dump_file, "get_address_cost:\n");
2907 fprintf (dump_file, " min offset %s %d\n",
2908 GET_MODE_NAME (mem_mode),
2909 (int) min_offset[mem_mode]);
2910 fprintf (dump_file, " max offset %s %d\n",
2911 GET_MODE_NAME (mem_mode),
2912 (int) max_offset[mem_mode]);
2913 }
2914
2915 rat[mem_mode] = 1;
2916 for (i = 2; i <= MAX_RATIO; i++)
2917 if (multiplier_allowed_in_address_p (i, mem_mode))
2918 {
2919 rat[mem_mode] = i;
2920 break;
2921 }
2922
2923 /* Compute the cost of various addressing modes. */
2924 acost = 0;
2925 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2926 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2927
2928 for (i = 0; i < 16; i++)
2929 {
2930 sym_p = i & 1;
2931 var_p = (i >> 1) & 1;
2932 off_p = (i >> 2) & 1;
2933 rat_p = (i >> 3) & 1;
2934
2935 addr = reg0;
2936 if (rat_p)
2937 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2938 gen_int_mode (rat[mem_mode], Pmode));
2939
2940 if (var_p)
2941 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2942
2943 if (sym_p)
2944 {
2945 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2946 if (off_p)
2947 base = gen_rtx_fmt_e (CONST, Pmode,
2948 gen_rtx_fmt_ee (PLUS, Pmode,
2949 base,
2950 gen_int_mode (off[mem_mode],
2951 Pmode)));
2952 }
2953 else if (off_p)
2954 base = gen_int_mode (off[mem_mode], Pmode);
2955 else
2956 base = NULL_RTX;
2957
2958 if (base)
2959 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2960
2961 start_sequence ();
2962 /* To avoid splitting addressing modes, pretend that no cse will
2963 follow. */
2964 old_cse_not_expected = cse_not_expected;
2965 cse_not_expected = true;
2966 addr = memory_address (mem_mode, addr);
2967 cse_not_expected = old_cse_not_expected;
2968 seq = get_insns ();
2969 end_sequence ();
2970
2971 acost = seq_cost (seq);
2972 acost += address_cost (addr, mem_mode);
2973
2974 if (!acost)
2975 acost = 1;
2976 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2977 }
2978
2979 /* On some targets, it is quite expensive to load a symbol into a register,
2980 which makes addresses that contain symbols look much more expensive.
2981 However, the symbol will have to be loaded in any case before the
2982 loop (and quite likely we have it in a register already), so it does not
2983 make much sense to penalize them too heavily. So make some final
2984 tweaks for the SYMBOL_PRESENT modes:
2985
2986 If VAR_PRESENT is false, and the mode obtained by changing symbol to
2987 var is cheaper, use this mode with a small penalty.
2988 If VAR_PRESENT is true, try whether the mode with
2989 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
2990 if this is the case, use it. */
2991 add_c = add_cost (Pmode);
2992 for (i = 0; i < 8; i++)
2993 {
2994 var_p = i & 1;
2995 off_p = (i >> 1) & 1;
2996 rat_p = (i >> 2) & 1;
2997
2998 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
2999 if (var_p)
3000 acost += add_c;
3001
3002 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3003 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3004 }
3005
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3007 {
3008 fprintf (dump_file, "Address costs:\n");
3009
3010 for (i = 0; i < 16; i++)
3011 {
3012 sym_p = i & 1;
3013 var_p = (i >> 1) & 1;
3014 off_p = (i >> 2) & 1;
3015 rat_p = (i >> 3) & 1;
3016
3017 fprintf (dump_file, " ");
3018 if (sym_p)
3019 fprintf (dump_file, "sym + ");
3020 if (var_p)
3021 fprintf (dump_file, "var + ");
3022 if (off_p)
3023 fprintf (dump_file, "cst + ");
3024 if (rat_p)
3025 fprintf (dump_file, "rat * ");
3026
3027 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3028 fprintf (dump_file, "index costs %d\n", acost);
3029 }
3030 fprintf (dump_file, "\n");
3031 }
3032 }
3033
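/* Sign-extend OFFSET from the width of Pmode; e.g., with a 32-bit Pmode,
an OFFSET of 0xfffffff0 yields an S_OFFSET of -16. */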
3034 bits = GET_MODE_BITSIZE (Pmode);
3035 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3036 offset &= mask;
3037 if ((offset >> (bits - 1) & 1))
3038 offset |= ~mask;
3039 s_offset = offset;
3040
3041 cost = 0;
3042 offset_p = (s_offset != 0
3043 && min_offset[mem_mode] <= s_offset
3044 && s_offset <= max_offset[mem_mode]);
3045 ratio_p = (ratio != 1
3046 && multiplier_allowed_in_address_p (ratio, mem_mode));
3047
3048 if (ratio != 1 && !ratio_p)
3049 cost += multiply_by_cost (ratio, Pmode);
3050
3051 if (s_offset && !offset_p && !symbol_present)
3052 cost += add_cost (Pmode);
3053
3054 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3055 return cost + acost;
3056 }
3057
3058 /* Estimates cost of forcing expression EXPR into a variable. */
3059
3060 unsigned
3061 force_expr_to_var_cost (tree expr)
3062 {
3063 static bool costs_initialized = false;
3064 static unsigned integer_cost;
3065 static unsigned symbol_cost;
3066 static unsigned address_cost;
3067 tree op0, op1;
3068 unsigned cost0, cost1, cost;
3069 enum machine_mode mode;
3070
3071 if (!costs_initialized)
3072 {
3073 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3074 rtx x = gen_rtx_MEM (DECL_MODE (var),
3075 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3076 tree addr;
3077 tree type = build_pointer_type (integer_type_node);
3078
3079 integer_cost = computation_cost (build_int_cst (integer_type_node,
3080 2000));
3081
3082 SET_DECL_RTL (var, x);
3083 TREE_STATIC (var) = 1;
3084 addr = build1 (ADDR_EXPR, type, var);
3085 symbol_cost = computation_cost (addr) + 1;
3086
3087 address_cost
3088 = computation_cost (build2 (PLUS_EXPR, type,
3089 addr,
3090 build_int_cst (type, 2000))) + 1;
3091 if (dump_file && (dump_flags & TDF_DETAILS))
3092 {
3093 fprintf (dump_file, "force_expr_to_var_cost:\n");
3094 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3095 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3096 fprintf (dump_file, " address %d\n", (int) address_cost);
3097 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3098 fprintf (dump_file, "\n");
3099 }
3100
3101 costs_initialized = true;
3102 }
3103
3104 STRIP_NOPS (expr);
3105
3106 if (SSA_VAR_P (expr))
3107 return 0;
3108
3109 if (TREE_INVARIANT (expr))
3110 {
3111 if (TREE_CODE (expr) == INTEGER_CST)
3112 return integer_cost;
3113
3114 if (TREE_CODE (expr) == ADDR_EXPR)
3115 {
3116 tree obj = TREE_OPERAND (expr, 0);
3117
3118 if (TREE_CODE (obj) == VAR_DECL
3119 || TREE_CODE (obj) == PARM_DECL
3120 || TREE_CODE (obj) == RESULT_DECL)
3121 return symbol_cost;
3122 }
3123
3124 return address_cost;
3125 }
3126
3127 switch (TREE_CODE (expr))
3128 {
3129 case PLUS_EXPR:
3130 case MINUS_EXPR:
3131 case MULT_EXPR:
3132 op0 = TREE_OPERAND (expr, 0);
3133 op1 = TREE_OPERAND (expr, 1);
3134 STRIP_NOPS (op0);
3135 STRIP_NOPS (op1);
3136
3137 if (is_gimple_val (op0))
3138 cost0 = 0;
3139 else
3140 cost0 = force_expr_to_var_cost (op0);
3141
3142 if (is_gimple_val (op1))
3143 cost1 = 0;
3144 else
3145 cost1 = force_expr_to_var_cost (op1);
3146
3147 break;
3148
3149 default:
3150 /* Just an arbitrary value, FIXME. */
3151 return target_spill_cost;
3152 }
3153
3154 mode = TYPE_MODE (TREE_TYPE (expr));
3155 switch (TREE_CODE (expr))
3156 {
3157 case PLUS_EXPR:
3158 case MINUS_EXPR:
3159 cost = add_cost (mode);
3160 break;
3161
3162 case MULT_EXPR:
3163 if (cst_and_fits_in_hwi (op0))
3164 cost = multiply_by_cost (int_cst_value (op0), mode);
3165 else if (cst_and_fits_in_hwi (op1))
3166 cost = multiply_by_cost (int_cst_value (op1), mode);
3167 else
3168 return target_spill_cost;
3169 break;
3170
3171 default:
3172 gcc_unreachable ();
3173 }
3174
3175 cost += cost0;
3176 cost += cost1;
3177
3178 /* Bound the cost by target_spill_cost. The parts of complicated
3179 computations often are either loop invariant or at least can
3180 be shared between several iv uses, so letting this grow without
3181 limits would not give reasonable results. */
3182 return cost < target_spill_cost ? cost : target_spill_cost;
3183 }
3184
3185 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3186 invariants the computation depends on. */
3187
3188 static unsigned
3189 force_var_cost (struct ivopts_data *data,
3190 tree expr, bitmap *depends_on)
3191 {
3192 if (depends_on)
3193 {
3194 fd_ivopts_data = data;
3195 walk_tree (&expr, find_depends, depends_on, NULL);
3196 }
3197
3198 return force_expr_to_var_cost (expr);
3199 }
3200
3201 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3202 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3203 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3204 invariants the computation depends on. */
3205
3206 static unsigned
3207 split_address_cost (struct ivopts_data *data,
3208 tree addr, bool *symbol_present, bool *var_present,
3209 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3210 {
3211 tree core;
3212 HOST_WIDE_INT bitsize;
3213 HOST_WIDE_INT bitpos;
3214 tree toffset;
3215 enum machine_mode mode;
3216 int unsignedp, volatilep;
3217
3218 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3219 &unsignedp, &volatilep, false);
3220
3221 if (toffset != 0
3222 || bitpos % BITS_PER_UNIT != 0
3223 || TREE_CODE (core) != VAR_DECL)
3224 {
3225 *symbol_present = false;
3226 *var_present = true;
3227 fd_ivopts_data = data;
3228 walk_tree (&addr, find_depends, depends_on, NULL);
3229 return target_spill_cost;
3230 }
3231
3232 *offset += bitpos / BITS_PER_UNIT;
3233 if (TREE_STATIC (core)
3234 || DECL_EXTERNAL (core))
3235 {
3236 *symbol_present = true;
3237 *var_present = false;
3238 return 0;
3239 }
3240
3241 *symbol_present = false;
3242 *var_present = true;
3243 return 0;
3244 }
3245
3246 /* Estimates cost of expressing difference of addresses E1 - E2 as
3247 var + symbol + offset. The value of offset is added to OFFSET,
3248 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3249 part is missing. DEPENDS_ON is a set of the invariants the computation
3250 depends on. */
3251
3252 static unsigned
3253 ptr_difference_cost (struct ivopts_data *data,
3254 tree e1, tree e2, bool *symbol_present, bool *var_present,
3255 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3256 {
3257 HOST_WIDE_INT diff = 0;
3258 unsigned cost;
3259
3260 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3261
3262 if (ptr_difference_const (e1, e2, &diff))
3263 {
3264 *offset += diff;
3265 *symbol_present = false;
3266 *var_present = false;
3267 return 0;
3268 }
3269
3270 if (e2 == integer_zero_node)
3271 return split_address_cost (data, TREE_OPERAND (e1, 0),
3272 symbol_present, var_present, offset, depends_on);
3273
3274 *symbol_present = false;
3275 *var_present = true;
3276
3277 cost = force_var_cost (data, e1, depends_on);
3278 cost += force_var_cost (data, e2, depends_on);
3279 cost += add_cost (Pmode);
3280
3281 return cost;
3282 }
3283
3284 /* Estimates cost of expressing difference E1 - E2 as
3285 var + symbol + offset. The value of offset is added to OFFSET,
3286 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3287 part is missing. DEPENDS_ON is a set of the invariants the computation
3288 depends on. */
3289
3290 static unsigned
3291 difference_cost (struct ivopts_data *data,
3292 tree e1, tree e2, bool *symbol_present, bool *var_present,
3293 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3294 {
3295 unsigned cost;
3296 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3297 unsigned HOST_WIDE_INT off1, off2;
3298
3299 e1 = strip_offset (e1, &off1);
3300 e2 = strip_offset (e2, &off2);
3301 *offset += off1 - off2;
3302
3303 STRIP_NOPS (e1);
3304 STRIP_NOPS (e2);
3305
3306 if (TREE_CODE (e1) == ADDR_EXPR)
3307 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3308 depends_on);
3309 *symbol_present = false;
3310
3311 if (operand_equal_p (e1, e2, 0))
3312 {
3313 *var_present = false;
3314 return 0;
3315 }
3316 *var_present = true;
3317 if (integer_zerop (e2))
3318 return force_var_cost (data, e1, depends_on);
3319
3320 if (integer_zerop (e1))
3321 {
3322 cost = force_var_cost (data, e2, depends_on);
3323 cost += multiply_by_cost (-1, mode);
3324
3325 return cost;
3326 }
3327
3328 cost = force_var_cost (data, e1, depends_on);
3329 cost += force_var_cost (data, e2, depends_on);
3330 cost += add_cost (mode);
3331
3332 return cost;
3333 }
3334
3335 /* Determines the cost of the computation by which USE is expressed
3336 from induction variable CAND. If ADDRESS_P is true, we just need
3337 to create an address from it, otherwise we want to get it into a
3338 register. A set of invariants we depend on is stored in
3339 DEPENDS_ON. AT is the statement at which the value is computed. */
3340
3341 static unsigned
3342 get_computation_cost_at (struct ivopts_data *data,
3343 struct iv_use *use, struct iv_cand *cand,
3344 bool address_p, bitmap *depends_on, tree at)
3345 {
3346 tree ubase = use->iv->base, ustep = use->iv->step;
3347 tree cbase, cstep;
3348 tree utype = TREE_TYPE (ubase), ctype;
3349 unsigned HOST_WIDE_INT cstepi, offset = 0;
3350 HOST_WIDE_INT ratio, aratio;
3351 bool var_present, symbol_present;
3352 unsigned cost = 0, n_sums;
3353 double_int rat;
3354
3355 *depends_on = NULL;
3356
3357 /* Only consider real candidates. */
3358 if (!cand->iv)
3359 return INFTY;
3360
3361 cbase = cand->iv->base;
3362 cstep = cand->iv->step;
3363 ctype = TREE_TYPE (cbase);
3364
3365 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3366 {
3367 /* We do not have enough precision to express the values of the use. */
3368 return INFTY;
3369 }
3370
3371 if (address_p)
3372 {
3373 /* Do not try to express the address of an object with a computation based
3374 on the address of a different object. This may cause problems in rtl
3375 level alias analysis (which does not expect this to happen,
3376 as it is invalid in C), and would be unlikely to be useful
3377 anyway. */
3378 if (use->iv->base_object
3379 && cand->iv->base_object
3380 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3381 return INFTY;
3382 }
3383
3384 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3385 {
3386 /* TODO -- add direct handling of this case. */
3387 goto fallback;
3388 }
3389
3390 /* CSTEPI is removed from the offset in case the statement is after the
3391 increment. If the step is not constant, we use zero instead.
3392 This is a bit imprecise (there is the extra addition), but
3393 redundancy elimination is likely to transform the code so that
3394 it uses the value of the variable before the increment anyway,
3395 so this is not too unrealistic. */
3396 if (cst_and_fits_in_hwi (cstep))
3397 cstepi = int_cst_value (cstep);
3398 else
3399 cstepi = 0;
3400
3401 if (!constant_multiple_of (ustep, cstep, &rat))
3402 return INFTY;
3403
3404 if (double_int_fits_in_shwi_p (rat))
3405 ratio = double_int_to_shwi (rat);
3406 else
3407 return INFTY;
3408
3409 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3410 or ratio == 1, it is better to handle this like
3411
3412 ubase - ratio * cbase + ratio * var
3413
3414 (this also holds in the case ratio == -1, TODO). */
3415
3416 if (cst_and_fits_in_hwi (cbase))
3417 {
3418 offset = - ratio * int_cst_value (cbase);
3419 cost += difference_cost (data,
3420 ubase, integer_zero_node,
3421 &symbol_present, &var_present, &offset,
3422 depends_on);
3423 }
3424 else if (ratio == 1)
3425 {
3426 cost += difference_cost (data,
3427 ubase, cbase,
3428 &symbol_present, &var_present, &offset,
3429 depends_on);
3430 }
3431 else
3432 {
3433 cost += force_var_cost (data, cbase, depends_on);
3434 cost += add_cost (TYPE_MODE (ctype));
3435 cost += difference_cost (data,
3436 ubase, integer_zero_node,
3437 &symbol_present, &var_present, &offset,
3438 depends_on);
3439 }
3440
3441 /* If we are after the increment, the value of the candidate is higher by
3442 one iteration. */
3443 if (stmt_after_increment (data->current_loop, cand, at))
3444 offset -= ratio * cstepi;
3445
3446 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3447 (symbol/var/const parts may be omitted). If we are looking for an address,
3448 find the cost of addressing this. */
3449 if (address_p)
3450 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3451 TYPE_MODE (TREE_TYPE (*use->op_p)));
3452
3453 /* Otherwise estimate the costs for computing the expression. */
3454 aratio = ratio > 0 ? ratio : -ratio;
3455 if (!symbol_present && !var_present && !offset)
3456 {
3457 if (ratio != 1)
3458 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3459
3460 return cost;
3461 }
3462
3463 if (aratio != 1)
3464 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3465
3466 n_sums = 1;
3467 if (var_present
3468 /* Symbol + offset should be compile-time computable. */
3469 && (symbol_present || offset))
3470 n_sums++;
3471
3472 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3473
3474 fallback:
3475 {
3476 /* Just get the expression, expand it and measure the cost. */
3477 tree comp = get_computation_at (data->current_loop, use, cand, at);
3478
3479 if (!comp)
3480 return INFTY;
3481
3482 if (address_p)
3483 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3484
3485 return computation_cost (comp);
3486 }
3487 }
3488
3489 /* Determines the cost of the computation by that USE is expressed
3490 from induction variable CAND. If ADDRESS_P is true, we just need
3491 to create an address from it, otherwise we want to get it into
3492 register. A set of invariants we depend on is stored in
3493 DEPENDS_ON. */
3494
3495 static unsigned
3496 get_computation_cost (struct ivopts_data *data,
3497 struct iv_use *use, struct iv_cand *cand,
3498 bool address_p, bitmap *depends_on)
3499 {
3500 return get_computation_cost_at (data,
3501 use, cand, address_p, depends_on, use->stmt);
3502 }
3503
3504 /* Determines cost of basing replacement of USE on CAND in a generic
3505 expression. */
3506
3507 static bool
3508 determine_use_iv_cost_generic (struct ivopts_data *data,
3509 struct iv_use *use, struct iv_cand *cand)
3510 {
3511 bitmap depends_on;
3512 unsigned cost;
3513
3514 /* The simple case first -- if we need to express the value of the preserved
3515 original biv, the cost is 0. This also prevents us from counting the
3516 cost of increment twice -- once at this use and once in the cost of
3517 the candidate. */
3518 if (cand->pos == IP_ORIGINAL
3519 && cand->incremented_at == use->stmt)
3520 {
3521 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3522 return true;
3523 }
3524
3525 cost = get_computation_cost (data, use, cand, false, &depends_on);
3526 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3527
3528 return cost != INFTY;
3529 }
3530
3531 /* Determines cost of basing replacement of USE on CAND in an address. */
3532
3533 static bool
3534 determine_use_iv_cost_address (struct ivopts_data *data,
3535 struct iv_use *use, struct iv_cand *cand)
3536 {
3537 bitmap depends_on;
3538 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3539
3540 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3541
3542 return cost != INFTY;
3543 }
3544
3545 /* Computes value of candidate CAND at position AT in iteration NITER, and
3546 stores it to VAL. */
3547
3548 static void
3549 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3550 aff_tree *val)
3551 {
3552 aff_tree step, delta, nit;
3553 struct iv *iv = cand->iv;
3554 tree type = TREE_TYPE (iv->base);
3555
3556 tree_to_aff_combination (iv->step, type, &step);
3557 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3558 aff_combination_convert (&nit, type);
3559 aff_combination_mult (&nit, &step, &delta);
3560 if (stmt_after_increment (loop, cand, at))
3561 aff_combination_add (&delta, &step);
3562
3563 tree_to_aff_combination (iv->base, type, val);
3564 aff_combination_add (val, &delta);
3565 }
3566
3567 /* Returns the period of induction variable IV. */
3568
3569 static tree
3570 iv_period (struct iv *iv)
3571 {
3572 tree step = iv->step, period, type;
3573 tree pow2div;
3574
3575 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3576
3577 /* The period of the iv is the type range divided by gcd (step, type
3578 range). Since the type range is a power of two, it suffices to
3579 determine the maximum power of two that divides step. */
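/* E.g., for an 8-bit unsigned iv with step 4, pow2div is 2 and the period
is the 6-bit mask 63: after 64 increments the iv returns to its initial
value, so at most 63 iterations can be distinguished. */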
3580 pow2div = num_ending_zeros (step);
3581 type = unsigned_type_for (TREE_TYPE (step));
3582
3583 period = build_low_bits_mask (type,
3584 (TYPE_PRECISION (type)
3585 - tree_low_cst (pow2div, 1)));
3586
3587 return period;
3588 }
3589
3590 /* Returns the comparison operator used when eliminating the iv USE. */
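/* If the exit is taken on the true edge, the eliminated test has the form
var == bound (EQ_EXPR); otherwise it has the form var != bound. */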
3591
3592 static enum tree_code
3593 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3594 {
3595 struct loop *loop = data->current_loop;
3596 basic_block ex_bb;
3597 edge exit;
3598
3599 ex_bb = bb_for_stmt (use->stmt);
3600 exit = EDGE_SUCC (ex_bb, 0);
3601 if (flow_bb_inside_loop_p (loop, exit->dest))
3602 exit = EDGE_SUCC (ex_bb, 1);
3603
3604 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3605 }
3606
3607 /* Check whether it is possible to express the condition in USE by a comparison
3608 of candidate CAND. If so, store the value the candidate is compared with to BOUND. */
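/* E.g., for a candidate with base 0 and step 1 and an exit reached after
niter iterations, BOUND becomes the candidate value in iteration niter
(niter before the increment, niter + 1 after it), so the exit test can
be rewritten as a comparison of the candidate variable with BOUND. */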
3609
3610 static bool
3611 may_eliminate_iv (struct ivopts_data *data,
3612 struct iv_use *use, struct iv_cand *cand, tree *bound)
3613 {
3614 basic_block ex_bb;
3615 edge exit;
3616 tree nit, nit_type;
3617 tree wider_type, period, per_type;
3618 struct loop *loop = data->current_loop;
3619 aff_tree bnd;
3620
3621 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3622 return false;
3623
3624 /* For now, this works only for exits that dominate the loop latch. TODO -- extend
3625 for other conditions inside loop body. */
3626 ex_bb = bb_for_stmt (use->stmt);
3627 if (use->stmt != last_stmt (ex_bb)
3628 || TREE_CODE (use->stmt) != COND_EXPR)
3629 return false;
3630 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3631 return false;
3632
3633 exit = EDGE_SUCC (ex_bb, 0);
3634 if (flow_bb_inside_loop_p (loop, exit->dest))
3635 exit = EDGE_SUCC (ex_bb, 1);
3636 if (flow_bb_inside_loop_p (loop, exit->dest))
3637 return false;
3638
3639 nit = niter_for_exit (data, exit);
3640 if (!nit)
3641 return false;
3642
3643 nit_type = TREE_TYPE (nit);
3644
3645 /* Determine whether we may use the variable to test whether niter iterations
3646 have elapsed. This is the case iff the period of the induction variable is
3647 at least the number of iterations. */
3648 period = iv_period (cand->iv);
3649 if (!period)
3650 return false;
3651 per_type = TREE_TYPE (period);
3652
3654 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3655 wider_type = per_type;
3656 else
3657 wider_type = nit_type;
3658
3659 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3660 fold_convert (wider_type, period),
3661 fold_convert (wider_type, nit))))
3662 return false;
3663
3664 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3665 *bound = aff_combination_to_tree (&bnd);
3666 return true;
3667 }
3668
3669 /* Determines cost of basing replacement of USE on CAND in a condition. */
3670
3671 static bool
3672 determine_use_iv_cost_condition (struct ivopts_data *data,
3673 struct iv_use *use, struct iv_cand *cand)
3674 {
3675 tree bound = NULL_TREE;
3676 struct iv *cmp_iv;
3677 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3678 unsigned elim_cost, express_cost, cost;
3679 bool ok;
3680
3681 /* Only consider real candidates. */
3682 if (!cand->iv)
3683 {
3684 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3685 return false;
3686 }
3687
3688 /* Try iv elimination. */
3689 if (may_eliminate_iv (data, use, cand, &bound))
3690 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3691 else
3692 elim_cost = INFTY;
3693
3694 /* Try expressing the original giv. If it is compared with an invariant,
3695 note that we cannot get rid of it. */
3696 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3697 gcc_assert (ok);
3698
3699 express_cost = get_computation_cost (data, use, cand, false,
3700 &depends_on_express);
3701 fd_ivopts_data = data;
3702 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3703
3704 /* Choose the better approach. */
3705 if (elim_cost < express_cost)
3706 {
3707 cost = elim_cost;
3708 depends_on = depends_on_elim;
3709 depends_on_elim = NULL;
3710 }
3711 else
3712 {
3713 cost = express_cost;
3714 depends_on = depends_on_express;
3715 depends_on_express = NULL;
3716 bound = NULL_TREE;
3717 }
3718
3719 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3720
3721 if (depends_on_elim)
3722 BITMAP_FREE (depends_on_elim);
3723 if (depends_on_express)
3724 BITMAP_FREE (depends_on_express);
3725
3726 return cost != INFTY;
3727 }
3728
3729 /* Determines cost of basing replacement of USE on CAND. Returns false
3730 if USE cannot be based on CAND. */
3731
3732 static bool
3733 determine_use_iv_cost (struct ivopts_data *data,
3734 struct iv_use *use, struct iv_cand *cand)
3735 {
3736 switch (use->type)
3737 {
3738 case USE_NONLINEAR_EXPR:
3739 return determine_use_iv_cost_generic (data, use, cand);
3740
3741 case USE_ADDRESS:
3742 return determine_use_iv_cost_address (data, use, cand);
3743
3744 case USE_COMPARE:
3745 return determine_use_iv_cost_condition (data, use, cand);
3746
3747 default:
3748 gcc_unreachable ();
3749 }
3750 }
3751
3752 /* Determines costs of basing the use of the iv on an iv candidate. */
3753
3754 static void
3755 determine_use_iv_costs (struct ivopts_data *data)
3756 {
3757 unsigned i, j;
3758 struct iv_use *use;
3759 struct iv_cand *cand;
3760 bitmap to_clear = BITMAP_ALLOC (NULL);
3761
3762 alloc_use_cost_map (data);
3763
3764 for (i = 0; i < n_iv_uses (data); i++)
3765 {
3766 use = iv_use (data, i);
3767
3768 if (data->consider_all_candidates)
3769 {
3770 for (j = 0; j < n_iv_cands (data); j++)
3771 {
3772 cand = iv_cand (data, j);
3773 determine_use_iv_cost (data, use, cand);
3774 }
3775 }
3776 else
3777 {
3778 bitmap_iterator bi;
3779
3780 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3781 {
3782 cand = iv_cand (data, j);
3783 if (!determine_use_iv_cost (data, use, cand))
3784 bitmap_set_bit (to_clear, j);
3785 }
3786
3787 /* Remove the candidates for which the cost is infinite from
3788 the list of related candidates. */
3789 bitmap_and_compl_into (use->related_cands, to_clear);
3790 bitmap_clear (to_clear);
3791 }
3792 }
3793
3794 BITMAP_FREE (to_clear);
3795
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3797 {
3798 fprintf (dump_file, "Use-candidate costs:\n");
3799
3800 for (i = 0; i < n_iv_uses (data); i++)
3801 {
3802 use = iv_use (data, i);
3803
3804 fprintf (dump_file, "Use %d:\n", i);
3805 fprintf (dump_file, " cand\tcost\tdepends on\n");
3806 for (j = 0; j < use->n_map_members; j++)
3807 {
3808 if (!use->cost_map[j].cand
3809 || use->cost_map[j].cost == INFTY)
3810 continue;
3811
3812 fprintf (dump_file, " %d\t%d\t",
3813 use->cost_map[j].cand->id,
3814 use->cost_map[j].cost);
3815 if (use->cost_map[j].depends_on)
3816 bitmap_print (dump_file,
3817 use->cost_map[j].depends_on, "","");
3818 fprintf (dump_file, "\n");
3819 }
3820
3821 fprintf (dump_file, "\n");
3822 }
3823 fprintf (dump_file, "\n");
3824 }
3825 }
3826
3827 /* Determines cost of the candidate CAND. */
3828
3829 static void
3830 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3831 {
3832 unsigned cost_base, cost_step;
3833 tree base;
3834
3835 if (!cand->iv)
3836 {
3837 cand->cost = 0;
3838 return;
3839 }
3840
3841 /* There are two costs associated with the candidate -- its increment
3842 and its initialization. The second is almost negligible for any loop
3843 that rolls enough, so we give it only a small weight. */
3844
3845 base = cand->iv->base;
3846 cost_base = force_var_cost (data, base, NULL);
3847 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3848
3849 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3850
3851 /* Prefer the original iv unless we may gain something by replacing it;
3852 this is not really relevant for artificial ivs created by other
3853 passes. */
3854 if (cand->pos == IP_ORIGINAL
3855 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3856 cand->cost--;
3857
3858 /* Prefer not to insert statements into the latch unless there are some
3859 already (so that we do not create unnecessary jumps). */
3860 if (cand->pos == IP_END
3861 && empty_block_p (ip_end_pos (data->current_loop)))
3862 cand->cost++;
3863 }
3864
3865 /* Determines costs of computation of the candidates. */
3866
3867 static void
3868 determine_iv_costs (struct ivopts_data *data)
3869 {
3870 unsigned i;
3871
3872 if (dump_file && (dump_flags & TDF_DETAILS))
3873 {
3874 fprintf (dump_file, "Candidate costs:\n");
3875 fprintf (dump_file, " cand\tcost\n");
3876 }
3877
3878 for (i = 0; i < n_iv_cands (data); i++)
3879 {
3880 struct iv_cand *cand = iv_cand (data, i);
3881
3882 determine_iv_cost (data, cand);
3883
3884 if (dump_file && (dump_flags & TDF_DETAILS))
3885 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3886 }
3887
3888 if (dump_file && (dump_flags & TDF_DETAILS))
3889 fprintf (dump_file, "\n");
3890 }
3891
3892 /* Calculates cost for having SIZE induction variables. */
3893
3894 static unsigned
3895 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3896 {
3897 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3898 }
3899
3900 /* For each size of the induction variable set determine the penalty. */
3901
3902 static void
3903 determine_set_costs (struct ivopts_data *data)
3904 {
3905 unsigned j, n;
3906 tree phi, op;
3907 struct loop *loop = data->current_loop;
3908 bitmap_iterator bi;
3909
3910 /* We use the following model (definitely improvable, especially the
3911 cost function -- TODO):
3912
3913 We estimate the number of registers available (using MD data), name it A.
3914
3915 We estimate the number of registers used by the loop, name it U. This
3916 number is obtained as the number of loop phi nodes (not counting virtual
3917 registers and bivs) + the number of variables from outside of the loop.
3918
3919 We set a reserve R (free regs that are used for temporary computations,
3920 etc.). For now the reserve is a constant 3.
3921
3922 Let I be the number of induction variables.
3923
3924 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
3925 encourage making a lot of ivs without a reason).
3926 -- if A - R < U + I <= A, the cost is I * PRES_COST
3927 -- if U + I > A, the cost is I * PRES_COST and
3928 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
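/* For instance, with A = 6, R = 3 and U = 2: one iv costs SMALL_COST,
three ivs cost 3 * PRES_COST, and six ivs cost 6 * PRES_COST plus
number of uses * SPILL_COST * 2 / 8. */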
3929
3930 if (dump_file && (dump_flags & TDF_DETAILS))
3931 {
3932 fprintf (dump_file, "Global costs:\n");
3933 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3934 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3935 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3936 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3937 }
3938
3939 n = 0;
3940 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3941 {
3942 op = PHI_RESULT (phi);
3943
3944 if (!is_gimple_reg (op))
3945 continue;
3946
3947 if (get_iv (data, op))
3948 continue;
3949
3950 n++;
3951 }
3952
3953 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3954 {
3955 struct version_info *info = ver_info (data, j);
3956
3957 if (info->inv_id && info->has_nonlin_use)
3958 n++;
3959 }
3960
3961 data->regs_used = n;
3962 if (dump_file && (dump_flags & TDF_DETAILS))
3963 fprintf (dump_file, " regs_used %d\n", n);
3964
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3966 {
3967 fprintf (dump_file, " cost for size:\n");
3968 fprintf (dump_file, " ivs\tcost\n");
3969 for (j = 0; j <= 2 * target_avail_regs; j++)
3970 fprintf (dump_file, " %d\t%d\n", j,
3971 ivopts_global_cost_for_size (data, j));
3972 fprintf (dump_file, "\n");
3973 }
3974 }
3975
3976 /* Returns true if A is a cheaper cost pair than B. */
3977
3978 static bool
3979 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3980 {
3981 if (!a)
3982 return false;
3983
3984 if (!b)
3985 return true;
3986
3987 if (a->cost < b->cost)
3988 return true;
3989
3990 if (a->cost > b->cost)
3991 return false;
3992
3993 /* In case the costs are the same, prefer the cheaper candidate. */
3994 if (a->cand->cost < b->cand->cost)
3995 return true;
3996
3997 return false;
3998 }
3999
4000 /* Computes the cost field of IVS structure. */
4001
4002 static void
4003 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4004 {
4005 unsigned cost = 0;
4006
4007 cost += ivs->cand_use_cost;
4008 cost += ivs->cand_cost;
4009 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4010
4011 ivs->cost = cost;
4012 }
4013
4014 /* Remove invariants in set INVS from set IVS. */
4015
4016 static void
4017 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4018 {
4019 bitmap_iterator bi;
4020 unsigned iid;
4021
4022 if (!invs)
4023 return;
4024
4025 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4026 {
4027 ivs->n_invariant_uses[iid]--;
4028 if (ivs->n_invariant_uses[iid] == 0)
4029 ivs->n_regs--;
4030 }
4031 }
4032
4033 /* Set USE not to be expressed by any candidate in IVS. */
4034
4035 static void
4036 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4037 struct iv_use *use)
4038 {
4039 unsigned uid = use->id, cid;
4040 struct cost_pair *cp;
4041
4042 cp = ivs->cand_for_use[uid];
4043 if (!cp)
4044 return;
4045 cid = cp->cand->id;
4046
4047 ivs->bad_uses++;
4048 ivs->cand_for_use[uid] = NULL;
4049 ivs->n_cand_uses[cid]--;
4050
4051 if (ivs->n_cand_uses[cid] == 0)
4052 {
4053 bitmap_clear_bit (ivs->cands, cid);
4054 /* Do not count the pseudocandidates. */
4055 if (cp->cand->iv)
4056 ivs->n_regs--;
4057 ivs->n_cands--;
4058 ivs->cand_cost -= cp->cand->cost;
4059
4060 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4061 }
4062
4063 ivs->cand_use_cost -= cp->cost;
4064
4065 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4066 iv_ca_recount_cost (data, ivs);
4067 }
4068
4069 /* Add invariants in set INVS to set IVS. */
4070
4071 static void
4072 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4073 {
4074 bitmap_iterator bi;
4075 unsigned iid;
4076
4077 if (!invs)
4078 return;
4079
4080 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4081 {
4082 ivs->n_invariant_uses[iid]++;
4083 if (ivs->n_invariant_uses[iid] == 1)
4084 ivs->n_regs++;
4085 }
4086 }
4087
4088 /* Set cost pair for USE in set IVS to CP. */
4089
4090 static void
4091 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4092 struct iv_use *use, struct cost_pair *cp)
4093 {
4094 unsigned uid = use->id, cid;
4095
4096 if (ivs->cand_for_use[uid] == cp)
4097 return;
4098
4099 if (ivs->cand_for_use[uid])
4100 iv_ca_set_no_cp (data, ivs, use);
4101
4102 if (cp)
4103 {
4104 cid = cp->cand->id;
4105
4106 ivs->bad_uses--;
4107 ivs->cand_for_use[uid] = cp;
4108 ivs->n_cand_uses[cid]++;
4109 if (ivs->n_cand_uses[cid] == 1)
4110 {
4111 bitmap_set_bit (ivs->cands, cid);
4112 /* Do not count the pseudocandidates. */
4113 if (cp->cand->iv)
4114 ivs->n_regs++;
4115 ivs->n_cands++;
4116 ivs->cand_cost += cp->cand->cost;
4117
4118 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4119 }
4120
4121 ivs->cand_use_cost += cp->cost;
4122 iv_ca_set_add_invariants (ivs, cp->depends_on);
4123 iv_ca_recount_cost (data, ivs);
4124 }
4125 }
4126
4127 /* Extend set IVS by expressing USE by some of the candidates in it
4128 if possible. */
4129
4130 static void
4131 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4132 struct iv_use *use)
4133 {
4134 struct cost_pair *best_cp = NULL, *cp;
4135 bitmap_iterator bi;
4136 unsigned i;
4137
4138 gcc_assert (ivs->upto >= use->id);
4139
4140 if (ivs->upto == use->id)
4141 {
4142 ivs->upto++;
4143 ivs->bad_uses++;
4144 }
4145
4146 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4147 {
4148 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4149
4150 if (cheaper_cost_pair (cp, best_cp))
4151 best_cp = cp;
4152 }
4153
4154 iv_ca_set_cp (data, ivs, use, best_cp);
4155 }
4156
4157 /* Get cost for assignment IVS. */
4158
4159 static unsigned
4160 iv_ca_cost (struct iv_ca *ivs)
4161 {
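  /* Any use that is not yet expressed by some candidate makes the
     whole assignment unusable, hence the infinite cost.  */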
4162 return (ivs->bad_uses ? INFTY : ivs->cost);
4163 }
4164
4165 /* Returns true if all dependences of CP are among invariants in IVS. */
4166
4167 static bool
4168 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4169 {
4170 unsigned i;
4171 bitmap_iterator bi;
4172
4173 if (!cp->depends_on)
4174 return true;
4175
4176 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4177 {
4178 if (ivs->n_invariant_uses[i] == 0)
4179 return false;
4180 }
4181
4182 return true;
4183 }
4184
4185 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4186 it before NEXT_CHANGE. */
4187
4188 static struct iv_ca_delta *
4189 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4190 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4191 {
4192 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4193
4194 change->use = use;
4195 change->old_cp = old_cp;
4196 change->new_cp = new_cp;
4197 change->next_change = next_change;
4198
4199 return change;
4200 }
4201
4202 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4203 are rewritten. */
4204
4205 static struct iv_ca_delta *
4206 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4207 {
4208 struct iv_ca_delta *last;
4209
4210 if (!l2)
4211 return l1;
4212
4213 if (!l1)
4214 return l2;
4215
4216 for (last = l1; last->next_change; last = last->next_change)
4217 continue;
4218 last->next_change = l2;
4219
4220 return l1;
4221 }
4222
4223 /* Returns the candidate by which USE is expressed in IVS. */
4224
4225 static struct cost_pair *
4226 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4227 {
4228 return ivs->cand_for_use[use->id];
4229 }
4230
4231 /* Reverse the list of changes DELTA, forming the inverse to it. */
4232
4233 static struct iv_ca_delta *
4234 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4235 {
4236 struct iv_ca_delta *act, *next, *prev = NULL;
4237 struct cost_pair *tmp;
4238
4239 for (act = delta; act; act = next)
4240 {
4241 next = act->next_change;
4242 act->next_change = prev;
4243 prev = act;
4244
4245 tmp = act->old_cp;
4246 act->old_cp = act->new_cp;
4247 act->new_cp = tmp;
4248 }
4249
4250 return prev;
4251 }
4252
4253 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4254 reverted instead. */
4255
4256 static void
4257 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4258 struct iv_ca_delta *delta, bool forward)
4259 {
4260 struct cost_pair *from, *to;
4261 struct iv_ca_delta *act;
4262
4263 if (!forward)
4264 delta = iv_ca_delta_reverse (delta);
4265
4266 for (act = delta; act; act = act->next_change)
4267 {
4268 from = act->old_cp;
4269 to = act->new_cp;
4270 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4271 iv_ca_set_cp (data, ivs, act->use, to);
4272 }
4273
4274 if (!forward)
4275 iv_ca_delta_reverse (delta);
4276 }
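/* A minimal sketch of how the delta machinery above is typically used
   to evaluate a change without destroying the current assignment; this
   is the pattern followed by iv_ca_extend and iv_ca_narrow below, and
   the variable names here are illustrative only:

     delta = iv_ca_delta_add (use, old_cp, new_cp, NULL);
     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);
     iv_ca_delta_free (&delta);

   Committing with FORWARD false reverses the list, swaps the old and
   new cost pairs in each entry, applies the result and reverses the
   list back, so the same delta can be replayed in either direction.  */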
4277
4278 /* Returns true if CAND is used in IVS. */
4279
4280 static bool
4281 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4282 {
4283 return ivs->n_cand_uses[cand->id] > 0;
4284 }
4285
4286 /* Returns number of induction variable candidates in the set IVS. */
4287
4288 static unsigned
4289 iv_ca_n_cands (struct iv_ca *ivs)
4290 {
4291 return ivs->n_cands;
4292 }
4293
4294 /* Free the list of changes DELTA. */
4295
4296 static void
4297 iv_ca_delta_free (struct iv_ca_delta **delta)
4298 {
4299 struct iv_ca_delta *act, *next;
4300
4301 for (act = *delta; act; act = next)
4302 {
4303 next = act->next_change;
4304 free (act);
4305 }
4306
4307 *delta = NULL;
4308 }
4309
4310 /* Allocates a new assignment of iv candidates to uses. */
4311
4312 static struct iv_ca *
4313 iv_ca_new (struct ivopts_data *data)
4314 {
4315 struct iv_ca *nw = XNEW (struct iv_ca);
4316
4317 nw->upto = 0;
4318 nw->bad_uses = 0;
4319 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4320 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4321 nw->cands = BITMAP_ALLOC (NULL);
4322 nw->n_cands = 0;
4323 nw->n_regs = 0;
4324 nw->cand_use_cost = 0;
4325 nw->cand_cost = 0;
4326 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4327 nw->cost = 0;
4328
4329 return nw;
4330 }
4331
4332 /* Free memory occupied by the set IVS. */
4333
4334 static void
4335 iv_ca_free (struct iv_ca **ivs)
4336 {
4337 free ((*ivs)->cand_for_use);
4338 free ((*ivs)->n_cand_uses);
4339 BITMAP_FREE ((*ivs)->cands);
4340 free ((*ivs)->n_invariant_uses);
4341 free (*ivs);
4342 *ivs = NULL;
4343 }
4344
4345 /* Dumps IVS to FILE. */
4346
4347 static void
4348 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4349 {
4350 const char *pref = " invariants ";
4351 unsigned i;
4352
4353 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4354 bitmap_print (file, ivs->cands, " candidates ","\n");
4355
4356 for (i = 1; i <= data->max_inv_id; i++)
4357 if (ivs->n_invariant_uses[i])
4358 {
4359 fprintf (file, "%s%d", pref, i);
4360 pref = ", ";
4361 }
4362 fprintf (file, "\n");
4363 }
4364
4365 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4366 new set, and store differences in DELTA. Number of induction variables
4367 in the new set is stored in N_IVS. */
4368
4369 static unsigned
4370 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4371 struct iv_cand *cand, struct iv_ca_delta **delta,
4372 unsigned *n_ivs)
4373 {
4374 unsigned i, cost;
4375 struct iv_use *use;
4376 struct cost_pair *old_cp, *new_cp;
4377
4378 *delta = NULL;
4379 for (i = 0; i < ivs->upto; i++)
4380 {
4381 use = iv_use (data, i);
4382 old_cp = iv_ca_cand_for_use (ivs, use);
4383
4384 if (old_cp
4385 && old_cp->cand == cand)
4386 continue;
4387
4388 new_cp = get_use_iv_cost (data, use, cand);
4389 if (!new_cp)
4390 continue;
4391
4392 if (!iv_ca_has_deps (ivs, new_cp))
4393 continue;
4394
4395 if (!cheaper_cost_pair (new_cp, old_cp))
4396 continue;
4397
4398 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4399 }
4400
4401 iv_ca_delta_commit (data, ivs, *delta, true);
4402 cost = iv_ca_cost (ivs);
4403 if (n_ivs)
4404 *n_ivs = iv_ca_n_cands (ivs);
4405 iv_ca_delta_commit (data, ivs, *delta, false);
4406
4407 return cost;
4408 }
4409
4410 /* Try narrowing set IVS by removing CAND. Return the cost of
4411 the new set and store the differences in DELTA. */
4412
4413 static unsigned
4414 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4415 struct iv_cand *cand, struct iv_ca_delta **delta)
4416 {
4417 unsigned i, ci;
4418 struct iv_use *use;
4419 struct cost_pair *old_cp, *new_cp, *cp;
4420 bitmap_iterator bi;
4421 struct iv_cand *cnd;
4422 unsigned cost;
4423
4424 *delta = NULL;
4425 for (i = 0; i < n_iv_uses (data); i++)
4426 {
4427 use = iv_use (data, i);
4428
4429 old_cp = iv_ca_cand_for_use (ivs, use);
4430 if (old_cp->cand != cand)
4431 continue;
4432
4433 new_cp = NULL;
4434
4435 if (data->consider_all_candidates)
4436 {
4437 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4438 {
4439 if (ci == cand->id)
4440 continue;
4441
4442 cnd = iv_cand (data, ci);
4443
4444 cp = get_use_iv_cost (data, use, cnd);
4445 if (!cp)
4446 continue;
4447 if (!iv_ca_has_deps (ivs, cp))
4448 continue;
4449
4450 if (!cheaper_cost_pair (cp, new_cp))
4451 continue;
4452
4453 new_cp = cp;
4454 }
4455 }
4456 else
4457 {
4458 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4459 {
4460 if (ci == cand->id)
4461 continue;
4462
4463 cnd = iv_cand (data, ci);
4464
4465 cp = get_use_iv_cost (data, use, cnd);
4466 if (!cp)
4467 continue;
4468 if (!iv_ca_has_deps (ivs, cp))
4469 continue;
4470
4471 if (!cheaper_cost_pair (cp, new_cp))
4472 continue;
4473
4474 new_cp = cp;
4475 }
4476 }
4477
4478 if (!new_cp)
4479 {
4480 iv_ca_delta_free (delta);
4481 return INFTY;
4482 }
4483
4484 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4485 }
4486
4487 iv_ca_delta_commit (data, ivs, *delta, true);
4488 cost = iv_ca_cost (ivs);
4489 iv_ca_delta_commit (data, ivs, *delta, false);
4490
4491 return cost;
4492 }
4493
4494 /* Try optimizing the set of candidates IVS by removing candidates other
4495 than EXCEPT_CAND from it. Return cost of the new set, and store the
4496 differences in DELTA. */
4497
4498 static unsigned
4499 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4500 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4501 {
4502 bitmap_iterator bi;
4503 struct iv_ca_delta *act_delta, *best_delta;
4504 unsigned i, best_cost, acost;
4505 struct iv_cand *cand;
4506
4507 best_delta = NULL;
4508 best_cost = iv_ca_cost (ivs);
4509
4510 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4511 {
4512 cand = iv_cand (data, i);
4513
4514 if (cand == except_cand)
4515 continue;
4516
4517 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4518
4519 if (acost < best_cost)
4520 {
4521 best_cost = acost;
4522 iv_ca_delta_free (&best_delta);
4523 best_delta = act_delta;
4524 }
4525 else
4526 iv_ca_delta_free (&act_delta);
4527 }
4528
4529 if (!best_delta)
4530 {
4531 *delta = NULL;
4532 return best_cost;
4533 }
4534
4535 /* Recurse to possibly remove other unnecessary ivs. */
4536 iv_ca_delta_commit (data, ivs, best_delta, true);
4537 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4538 iv_ca_delta_commit (data, ivs, best_delta, false);
4539 *delta = iv_ca_delta_join (best_delta, *delta);
4540 return best_cost;
4541 }
4542
4543 /* Tries to extend the set IVS in the best possible way in order
4544 to express USE. */
4545
4546 static bool
4547 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4548 struct iv_use *use)
4549 {
4550 unsigned best_cost, act_cost;
4551 unsigned i;
4552 bitmap_iterator bi;
4553 struct iv_cand *cand;
4554 struct iv_ca_delta *best_delta = NULL, *act_delta;
4555 struct cost_pair *cp;
4556
4557 iv_ca_add_use (data, ivs, use);
4558 best_cost = iv_ca_cost (ivs);
4559
4560 cp = iv_ca_cand_for_use (ivs, use);
4561 if (cp)
4562 {
4563 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4564 iv_ca_set_no_cp (data, ivs, use);
4565 }
4566
4567 /* First try the important candidates. Only if that fails, try the specific ones.
4568 Rationale -- in loops with many variables the best choice often is to use
4569 just one generic biv. If we added here many ivs specific to the uses,
4570 the optimization algorithm later would be likely to get stuck in a local
4571 minimum, thus causing us to create too many ivs. The approach from
4572 few ivs to more seems more likely to be successful -- starting from few
4573 ivs, replacing an expensive use by a specific iv should always be a
4574 win. */
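  /* For instance (an invented example, not from any test case), in

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + c[i];
           d[i] = 2 * a[i];
         }

     a single counter covering all the array accesses is usually cheaper
     than giving each access its own pointer iv, even though each access
     in isolation might prefer a specific pointer.  */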
4575 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4576 {
4577 cand = iv_cand (data, i);
4578
4579 if (iv_ca_cand_used_p (ivs, cand))
4580 continue;
4581
4582 cp = get_use_iv_cost (data, use, cand);
4583 if (!cp)
4584 continue;
4585
4586 iv_ca_set_cp (data, ivs, use, cp);
4587 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4588 iv_ca_set_no_cp (data, ivs, use);
4589 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4590
4591 if (act_cost < best_cost)
4592 {
4593 best_cost = act_cost;
4594
4595 iv_ca_delta_free (&best_delta);
4596 best_delta = act_delta;
4597 }
4598 else
4599 iv_ca_delta_free (&act_delta);
4600 }
4601
4602 if (best_cost == INFTY)
4603 {
4604 for (i = 0; i < use->n_map_members; i++)
4605 {
4606 cp = use->cost_map + i;
4607 cand = cp->cand;
4608 if (!cand)
4609 continue;
4610
4611 /* Already tried this. */
4612 if (cand->important)
4613 continue;
4614
4615 if (iv_ca_cand_used_p (ivs, cand))
4616 continue;
4617
4618 act_delta = NULL;
4619 iv_ca_set_cp (data, ivs, use, cp);
4620 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4621 iv_ca_set_no_cp (data, ivs, use);
4622 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4623 cp, act_delta);
4624
4625 if (act_cost < best_cost)
4626 {
4627 best_cost = act_cost;
4628
4629 if (best_delta)
4630 iv_ca_delta_free (&best_delta);
4631 best_delta = act_delta;
4632 }
4633 else
4634 iv_ca_delta_free (&act_delta);
4635 }
4636 }
4637
4638 iv_ca_delta_commit (data, ivs, best_delta, true);
4639 iv_ca_delta_free (&best_delta);
4640
4641 return (best_cost != INFTY);
4642 }
4643
4644 /* Finds an initial assignment of candidates to uses. */
4645
4646 static struct iv_ca *
4647 get_initial_solution (struct ivopts_data *data)
4648 {
4649 struct iv_ca *ivs = iv_ca_new (data);
4650 unsigned i;
4651
4652 for (i = 0; i < n_iv_uses (data); i++)
4653 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4654 {
4655 iv_ca_free (&ivs);
4656 return NULL;
4657 }
4658
4659 return ivs;
4660 }
4661
4662 /* Tries to improve the set of induction variables IVS. */
4663
4664 static bool
4665 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4666 {
4667 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4668 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4669 struct iv_cand *cand;
4670
4671 /* Try extending the set of induction variables by one. */
4672 for (i = 0; i < n_iv_cands (data); i++)
4673 {
4674 cand = iv_cand (data, i);
4675
4676 if (iv_ca_cand_used_p (ivs, cand))
4677 continue;
4678
4679 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4680 if (!act_delta)
4681 continue;
4682
4683 /* If we successfully added the candidate and the set is small enough,
4684 try optimizing it by removing other candidates. */
4685 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4686 {
4687 iv_ca_delta_commit (data, ivs, act_delta, true);
4688 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4689 iv_ca_delta_commit (data, ivs, act_delta, false);
4690 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4691 }
4692
4693 if (acost < best_cost)
4694 {
4695 best_cost = acost;
4696 iv_ca_delta_free (&best_delta);
4697 best_delta = act_delta;
4698 }
4699 else
4700 iv_ca_delta_free (&act_delta);
4701 }
4702
4703 if (!best_delta)
4704 {
4705 /* Try removing the candidates from the set instead. */
4706 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4707
4708 /* Nothing more we can do. */
4709 if (!best_delta)
4710 return false;
4711 }
4712
4713 iv_ca_delta_commit (data, ivs, best_delta, true);
4714 gcc_assert (best_cost == iv_ca_cost (ivs));
4715 iv_ca_delta_free (&best_delta);
4716 return true;
4717 }
4718
4719 /* Attempts to find the optimal set of induction variables. We use a simple
4720 greedy heuristic -- we try to replace at most one candidate in the selected
4721 solution and remove the unused ivs while this improves the cost. */
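/* Illustratively (the costs are invented), a run might start from an
   initial assignment with cost 24, find that adding one candidate and
   pruning another lowers it to 20, improve once more to 18, and stop
   as soon as no single extension or removal helps; with TDF_DETAILS,
   each improved assignment is dumped via iv_ca_dump.  */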
4722
4723 static struct iv_ca *
4724 find_optimal_iv_set (struct ivopts_data *data)
4725 {
4726 unsigned i;
4727 struct iv_ca *set;
4728 struct iv_use *use;
4729
4730 /* Get the initial solution. */
4731 set = get_initial_solution (data);
4732 if (!set)
4733 {
4734 if (dump_file && (dump_flags & TDF_DETAILS))
4735 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4736 return NULL;
4737 }
4738
4739 if (dump_file && (dump_flags & TDF_DETAILS))
4740 {
4741 fprintf (dump_file, "Initial set of candidates:\n");
4742 iv_ca_dump (data, dump_file, set);
4743 }
4744
4745 while (try_improve_iv_set (data, set))
4746 {
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4748 {
4749 fprintf (dump_file, "Improved to:\n");
4750 iv_ca_dump (data, dump_file, set);
4751 }
4752 }
4753
4754 if (dump_file && (dump_flags & TDF_DETAILS))
4755 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4756
4757 for (i = 0; i < n_iv_uses (data); i++)
4758 {
4759 use = iv_use (data, i);
4760 use->selected = iv_ca_cand_for_use (set, use)->cand;
4761 }
4762
4763 return set;
4764 }
4765
4766 /* Creates a new induction variable corresponding to CAND. */
4767
4768 static void
4769 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4770 {
4771 block_stmt_iterator incr_pos;
4772 tree base;
4773 bool after = false;
4774
4775 if (!cand->iv)
4776 return;
4777
4778 switch (cand->pos)
4779 {
4780 case IP_NORMAL:
4781 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4782 break;
4783
4784 case IP_END:
4785 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4786 after = true;
4787 break;
4788
4789 case IP_ORIGINAL:
4790 /* Mark that the iv is preserved. */
4791 name_info (data, cand->var_before)->preserve_biv = true;
4792 name_info (data, cand->var_after)->preserve_biv = true;
4793
4794 /* Rewrite the increment so that it uses var_before directly. */
4795 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4796
4797 return;
4798 }
4799
4800 gimple_add_tmp_var (cand->var_before);
4801 add_referenced_var (cand->var_before);
4802
4803 base = unshare_expr (cand->iv->base);
4804
4805 create_iv (base, unshare_expr (cand->iv->step),
4806 cand->var_before, data->current_loop,
4807 &incr_pos, after, &cand->var_before, &cand->var_after);
4808 }
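/* For illustration (the names and exact shape are approximate): for a
   candidate with base 0 and step 1 in a simple counted loop, IP_NORMAL
   places the increment of the new variable just before the exit test,

     ivtmp.5 = ivtmp.4 + 1;
     if (ivtmp.5 < n) goto loop;

   while IP_END places it after the exit test, at the end of the latch
   block.  The ivtmp.N names stand for the temporaries created for the
   new ivs.  */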
4809
4810 /* Creates new induction variables described in SET. */
4811
4812 static void
4813 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4814 {
4815 unsigned i;
4816 struct iv_cand *cand;
4817 bitmap_iterator bi;
4818
4819 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4820 {
4821 cand = iv_cand (data, i);
4822 create_new_iv (data, cand);
4823 }
4824 }
4825
4826 /* Removes statement STMT (a real statement or a phi node). If
4827 INCLUDING_DEFINED_NAME is true, also remove the ssa name it defines. */
4828
4829 static void
4830 remove_statement (tree stmt, bool including_defined_name)
4831 {
4832 if (TREE_CODE (stmt) == PHI_NODE)
4833 {
4834 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4835 }
4836 else
4837 {
4838 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4839
4840 bsi_remove (&bsi, true);
4841 release_defs (stmt);
4842 }
4843 }
4844
4845 /* Rewrites USE (definition of iv used in a nonlinear expression)
4846 using candidate CAND. */
4847
4848 static void
4849 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4850 struct iv_use *use, struct iv_cand *cand)
4851 {
4852 tree comp;
4853 tree op, stmts, tgt, ass;
4854 block_stmt_iterator bsi, pbsi;
4855
4856 /* An important special case -- if we are asked to express the value of
4857 the original iv by itself, just exit; there is no need to
4858 introduce a new computation (that might also need casting the
4859 variable to unsigned and back). */
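  /* For example (the SSA names are invented for illustration): if
     use->stmt is the original increment i_13 = i_4 + 1 and CAND is that
     original biv, incremented at this very statement, the code below
     leaves the statement untouched as long as the other operand is a
     constant or equal to the candidate's step.  */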
4860 if (cand->pos == IP_ORIGINAL
4861 && cand->incremented_at == use->stmt)
4862 {
4863 tree step, ctype, utype;
4864 enum tree_code incr_code = PLUS_EXPR;
4865
4866 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4867 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4868
4869 step = cand->iv->step;
4870 ctype = TREE_TYPE (step);
4871 utype = TREE_TYPE (cand->var_after);
4872 if (TREE_CODE (step) == NEGATE_EXPR)
4873 {
4874 incr_code = MINUS_EXPR;
4875 step = TREE_OPERAND (step, 0);
4876 }
4877
4878 /* Check whether we may leave the computation unchanged.
4879 This is the case only if it does not rely on other
4880 computations in the loop -- otherwise, the computation
4881 we rely upon may be removed in remove_unused_ivs,
4882 thus leading to ICE. */
4883 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4884 if (TREE_CODE (op) == PLUS_EXPR
4885 || TREE_CODE (op) == MINUS_EXPR)
4886 {
4887 if (TREE_OPERAND (op, 0) == cand->var_before)
4888 op = TREE_OPERAND (op, 1);
4889 else if (TREE_CODE (op) == PLUS_EXPR
4890 && TREE_OPERAND (op, 1) == cand->var_before)
4891 op = TREE_OPERAND (op, 0);
4892 else
4893 op = NULL_TREE;
4894 }
4895 else
4896 op = NULL_TREE;
4897
4898 if (op
4899 && (TREE_CODE (op) == INTEGER_CST
4900 || operand_equal_p (op, step, 0)))
4901 return;
4902
4903 /* Otherwise, add the necessary computations to express
4904 the iv. */
4905 op = fold_convert (ctype, cand->var_before);
4906 comp = fold_convert (utype,
4907 build2 (incr_code, ctype, op,
4908 unshare_expr (step)));
4909 }
4910 else
4911 {
4912 comp = get_computation (data->current_loop, use, cand);
4913 gcc_assert (comp != NULL_TREE);
4914 }
4915
4916 switch (TREE_CODE (use->stmt))
4917 {
4918 case PHI_NODE:
4919 tgt = PHI_RESULT (use->stmt);
4920
4921 /* If we should keep the biv, do not replace it. */
4922 if (name_info (data, tgt)->preserve_biv)
4923 return;
4924
4925 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4926 while (!bsi_end_p (pbsi)
4927 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4928 {
4929 bsi = pbsi;
4930 bsi_next (&pbsi);
4931 }
4932 break;
4933
4934 case GIMPLE_MODIFY_STMT:
4935 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4936 bsi = bsi_for_stmt (use->stmt);
4937 break;
4938
4939 default:
4940 gcc_unreachable ();
4941 }
4942
4943 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4944
4945 if (TREE_CODE (use->stmt) == PHI_NODE)
4946 {
4947 if (stmts)
4948 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4949 ass = build_gimple_modify_stmt (tgt, op);
4950 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4951 remove_statement (use->stmt, false);
4952 SSA_NAME_DEF_STMT (tgt) = ass;
4953 }
4954 else
4955 {
4956 if (stmts)
4957 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4958 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4959 }
4960 }
4961
4962 /* Replaces ssa name in index IDX by its basic variable. Callback for
4963 for_each_index. */
4964
4965 static bool
4966 idx_remove_ssa_names (tree base, tree *idx,
4967 void *data ATTRIBUTE_UNUSED)
4968 {
4969 tree *op;
4970
4971 if (TREE_CODE (*idx) == SSA_NAME)
4972 *idx = SSA_NAME_VAR (*idx);
4973
4974 if (TREE_CODE (base) == ARRAY_REF)
4975 {
4976 op = &TREE_OPERAND (base, 2);
4977 if (*op
4978 && TREE_CODE (*op) == SSA_NAME)
4979 *op = SSA_NAME_VAR (*op);
4980 op = &TREE_OPERAND (base, 3);
4981 if (*op
4982 && TREE_CODE (*op) == SSA_NAME)
4983 *op = SSA_NAME_VAR (*op);
4984 }
4985
4986 return true;
4987 }
4988
4989 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4990
4991 static tree
4992 unshare_and_remove_ssa_names (tree ref)
4993 {
4994 ref = unshare_expr (ref);
4995 for_each_index (&ref, idx_remove_ssa_names, NULL);
4996
4997 return ref;
4998 }
4999
5000 /* Extract the alias analysis info for the memory reference REF. There are
5001 several ways this information may be stored, and what precisely it means
5002 depends on the type of the reference; but there is always one _DECL node
5003 hidden somewhere that is used to determine the set of virtual operands
5004 for the reference. The code below deciphers this jungle and extracts
5005 this single useful piece of information. */
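/* For instance (names invented): for a use *p_1 where p_1 carries
   points-to information with a name memory tag, that tag is returned;
   for *p_1 without one, the symbol memory tag of the base variable p is
   used; and for a direct reference such as a.b.c, the tag of the decl a
   (or a itself, if it has no tag) is returned.  */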
5006
5007 static tree
5008 get_ref_tag (tree ref, tree orig)
5009 {
5010 tree var = get_base_address (ref);
5011 tree aref = NULL_TREE, tag, sv;
5012 HOST_WIDE_INT offset, size, maxsize;
5013
5014 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5015 {
5016 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5017 if (ref)
5018 break;
5019 }
5020
5021 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5022 return unshare_expr (sv);
5023
5024 if (!var)
5025 return NULL_TREE;
5026
5027 if (TREE_CODE (var) == INDIRECT_REF)
5028 {
5029 /* If the base is a dereference of a pointer, first check its name memory
5030 tag. If it does not have one, use its symbol memory tag. */
5031 var = TREE_OPERAND (var, 0);
5032 if (TREE_CODE (var) != SSA_NAME)
5033 return NULL_TREE;
5034
5035 if (SSA_NAME_PTR_INFO (var))
5036 {
5037 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5038 if (tag)
5039 return tag;
5040 }
5041
5042 var = SSA_NAME_VAR (var);
5043 tag = symbol_mem_tag (var);
5044 gcc_assert (tag != NULL_TREE);
5045 return tag;
5046 }
5047 else
5048 {
5049 if (!DECL_P (var))
5050 return NULL_TREE;
5051
5052 tag = symbol_mem_tag (var);
5053 if (tag)
5054 return tag;
5055
5056 return var;
5057 }
5058 }
5059
5060 /* Copies the reference information from OLD_REF to NEW_REF. */
5061
5062 static void
5063 copy_ref_info (tree new_ref, tree old_ref)
5064 {
5065 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5066 copy_mem_ref_info (new_ref, old_ref);
5067 else
5068 {
5069 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5070 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5071 }
5072 }
5073
5074 /* Rewrites USE (address that is an iv) using candidate CAND. */
5075
5076 static void
5077 rewrite_use_address (struct ivopts_data *data,
5078 struct iv_use *use, struct iv_cand *cand)
5079 {
5080 aff_tree aff;
5081 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5082 tree ref;
5083 bool ok;
5084
5085 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5086 gcc_assert (ok);
5087 unshare_aff_combination (&aff);
5088
5089 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5090 copy_ref_info (ref, *use->op_p);
5091 *use->op_p = ref;
5092 }
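/* As an illustration (the exact form is target-dependent and the names
   are invented), an address use a[i] whose affine combination is based
   on the chosen iv may end up as a TARGET_MEM_REF along the lines of
   MEM[symbol: a, index: ivtmp.5, step: 4], which lets the backend use
   its addressing modes directly.  */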
5093
5094 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5095 candidate CAND. */
5096
5097 static void
5098 rewrite_use_compare (struct ivopts_data *data,
5099 struct iv_use *use, struct iv_cand *cand)
5100 {
5101 tree comp, *var_p, op, bound;
5102 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5103 enum tree_code compare;
5104 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5105 bool ok;
5106
5107 bound = cp->value;
5108 if (bound)
5109 {
5110 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5111 tree var_type = TREE_TYPE (var);
5112
5113 compare = iv_elimination_compare (data, use);
5114 bound = unshare_expr (fold_convert (var_type, bound));
5115 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5116
5117 *use->op_p = build2 (compare, boolean_type_node, var, op);
5118 return;
5119 }
5120
5121 /* The induction variable elimination failed; just express the original
5122 giv. */
5123 comp = get_computation (data->current_loop, use, cand);
5124 gcc_assert (comp != NULL_TREE);
5125
5126 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5127 gcc_assert (ok);
5128
5129 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5130 }
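/* For example (an invented case): when the bound of the control
   variable of

     for (i = 0; i < 100; i++)
       *p++ = 0;

   can be precomputed, the exit test i < 100 may be replaced by a
   comparison of the pointer iv against that bound, such as
   p != base + 400 for 4-byte elements, after which i itself becomes
   unused and is removed by remove_unused_ivs.  */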
5131
5132 /* Rewrites USE using candidate CAND. */
5133
5134 static void
5135 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5136 {
5137 push_stmt_changes (&use->stmt);
5138
5139 switch (use->type)
5140 {
5141 case USE_NONLINEAR_EXPR:
5142 rewrite_use_nonlinear_expr (data, use, cand);
5143 break;
5144
5145 case USE_ADDRESS:
5146 rewrite_use_address (data, use, cand);
5147 break;
5148
5149 case USE_COMPARE:
5150 rewrite_use_compare (data, use, cand);
5151 break;
5152
5153 default:
5154 gcc_unreachable ();
5155 }
5156
5157 pop_stmt_changes (&use->stmt);
5158 }
5159
5160 /* Rewrite the uses using the selected induction variables. */
5161
5162 static void
5163 rewrite_uses (struct ivopts_data *data)
5164 {
5165 unsigned i;
5166 struct iv_cand *cand;
5167 struct iv_use *use;
5168
5169 for (i = 0; i < n_iv_uses (data); i++)
5170 {
5171 use = iv_use (data, i);
5172 cand = use->selected;
5173 gcc_assert (cand);
5174
5175 rewrite_use (data, use, cand);
5176 }
5177 }
5178
5179 /* Removes the ivs that are not used after rewriting. */
5180
5181 static void
5182 remove_unused_ivs (struct ivopts_data *data)
5183 {
5184 unsigned j;
5185 bitmap_iterator bi;
5186
5187 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5188 {
5189 struct version_info *info;
5190
5191 info = ver_info (data, j);
5192 if (info->iv
5193 && !integer_zerop (info->iv->step)
5194 && !info->inv_id
5195 && !info->iv->have_use_for
5196 && !info->preserve_biv)
5197 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5198 }
5199 }
5200
5201 /* Frees data allocated by the optimization of a single loop. */
5202
5203 static void
5204 free_loop_data (struct ivopts_data *data)
5205 {
5206 unsigned i, j;
5207 bitmap_iterator bi;
5208 tree obj;
5209
5210 if (data->niters)
5211 {
5212 pointer_map_destroy (data->niters);
5213 data->niters = NULL;
5214 }
5215
5216 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5217 {
5218 struct version_info *info;
5219
5220 info = ver_info (data, i);
5221 if (info->iv)
5222 free (info->iv);
5223 info->iv = NULL;
5224 info->has_nonlin_use = false;
5225 info->preserve_biv = false;
5226 info->inv_id = 0;
5227 }
5228 bitmap_clear (data->relevant);
5229 bitmap_clear (data->important_candidates);
5230
5231 for (i = 0; i < n_iv_uses (data); i++)
5232 {
5233 struct iv_use *use = iv_use (data, i);
5234
5235 free (use->iv);
5236 BITMAP_FREE (use->related_cands);
5237 for (j = 0; j < use->n_map_members; j++)
5238 if (use->cost_map[j].depends_on)
5239 BITMAP_FREE (use->cost_map[j].depends_on);
5240 free (use->cost_map);
5241 free (use);
5242 }
5243 VEC_truncate (iv_use_p, data->iv_uses, 0);
5244
5245 for (i = 0; i < n_iv_cands (data); i++)
5246 {
5247 struct iv_cand *cand = iv_cand (data, i);
5248
5249 if (cand->iv)
5250 free (cand->iv);
5251 if (cand->depends_on)
5252 BITMAP_FREE (cand->depends_on);
5253 free (cand);
5254 }
5255 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5256
5257 if (data->version_info_size < num_ssa_names)
5258 {
5259 data->version_info_size = 2 * num_ssa_names;
5260 free (data->version_info);
5261 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5262 }
5263
5264 data->max_inv_id = 0;
5265
5266 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5267 SET_DECL_RTL (obj, NULL_RTX);
5268
5269 VEC_truncate (tree, decl_rtl_to_reset, 0);
5270 }
5271
5272 /* Finalizes the data structures used by the iv optimization pass. */
5274
5275 static void
5276 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5277 {
5278 free_loop_data (data);
5279 free (data->version_info);
5280 BITMAP_FREE (data->relevant);
5281 BITMAP_FREE (data->important_candidates);
5282
5283 VEC_free (tree, heap, decl_rtl_to_reset);
5284 VEC_free (iv_use_p, heap, data->iv_uses);
5285 VEC_free (iv_cand_p, heap, data->iv_candidates);
5286 }
5287
5288 /* Optimizes the LOOP. Returns true if anything changed. */
5289
5290 static bool
5291 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5292 {
5293 bool changed = false;
5294 struct iv_ca *iv_ca;
5295 edge exit;
5296
5297 gcc_assert (!data->niters);
5298 data->current_loop = loop;
5299
5300 if (dump_file && (dump_flags & TDF_DETAILS))
5301 {
5302 fprintf (dump_file, "Processing loop %d\n", loop->num);
5303
5304 exit = single_dom_exit (loop);
5305 if (exit)
5306 {
5307 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5308 exit->src->index, exit->dest->index);
5309 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5310 fprintf (dump_file, "\n");
5311 }
5312
5313 fprintf (dump_file, "\n");
5314 }
5315
5316 /* For each ssa name, determine whether it behaves as an induction variable
5317 in some loop. */
5318 if (!find_induction_variables (data))
5319 goto finish;
5320
5321 /* Finds interesting uses (item 1). */
5322 find_interesting_uses (data);
5323 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5324 goto finish;
5325
5326 /* Finds candidates for the induction variables (item 2). */
5327 find_iv_candidates (data);
5328
5329 /* Calculates the costs (item 3, part 1). */
5330 determine_use_iv_costs (data);
5331 determine_iv_costs (data);
5332 determine_set_costs (data);
5333
5334 /* Find the optimal set of induction variables (item 3, part 2). */
5335 iv_ca = find_optimal_iv_set (data);
5336 if (!iv_ca)
5337 goto finish;
5338 changed = true;
5339
5340 /* Create the new induction variables (item 4, part 1). */
5341 create_new_ivs (data, iv_ca);
5342 iv_ca_free (&iv_ca);
5343
5344 /* Rewrite the uses (item 4, part 2). */
5345 rewrite_uses (data);
5346
5347 /* Remove the ivs that are unused after rewriting. */
5348 remove_unused_ivs (data);
5349
5350 /* We have changed the structure of induction variables; it might happen
5351 that definitions in the scev database refer to some of them that were
5352 eliminated. */
5353 scev_reset ();
5354
5355 finish:
5356 free_loop_data (data);
5357
5358 return changed;
5359 }
5360
5361 /* Main entry point. Optimizes induction variables in loops. */
5362
5363 void
5364 tree_ssa_iv_optimize (void)
5365 {
5366 struct loop *loop;
5367 struct ivopts_data data;
5368 loop_iterator li;
5369
5370 tree_ssa_iv_optimize_init (&data);
5371
5372 /* Optimize the loops starting with the innermost ones. */
5373 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5374 {
5375 if (dump_file && (dump_flags & TDF_DETAILS))
5376 flow_loop_dump (loop, dump_file, NULL, 1);
5377
5378 tree_ssa_iv_optimize_loop (&data, loop);
5379 }
5380
5381 tree_ssa_iv_optimize_finalize (&data);
5382 }