gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25    optimizations commonly known as strength reduction, induction variable
26    coalescing and induction variable elimination.  It does so in the
   following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60    All of this is done loop by loop.  Doing it globally is theoretically
61    possible; it might give better performance and enable us to determine
62    costs more precisely, but getting all the interactions right would be
63    complicated.  */
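
/* As an illustrative sketch (a made-up example, not taken from any
   testcase), strength reduction of an address use may rewrite

     for (i = 0; i < n; i++)
       a[i] = 0;

   into pointer form, using the candidate p = &a[0] + i * sizeof (*a):

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which eliminates the multiplication hidden in the array indexing.  */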
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
93
94 /* The infinite cost. */
95 #define INFTY 10000000
96
97 /* The expected number of loop iterations. TODO -- use profiling instead of
98 this. */
99 #define AVG_LOOP_NITER(LOOP) 5
100
101
102 /* Representation of the induction variable. */
103 struct iv
104 {
105 tree base; /* Initial value of the iv. */
106 tree base_object; /* A memory object to which the induction variable points. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
112 };
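
/* E.g. (a sketch): for a counted loop "for (i = 0; i < n; i++)", the iv
   describing i has base 0 and step 1; base_object stays NULL because i is
   not a pointer, and biv_p is set once the variable is recognized as a
   basic induction variable below.  */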
113
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
116 {
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 };
124
125 /* Types of uses. */
126 enum use_type
127 {
128 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
129 USE_ADDRESS, /* Use in an address. */
130 USE_COMPARE /* Use is a compare. */
131 };
132
133 /* The candidate - cost pair. */
134 struct cost_pair
135 {
136 struct iv_cand *cand; /* The candidate. */
137 unsigned cost; /* The cost. */
138 bitmap depends_on; /* The list of invariants that have to be
139 preserved. */
140 tree value; /* For final value elimination, the expression for
141 the final value of the iv. For iv elimination,
142 the new bound to compare with. */
143 };
144
145 /* Use. */
146 struct iv_use
147 {
148 unsigned id; /* The id of the use. */
149 enum use_type type; /* Type of the use. */
150 struct iv *iv; /* The induction variable it is based on. */
151 tree stmt; /* Statement in which it occurs. */
152 tree *op_p; /* The place where it occurs. */
153 bitmap related_cands; /* The set of "related" iv candidates, plus the common
154 important ones. */
155
156 unsigned n_map_members; /* Number of candidates in the cost_map list. */
157 struct cost_pair *cost_map;
158 /* The costs w.r.t. the iv candidates. */
159
160 struct iv_cand *selected;
161 /* The selected candidate. */
162 };
163
164 /* The position where the iv is computed. */
165 enum iv_position
166 {
167 IP_NORMAL, /* At the end, just before the exit condition. */
168 IP_END, /* At the end of the latch block. */
169 IP_ORIGINAL /* The original biv. */
170 };
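
/* A rough sketch of the two alternative synthetic placements (hypothetical
   CFG, not from any dump):

     body:
       ...
       ivtmp = ivtmp + step;      <-- IP_NORMAL, just before the exit test
       if (ivtmp < bound) goto latch; else goto exit;
     latch:
       ivtmp = ivtmp + step;      <-- IP_END, at the end of the latch block
       goto body;  */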
171
172 /* The induction variable candidate. */
173 struct iv_cand
174 {
175 unsigned id; /* The number of the candidate. */
176 bool important; /* Whether this is an "important" candidate, i.e. such
177 that it should be considered by all uses. */
178 enum iv_position pos; /* Where it is computed. */
179 tree incremented_at; /* For original biv, the statement where it is
180 incremented. */
181 tree var_before; /* The variable used for it before increment. */
182 tree var_after; /* The variable used for it after increment. */
183 struct iv *iv; /* The value of the candidate. NULL for
184 "pseudocandidate" used to indicate the possibility
185 to replace the final value of an iv by direct
186 computation of the value. */
187 unsigned cost; /* Cost of the candidate. */
188 bitmap depends_on; /* The list of invariants that are used in step of the
189 biv. */
190 };
191
192 /* The data used by the induction variable optimizations. */
193
194 typedef struct iv_use *iv_use_p;
195 DEF_VEC_P(iv_use_p);
196 DEF_VEC_ALLOC_P(iv_use_p,heap);
197
198 typedef struct iv_cand *iv_cand_p;
199 DEF_VEC_P(iv_cand_p);
200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
201
202 struct ivopts_data
203 {
204 /* The currently optimized loop. */
205 struct loop *current_loop;
206
207 /* Number of registers used in it. */
208 unsigned regs_used;
209
210 /* Numbers of iterations for all exits of the current loop. */
211 htab_t niters;
212
213 /* The size of version_info array allocated. */
214 unsigned version_info_size;
215
216 /* The array of information for the ssa names. */
217 struct version_info *version_info;
218
219 /* The bitmap of indices in version_info whose value was changed. */
220 bitmap relevant;
221
222 /* The maximum invariant id. */
223 unsigned max_inv_id;
224
225 /* The uses of induction variables. */
226 VEC(iv_use_p,heap) *iv_uses;
227
228 /* The candidates. */
229 VEC(iv_cand_p,heap) *iv_candidates;
230
231 /* A bitmap of important candidates. */
232 bitmap important_candidates;
233
234 /* Whether to consider just related and important candidates when replacing a
235 use. */
236 bool consider_all_candidates;
237 };
238
239 /* An assignment of iv candidates to uses. */
240
241 struct iv_ca
242 {
243 /* The number of uses covered by the assignment. */
244 unsigned upto;
245
246 /* Number of uses that cannot be expressed by the candidates in the set. */
247 unsigned bad_uses;
248
249 /* Candidate assigned to a use, together with the related costs. */
250 struct cost_pair **cand_for_use;
251
252 /* Number of times each candidate is used. */
253 unsigned *n_cand_uses;
254
255 /* The candidates used. */
256 bitmap cands;
257
258 /* The number of candidates in the set. */
259 unsigned n_cands;
260
261 /* Total number of registers needed. */
262 unsigned n_regs;
263
264 /* Total cost of expressing uses. */
265 unsigned cand_use_cost;
266
267 /* Total cost of candidates. */
268 unsigned cand_cost;
269
270 /* Number of times each invariant is used. */
271 unsigned *n_invariant_uses;
272
273 /* Total cost of the assignment. */
274 unsigned cost;
275 };
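
/* As a sketch, the total cost of an assignment combines these fields
   roughly as

     cost = cand_use_cost + cand_cost + <register pressure term (n_regs)>

   where the pressure term grows once the ivs and used invariants no longer
   fit into the available registers (the exact accounting is done when the
   assignment is recomputed later in this file).  */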
276
277 /* Difference of two iv candidate assignments. */
278
279 struct iv_ca_delta
280 {
281 /* Changed use. */
282 struct iv_use *use;
283
284 /* An old assignment (for rollback purposes). */
285 struct cost_pair *old_cp;
286
287 /* A new assignment. */
288 struct cost_pair *new_cp;
289
290 /* Next change in the list. */
291 struct iv_ca_delta *next_change;
292 };
293
294 /* Bound on the number of candidates below which all candidates are considered. */
295
296 #define CONSIDER_ALL_CANDIDATES_BOUND \
297 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298
299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
300 optimizing such a loop would help, and it would take ages). */
301
302 #define MAX_CONSIDERED_USES \
303 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304
305 /* If there are at most this number of ivs in the set, always try removing
306    unnecessary ivs from the set. */
307
308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
309 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
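
/* All three bounds are --param knobs and can be tuned from the command
   line; e.g. (illustrative values, not necessarily the defaults):

     gcc -O2 --param iv-consider-all-candidates-bound=40 \
             --param iv-max-considered-uses=250 \
             --param iv-always-prune-cand-set-bound=10  */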
310
311 /* The list of trees for which the decl_rtl field must be reset is stored
312    here. */
313
314 static VEC(tree,heap) *decl_rtl_to_reset;
315
316 /* Number of uses recorded in DATA. */
317
318 static inline unsigned
319 n_iv_uses (struct ivopts_data *data)
320 {
321 return VEC_length (iv_use_p, data->iv_uses);
322 }
323
324 /* Ith use recorded in DATA. */
325
326 static inline struct iv_use *
327 iv_use (struct ivopts_data *data, unsigned i)
328 {
329 return VEC_index (iv_use_p, data->iv_uses, i);
330 }
331
332 /* Number of candidates recorded in DATA. */
333
334 static inline unsigned
335 n_iv_cands (struct ivopts_data *data)
336 {
337 return VEC_length (iv_cand_p, data->iv_candidates);
338 }
339
340 /* Ith candidate recorded in DATA. */
341
342 static inline struct iv_cand *
343 iv_cand (struct ivopts_data *data, unsigned i)
344 {
345 return VEC_index (iv_cand_p, data->iv_candidates, i);
346 }
347
348 /* The single loop exit if it dominates the latch, NULL otherwise. */
349
350 edge
351 single_dom_exit (struct loop *loop)
352 {
353 edge exit = single_exit (loop);
354
355 if (!exit)
356 return NULL;
357
358 if (!just_once_each_iteration_p (loop, exit->src))
359 return NULL;
360
361 return exit;
362 }
363
364 /* Dumps information about the induction variable IV to FILE. */
365
366 extern void dump_iv (FILE *, struct iv *);
367 void
368 dump_iv (FILE *file, struct iv *iv)
369 {
370 if (iv->ssa_name)
371 {
372 fprintf (file, "ssa name ");
373 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
374 fprintf (file, "\n");
375 }
376
377 fprintf (file, " type ");
378 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
379 fprintf (file, "\n");
380
381 if (iv->step)
382 {
383 fprintf (file, " base ");
384 print_generic_expr (file, iv->base, TDF_SLIM);
385 fprintf (file, "\n");
386
387 fprintf (file, " step ");
388 print_generic_expr (file, iv->step, TDF_SLIM);
389 fprintf (file, "\n");
390 }
391 else
392 {
393 fprintf (file, " invariant ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
396 }
397
398 if (iv->base_object)
399 {
400 fprintf (file, " base object ");
401 print_generic_expr (file, iv->base_object, TDF_SLIM);
402 fprintf (file, "\n");
403 }
404
405 if (iv->biv_p)
406 fprintf (file, " is a biv\n");
407 }
408
409 /* Dumps information about the USE to FILE. */
410
411 extern void dump_use (FILE *, struct iv_use *);
412 void
413 dump_use (FILE *file, struct iv_use *use)
414 {
415 fprintf (file, "use %d\n", use->id);
416
417 switch (use->type)
418 {
419 case USE_NONLINEAR_EXPR:
420 fprintf (file, " generic\n");
421 break;
422
423 case USE_ADDRESS:
424 fprintf (file, " address\n");
425 break;
426
427 case USE_COMPARE:
428 fprintf (file, " compare\n");
429 break;
430
431 default:
432 gcc_unreachable ();
433 }
434
435 fprintf (file, " in statement ");
436 print_generic_expr (file, use->stmt, TDF_SLIM);
437 fprintf (file, "\n");
438
439 fprintf (file, " at position ");
440 if (use->op_p)
441 print_generic_expr (file, *use->op_p, TDF_SLIM);
442 fprintf (file, "\n");
443
444 dump_iv (file, use->iv);
445
446 if (use->related_cands)
447 {
448 fprintf (file, " related candidates ");
449 dump_bitmap (file, use->related_cands);
450 }
451 }
452
453 /* Dumps information about the uses to FILE. */
454
455 extern void dump_uses (FILE *, struct ivopts_data *);
456 void
457 dump_uses (FILE *file, struct ivopts_data *data)
458 {
459 unsigned i;
460 struct iv_use *use;
461
462 for (i = 0; i < n_iv_uses (data); i++)
463 {
464 use = iv_use (data, i);
465
466 dump_use (file, use);
467 fprintf (file, "\n");
468 }
469 }
470
471 /* Dumps information about induction variable candidate CAND to FILE. */
472
473 extern void dump_cand (FILE *, struct iv_cand *);
474 void
475 dump_cand (FILE *file, struct iv_cand *cand)
476 {
477 struct iv *iv = cand->iv;
478
479 fprintf (file, "candidate %d%s\n",
480 cand->id, cand->important ? " (important)" : "");
481
482 if (cand->depends_on)
483 {
484 fprintf (file, " depends on ");
485 dump_bitmap (file, cand->depends_on);
486 }
487
488 if (!iv)
489 {
490 fprintf (file, " final value replacement\n");
491 return;
492 }
493
494 switch (cand->pos)
495 {
496 case IP_NORMAL:
497 fprintf (file, " incremented before exit test\n");
498 break;
499
500 case IP_END:
501 fprintf (file, " incremented at end\n");
502 break;
503
504 case IP_ORIGINAL:
505 fprintf (file, " original biv\n");
506 break;
507 }
508
509 dump_iv (file, iv);
510 }
511
512 /* Returns the info for ssa version VER. */
513
514 static inline struct version_info *
515 ver_info (struct ivopts_data *data, unsigned ver)
516 {
517 return data->version_info + ver;
518 }
519
520 /* Returns the info for ssa name NAME. */
521
522 static inline struct version_info *
523 name_info (struct ivopts_data *data, tree name)
524 {
525 return ver_info (data, SSA_NAME_VERSION (name));
526 }
527
528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
529 emitted in LOOP. */
530
531 static bool
532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
533 {
534 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
535
536 gcc_assert (bb);
537
538 if (sbb == loop->latch)
539 return true;
540
541 if (sbb != bb)
542 return false;
543
544 return stmt == last_stmt (bb);
545 }
546
547 /* Returns true if STMT is after the place where the original induction
548 variable CAND is incremented. */
549
550 static bool
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
552 {
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
556
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
558 return false;
559
560 if (stmt_bb != cand_bb)
561 return true;
562
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
566 {
567 if (bsi_stmt (bsi) == cand->incremented_at)
568 return false;
569 if (bsi_stmt (bsi) == stmt)
570 return true;
571 }
572 }
573
574 /* Returns true if STMT is after the place where the induction variable
575 CAND is incremented in LOOP. */
576
577 static bool
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
579 {
580 switch (cand->pos)
581 {
582 case IP_END:
583 return false;
584
585 case IP_NORMAL:
586 return stmt_after_ip_normal_pos (loop, stmt);
587
588 case IP_ORIGINAL:
589 return stmt_after_ip_original_pos (cand, stmt);
590
591 default:
592 gcc_unreachable ();
593 }
594 }
595
596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
597
598 static bool
599 abnormal_ssa_name_p (tree exp)
600 {
601 if (!exp)
602 return false;
603
604 if (TREE_CODE (exp) != SSA_NAME)
605 return false;
606
607 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
608 }
609
610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
611 abnormal phi node. Callback for for_each_index. */
612
613 static bool
614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
615 void *data ATTRIBUTE_UNUSED)
616 {
617 if (TREE_CODE (base) == ARRAY_REF)
618 {
619 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
620 return false;
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
622 return false;
623 }
624
625 return !abnormal_ssa_name_p (*index);
626 }
627
628 /* Returns true if EXPR contains a ssa name that occurs in an
629 abnormal phi node. */
630
631 bool
632 contains_abnormal_ssa_name_p (tree expr)
633 {
634 enum tree_code code;
635 enum tree_code_class class;
636
637 if (!expr)
638 return false;
639
640 code = TREE_CODE (expr);
641 class = TREE_CODE_CLASS (code);
642
643 if (code == SSA_NAME)
644 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
645
646 if (code == INTEGER_CST
647 || is_gimple_min_invariant (expr))
648 return false;
649
650 if (code == ADDR_EXPR)
651 return !for_each_index (&TREE_OPERAND (expr, 0),
652 idx_contains_abnormal_ssa_name_p,
653 NULL);
654
655 switch (class)
656 {
657 case tcc_binary:
658 case tcc_comparison:
659 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
660 return true;
661
662 /* Fallthru. */
663 case tcc_unary:
664 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
665 return true;
666
667 break;
668
669 default:
670 gcc_unreachable ();
671 }
672
673 return false;
674 }
675
676 /* Element of the table in which we cache the numbers of iterations obtained
677 from exits of the loop. */
678
679 struct nfe_cache_elt
680 {
681 /* The edge for which the number of iterations is cached. */
682 edge exit;
683
684 /* Number of iterations corresponding to this exit, or NULL if it cannot be
685 determined. */
686 tree niter;
687 };
688
689 /* Hash function for nfe_cache_elt E. */
690
691 static hashval_t
692 nfe_hash (const void *e)
693 {
694 const struct nfe_cache_elt *elt = e;
695
696 return htab_hash_pointer (elt->exit);
697 }
698
699 /* Equality function for nfe_cache_elt E1 and edge E2. */
700
701 static int
702 nfe_eq (const void *e1, const void *e2)
703 {
704 const struct nfe_cache_elt *elt1 = e1;
705
706 return elt1->exit == e2;
707 }
708
709 /* Returns tree describing number of iterations determined from
710 EXIT of DATA->current_loop, or NULL if something goes wrong. */
711
712 static tree
713 niter_for_exit (struct ivopts_data *data, edge exit)
714 {
715 struct nfe_cache_elt *nfe_desc;
716 struct tree_niter_desc desc;
717 PTR *slot;
718
719 slot = htab_find_slot_with_hash (data->niters, exit,
720 htab_hash_pointer (exit),
721 INSERT);
722
723 if (!*slot)
724 {
725 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
726 nfe_desc->exit = exit;
727
728 /* Try to determine number of iterations. We must know it
729 unconditionally (i.e., without possibility of # of iterations
730 being zero). Also, we cannot safely work with ssa names that
731 appear in phi nodes on abnormal edges, so that we do not create
732 overlapping life ranges for them (PR 27283). */
733 if (number_of_iterations_exit (data->current_loop,
734 exit, &desc, true)
735 && integer_zerop (desc.may_be_zero)
736 && !contains_abnormal_ssa_name_p (desc.niter))
737 nfe_desc->niter = desc.niter;
738 else
739 nfe_desc->niter = NULL_TREE;
740 }
741 else
742 nfe_desc = *slot;
743
744 return nfe_desc->niter;
745 }
746
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
750
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
753 {
754 edge exit = single_dom_exit (data->current_loop);
755
756 if (!exit)
757 return NULL;
758
759 return niter_for_exit (data, exit);
760 }
761
762 /* Initializes data structures used by the iv optimization pass, stored
763 in DATA. */
764
765 static void
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
767 {
768 data->version_info_size = 2 * num_ssa_names;
769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
772 data->max_inv_id = 0;
773 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
777 }
778
779 /* Returns a memory object to which EXPR points.  If we can determine that
780    EXPR does not point to any such object, NULL is returned. */
781
782 static tree
783 determine_base_object (tree expr)
784 {
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj, op0, op1;
787
788 /* If this is a pointer cast to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (TREE_CODE (expr) == NOP_EXPR
792 || TREE_CODE (expr) == CONVERT_EXPR)
793 return determine_base_object (TREE_OPERAND (expr, 0));
794
795 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
796 return NULL_TREE;
797
798 switch (code)
799 {
800 case INTEGER_CST:
801 return NULL_TREE;
802
803 case ADDR_EXPR:
804 obj = TREE_OPERAND (expr, 0);
805 base = get_base_address (obj);
806
807 if (!base)
808 return expr;
809
810 if (TREE_CODE (base) == INDIRECT_REF)
811 return determine_base_object (TREE_OPERAND (base, 0));
812
813 return fold_convert (ptr_type_node,
814 build_fold_addr_expr (base));
815
816 case PLUS_EXPR:
817 case MINUS_EXPR:
818 op0 = determine_base_object (TREE_OPERAND (expr, 0));
819 op1 = determine_base_object (TREE_OPERAND (expr, 1));
820
821 if (!op1)
822 return op0;
823
824 if (!op0)
825 return (code == PLUS_EXPR
826 ? op1
827 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
828
829 return fold_build2 (code, ptr_type_node, op0, op1);
830
831 default:
832 return fold_convert (ptr_type_node, expr);
833 }
834 }
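
/* E.g. (a sketch): for EXPR "&a[0] + 4 * i" this returns &a; for a cast
   of an integer, "(int *) n", it returns NULL_TREE; and an unknown pointer
   p falls through to the default case and is returned (converted to
   ptr_type_node) as its own base object.  */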
835
836 /* Allocates an induction variable with given initial value BASE and step
837    STEP. */
838
839 static struct iv *
840 alloc_iv (tree base, tree step)
841 {
842 struct iv *iv = XCNEW (struct iv);
843 gcc_assert (step != NULL_TREE);
844
845 iv->base = base;
846 iv->base_object = determine_base_object (base);
847 iv->step = step;
848 iv->biv_p = false;
849 iv->have_use_for = false;
850 iv->use_id = 0;
851 iv->ssa_name = NULL_TREE;
852
853 return iv;
854 }
855
856 /* Sets STEP and BASE for induction variable IV. */
857
858 static void
859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
860 {
861 struct version_info *info = name_info (data, iv);
862
863 gcc_assert (!info->iv);
864
865 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
866 info->iv = alloc_iv (base, step);
867 info->iv->ssa_name = iv;
868 }
869
870 /* Finds induction variable declaration for VAR. */
871
872 static struct iv *
873 get_iv (struct ivopts_data *data, tree var)
874 {
875 basic_block bb;
876 tree type = TREE_TYPE (var);
877
878 if (!POINTER_TYPE_P (type)
879 && !INTEGRAL_TYPE_P (type))
880 return NULL;
881
882 if (!name_info (data, var)->iv)
883 {
884 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
885
886 if (!bb
887 || !flow_bb_inside_loop_p (data->current_loop, bb))
888 set_iv (data, var, var, build_int_cst (type, 0));
889 }
890
891 return name_info (data, var)->iv;
892 }
893
894 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
895 not define a simple affine biv with nonzero step. */
896
897 static tree
898 determine_biv_step (tree phi)
899 {
900 struct loop *loop = bb_for_stmt (phi)->loop_father;
901 tree name = PHI_RESULT (phi);
902 affine_iv iv;
903
904 if (!is_gimple_reg (name))
905 return NULL_TREE;
906
907 if (!simple_iv (loop, phi, name, &iv, true))
908 return NULL_TREE;
909
910 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
911 }
912
913 /* Finds basic ivs. */
914
915 static bool
916 find_bivs (struct ivopts_data *data)
917 {
918 tree phi, step, type, base;
919 bool found = false;
920 struct loop *loop = data->current_loop;
921
922 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
923 {
924 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
925 continue;
926
927 step = determine_biv_step (phi);
928 if (!step)
929 continue;
930
931 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
932 base = expand_simple_operations (base);
933 if (contains_abnormal_ssa_name_p (base)
934 || contains_abnormal_ssa_name_p (step))
935 continue;
936
937 type = TREE_TYPE (PHI_RESULT (phi));
938 base = fold_convert (type, base);
939 if (step)
940 step = fold_convert (type, step);
941
942 set_iv (data, PHI_RESULT (phi), base, step);
943 found = true;
944 }
945
946 return found;
947 }
948
949 /* Marks basic ivs. */
950
951 static void
952 mark_bivs (struct ivopts_data *data)
953 {
954 tree phi, var;
955 struct iv *iv, *incr_iv;
956 struct loop *loop = data->current_loop;
957 basic_block incr_bb;
958
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
960 {
961 iv = get_iv (data, PHI_RESULT (phi));
962 if (!iv)
963 continue;
964
965 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
966 incr_iv = get_iv (data, var);
967 if (!incr_iv)
968 continue;
969
970 /* If the increment is in a subloop, ignore it. */
971 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
972 if (incr_bb->loop_father != data->current_loop
973 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
974 continue;
975
976 iv->biv_p = true;
977 incr_iv->biv_p = true;
978 }
979 }
980
981 /* Checks whether STMT defines a linear induction variable and stores its
982 parameters to IV. */
983
984 static bool
985 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
986 {
987 tree lhs;
988 struct loop *loop = data->current_loop;
989
990 iv->base = NULL_TREE;
991 iv->step = NULL_TREE;
992
993 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
994 return false;
995
996 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
997 if (TREE_CODE (lhs) != SSA_NAME)
998 return false;
999
1000 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1001 return false;
1002 iv->base = expand_simple_operations (iv->base);
1003
1004 if (contains_abnormal_ssa_name_p (iv->base)
1005 || contains_abnormal_ssa_name_p (iv->step))
1006 return false;
1007
1008 return true;
1009 }
1010
1011 /* Finds general ivs in statement STMT. */
1012
1013 static void
1014 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1015 {
1016 affine_iv iv;
1017
1018 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1019 return;
1020
1021 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1022 }
1023
1024 /* Finds general ivs in basic block BB. */
1025
1026 static void
1027 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1028 {
1029 block_stmt_iterator bsi;
1030
1031 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1032 find_givs_in_stmt (data, bsi_stmt (bsi));
1033 }
1034
1035 /* Finds general ivs. */
1036
1037 static void
1038 find_givs (struct ivopts_data *data)
1039 {
1040 struct loop *loop = data->current_loop;
1041 basic_block *body = get_loop_body_in_dom_order (loop);
1042 unsigned i;
1043
1044 for (i = 0; i < loop->num_nodes; i++)
1045 find_givs_in_bb (data, body[i]);
1046 free (body);
1047 }
1048
1049 /* For each ssa name defined in LOOP, determines whether it is an induction
1050 variable and if so, its initial value and step. */
1051
1052 static bool
1053 find_induction_variables (struct ivopts_data *data)
1054 {
1055 unsigned i;
1056 bitmap_iterator bi;
1057
1058 if (!find_bivs (data))
1059 return false;
1060
1061 find_givs (data);
1062 mark_bivs (data);
1063
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 {
1066 tree niter = niter_for_single_dom_exit (data);
1067
1068 if (niter)
1069 {
1070 fprintf (dump_file, " number of iterations ");
1071 print_generic_expr (dump_file, niter, TDF_SLIM);
1072 fprintf (dump_file, "\n\n");
1073 }
1074
1075 fprintf (dump_file, "Induction variables:\n\n");
1076
1077 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1078 {
1079 if (ver_info (data, i)->iv)
1080 dump_iv (dump_file, ver_info (data, i)->iv);
1081 }
1082 }
1083
1084 return true;
1085 }
1086
1087 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1088
1089 static struct iv_use *
1090 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1091 tree stmt, enum use_type use_type)
1092 {
1093 struct iv_use *use = XCNEW (struct iv_use);
1094
1095 use->id = n_iv_uses (data);
1096 use->type = use_type;
1097 use->iv = iv;
1098 use->stmt = stmt;
1099 use->op_p = use_p;
1100 use->related_cands = BITMAP_ALLOC (NULL);
1101
1102 /* Reset the ssa name so that it does not show up in the dumps, in case
1103    the caller has not already done so. */
1104 iv->ssa_name = NULL_TREE;
1105
1106 if (dump_file && (dump_flags & TDF_DETAILS))
1107 dump_use (dump_file, use);
1108
1109 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1110
1111 return use;
1112 }
1113
1114 /* Checks whether OP is a loop-level invariant and if so, records it.
1115 NONLINEAR_USE is true if the invariant is used in a way we do not
1116 handle specially. */
1117
1118 static void
1119 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1120 {
1121 basic_block bb;
1122 struct version_info *info;
1123
1124 if (TREE_CODE (op) != SSA_NAME
1125 || !is_gimple_reg (op))
1126 return;
1127
1128 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1129 if (bb
1130 && flow_bb_inside_loop_p (data->current_loop, bb))
1131 return;
1132
1133 info = name_info (data, op);
1134 info->name = op;
1135 info->has_nonlin_use |= nonlinear_use;
1136 if (!info->inv_id)
1137 info->inv_id = ++data->max_inv_id;
1138 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1139 }
1140
1141 /* Checks whether the use OP is interesting and if so, records it. */
1142
1143 static struct iv_use *
1144 find_interesting_uses_op (struct ivopts_data *data, tree op)
1145 {
1146 struct iv *iv;
1147 struct iv *civ;
1148 tree stmt;
1149 struct iv_use *use;
1150
1151 if (TREE_CODE (op) != SSA_NAME)
1152 return NULL;
1153
1154 iv = get_iv (data, op);
1155 if (!iv)
1156 return NULL;
1157
1158 if (iv->have_use_for)
1159 {
1160 use = iv_use (data, iv->use_id);
1161
1162 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1163 return use;
1164 }
1165
1166 if (integer_zerop (iv->step))
1167 {
1168 record_invariant (data, op, true);
1169 return NULL;
1170 }
1171 iv->have_use_for = true;
1172
1173 civ = XNEW (struct iv);
1174 *civ = *iv;
1175
1176 stmt = SSA_NAME_DEF_STMT (op);
1177 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1178 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1179
1180 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1181 iv->use_id = use->id;
1182
1183 return use;
1184 }
1185
1186 /* Given a condition *COND_P, checks whether it is a compare of an induction
1187 variable and an invariant. If this is the case, CONTROL_VAR is set
1188 to location of the iv, BOUND to the location of the invariant,
1189 IV_VAR and IV_BOUND are set to the corresponding induction variable
1190 descriptions, and true is returned. If this is not the case,
1191 CONTROL_VAR and BOUND are set to the arguments of the condition and
1192 false is returned. */
1193
1194 static bool
1195 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1196 tree **control_var, tree **bound,
1197 struct iv **iv_var, struct iv **iv_bound)
1198 {
1199 /* The nodes returned when COND has just one operand. Note that you should
1200 not modify anything in BOUND or IV_BOUND because of this. */
1201 static struct iv const_iv;
1202 static tree zero;
1203 tree cond = *cond_p;
1204 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1205 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1206 bool ret = false;
1207
1208 zero = integer_zero_node;
1209 const_iv.step = integer_zero_node;
1210
1211 if (TREE_CODE (cond) == SSA_NAME)
1212 {
1213 op0 = cond_p;
1214 iv0 = get_iv (data, cond);
1215 ret = (iv0 && !integer_zerop (iv0->step));
1216 goto end;
1217 }
1218
1219 if (!COMPARISON_CLASS_P (cond))
1220 {
1221 op0 = cond_p;
1222 goto end;
1223 }
1224
1225 op0 = &TREE_OPERAND (cond, 0);
1226 op1 = &TREE_OPERAND (cond, 1);
1227 if (TREE_CODE (*op0) == SSA_NAME)
1228 iv0 = get_iv (data, *op0);
1229 if (TREE_CODE (*op1) == SSA_NAME)
1230 iv1 = get_iv (data, *op1);
1231
1232 /* Exactly one of the compared values must be an iv, and the other one must
1233 be an invariant. */
1234 if (!iv0 || !iv1)
1235 goto end;
1236
1237 if (integer_zerop (iv0->step))
1238 {
1239 /* Control variable may be on the other side. */
1240 tmp_op = op0; op0 = op1; op1 = tmp_op;
1241 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1242 }
1243 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1244
1245 end:
1246 if (control_var)
1247 *control_var = op0;
1248 if (iv_var)
1249 *iv_var = iv0;
1250 if (bound)
1251 *bound = op1;
1252 if (iv_bound)
1253 *iv_bound = iv1;
1254
1255 return ret;
1256 }
1257
1258 /* Checks whether the condition *COND_P in STMT is interesting
1259 and if so, records it. */
1260
1261 static void
1262 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1263 {
1264 tree *var_p, *bound_p;
1265 struct iv *var_iv, *civ;
1266
1267 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1268 {
1269 find_interesting_uses_op (data, *var_p);
1270 find_interesting_uses_op (data, *bound_p);
1271 return;
1272 }
1273
1274 civ = XNEW (struct iv);
1275 *civ = *var_iv;
1276 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1277 }
1278
1279 /* Returns true if expression EXPR is obviously invariant in LOOP,
1280 i.e. if all its operands are defined outside of the LOOP. */
1281
1282 bool
1283 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1284 {
1285 basic_block def_bb;
1286 unsigned i, len;
1287
1288 if (is_gimple_min_invariant (expr))
1289 return true;
1290
1291 if (TREE_CODE (expr) == SSA_NAME)
1292 {
1293 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1294 if (def_bb
1295 && flow_bb_inside_loop_p (loop, def_bb))
1296 return false;
1297
1298 return true;
1299 }
1300
1301 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1302 return false;
1303
1304 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1305 for (i = 0; i < len; i++)
1306 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1307 return false;
1308
1309 return true;
1310 }
1311
1312 /* Accumulates the steps of indices into DATA and replaces their values with the
1313 initial ones. Returns false when the value of the index cannot be determined.
1314 Callback for for_each_index. */
1315
1316 struct ifs_ivopts_data
1317 {
1318 struct ivopts_data *ivopts_data;
1319 tree stmt;
1320 tree step;
1321 };
1322
1323 static bool
1324 idx_find_step (tree base, tree *idx, void *data)
1325 {
1326 struct ifs_ivopts_data *dta = data;
1327 struct iv *iv;
1328 tree step, iv_base, iv_step, lbound, off;
1329 struct loop *loop = dta->ivopts_data->current_loop;
1330
1331 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1332 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1333 return false;
1334
1335 /* If base is a component ref, require that the offset of the reference
1336 be invariant. */
1337 if (TREE_CODE (base) == COMPONENT_REF)
1338 {
1339 off = component_ref_field_offset (base);
1340 return expr_invariant_in_loop_p (loop, off);
1341 }
1342
1343 /* If base is an array, first check whether we will be able to move the
1344 reference out of the loop (in order to take its address in strength
1345 reduction). In order for this to work we need both lower bound
1346 and step to be loop invariants. */
1347 if (TREE_CODE (base) == ARRAY_REF)
1348 {
1349 step = array_ref_element_size (base);
1350 lbound = array_ref_low_bound (base);
1351
1352 if (!expr_invariant_in_loop_p (loop, step)
1353 || !expr_invariant_in_loop_p (loop, lbound))
1354 return false;
1355 }
1356
1357 if (TREE_CODE (*idx) != SSA_NAME)
1358 return true;
1359
1360 iv = get_iv (dta->ivopts_data, *idx);
1361 if (!iv)
1362 return false;
1363
1364 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1365    *&x[0], which is not folded and does not trigger the
1366    ARRAY_REF path below. */
1367 *idx = iv->base;
1368
1369 if (integer_zerop (iv->step))
1370 return true;
1371
1372 if (TREE_CODE (base) == ARRAY_REF)
1373 {
1374 step = array_ref_element_size (base);
1375
1376 /* We only handle addresses whose step is an integer constant. */
1377 if (TREE_CODE (step) != INTEGER_CST)
1378 return false;
1379 }
1380 else
1381 /* The step for pointer arithmetic is already 1 byte. */
1382 step = build_int_cst (sizetype, 1);
1383
1384 iv_base = iv->base;
1385 iv_step = iv->step;
1386 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1387 sizetype, &iv_base, &iv_step, dta->stmt,
1388 false))
1389 {
1390 /* The index might wrap. */
1391 return false;
1392 }
1393
1394 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1395 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1396
1397 return true;
1398 }
1399
1400 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1401 object is passed to it in DATA. */
1402
1403 static bool
1404 idx_record_use (tree base, tree *idx,
1405 void *data)
1406 {
1407 find_interesting_uses_op (data, *idx);
1408 if (TREE_CODE (base) == ARRAY_REF)
1409 {
1410 find_interesting_uses_op (data, array_ref_element_size (base));
1411 find_interesting_uses_op (data, array_ref_low_bound (base));
1412 }
1413 return true;
1414 }
1415
1416 /* Returns true if memory reference REF may be unaligned. */
1417
1418 static bool
1419 may_be_unaligned_p (tree ref)
1420 {
1421 tree base;
1422 tree base_type;
1423 HOST_WIDE_INT bitsize;
1424 HOST_WIDE_INT bitpos;
1425 tree toffset;
1426 enum machine_mode mode;
1427 int unsignedp, volatilep;
1428 unsigned base_align;
1429
1430 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1431 thus they are not misaligned. */
1432 if (TREE_CODE (ref) == TARGET_MEM_REF)
1433 return false;
1434
1435 /* The test below is basically a copy of what expr.c:normal_inner_ref
1436 does to check whether the object must be loaded by parts when
1437 STRICT_ALIGNMENT is true. */
1438 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1439 &unsignedp, &volatilep, true);
1440 base_type = TREE_TYPE (base);
1441 base_align = TYPE_ALIGN (base_type);
1442
1443 if (mode != BLKmode
1444 && (base_align < GET_MODE_ALIGNMENT (mode)
1445 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1446 || bitpos % BITS_PER_UNIT != 0))
1447 return true;
1448
1449 return false;
1450 }
1451
1452 /* Return true if EXPR may be non-addressable. */
1453
1454 static bool
1455 may_be_nonaddressable_p (tree expr)
1456 {
1457 switch (TREE_CODE (expr))
1458 {
1459 case COMPONENT_REF:
1460 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1461 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1462
1463 case ARRAY_REF:
1464 case ARRAY_RANGE_REF:
1465 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1466
1467 case VIEW_CONVERT_EXPR:
1468 /* This kind of view-conversion may wrap non-addressable objects
1469 and make them look addressable. After some processing the
1470 non-addressability may be uncovered again, causing ADDR_EXPRs
1471 of inappropriate objects to be built. */
1472 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1473 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1474
1475 default:
1476 break;
1477 }
1478
1479 return false;
1480 }
1481
1482 /* Finds addresses in *OP_P inside STMT. */
1483
1484 static void
1485 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1486 {
1487 tree base = *op_p, step = build_int_cst (sizetype, 0);
1488 struct iv *civ;
1489 struct ifs_ivopts_data ifs_ivopts_data;
1490
1491 /* Do not play with volatile memory references. A bit too conservative,
1492 perhaps, but safe. */
1493 if (stmt_ann (stmt)->has_volatile_ops)
1494 goto fail;
1495
1496 /* Ignore bitfields for now. Not really something terribly complicated
1497 to handle. TODO. */
1498 if (TREE_CODE (base) == BIT_FIELD_REF)
1499 goto fail;
1500
1501 if (may_be_nonaddressable_p (base))
1502 goto fail;
1503
1504 if (STRICT_ALIGNMENT
1505 && may_be_unaligned_p (base))
1506 goto fail;
1507
1508 base = unshare_expr (base);
1509
1510 if (TREE_CODE (base) == TARGET_MEM_REF)
1511 {
1512 tree type = build_pointer_type (TREE_TYPE (base));
1513 tree astep;
1514
1515 if (TMR_BASE (base)
1516 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1517 {
1518 civ = get_iv (data, TMR_BASE (base));
1519 if (!civ)
1520 goto fail;
1521
1522 TMR_BASE (base) = civ->base;
1523 step = civ->step;
1524 }
1525 if (TMR_INDEX (base)
1526 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1527 {
1528 civ = get_iv (data, TMR_INDEX (base));
1529 if (!civ)
1530 goto fail;
1531
1532 TMR_INDEX (base) = civ->base;
1533 astep = civ->step;
1534
1535 if (astep)
1536 {
1537 if (TMR_STEP (base))
1538 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1539
1540 step = fold_build2 (PLUS_EXPR, type, step, astep);
1541 }
1542 }
1543
1544 if (integer_zerop (step))
1545 goto fail;
1546 base = tree_mem_ref_addr (type, base);
1547 }
1548 else
1549 {
1550 ifs_ivopts_data.ivopts_data = data;
1551 ifs_ivopts_data.stmt = stmt;
1552 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1553 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1554 || integer_zerop (ifs_ivopts_data.step))
1555 goto fail;
1556 step = ifs_ivopts_data.step;
1557
1558 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1559 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1560
1561 base = build_fold_addr_expr (base);
1562
1563 /* Substituting bases of IVs into the base expression might
1564 have caused folding opportunities. */
1565 if (TREE_CODE (base) == ADDR_EXPR)
1566 {
1567 tree *ref = &TREE_OPERAND (base, 0);
1568 while (handled_component_p (*ref))
1569 ref = &TREE_OPERAND (*ref, 0);
1570 if (TREE_CODE (*ref) == INDIRECT_REF)
1571 *ref = fold_indirect_ref (*ref);
1572 }
1573 }
1574
1575 civ = alloc_iv (base, step);
1576 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1577 return;
1578
1579 fail:
1580 for_each_index (op_p, idx_record_use, data);
1581 }
1582
1583 /* Finds and records invariants used in STMT. */
1584
1585 static void
1586 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1587 {
1588 ssa_op_iter iter;
1589 use_operand_p use_p;
1590 tree op;
1591
1592 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1593 {
1594 op = USE_FROM_PTR (use_p);
1595 record_invariant (data, op, false);
1596 }
1597 }
1598
1599 /* Finds interesting uses of induction variables in the statement STMT. */
1600
1601 static void
1602 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1603 {
1604 struct iv *iv;
1605 tree op, lhs, rhs;
1606 ssa_op_iter iter;
1607 use_operand_p use_p;
1608
1609 find_invariants_stmt (data, stmt);
1610
1611 if (TREE_CODE (stmt) == COND_EXPR)
1612 {
1613 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1614 return;
1615 }
1616
1617 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1618 {
1619 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1620 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1621
1622 if (TREE_CODE (lhs) == SSA_NAME)
1623 {
1624 /* If the statement defines an induction variable, the uses are not
1625 interesting by themselves. */
1626
1627 iv = get_iv (data, lhs);
1628
1629 if (iv && !integer_zerop (iv->step))
1630 return;
1631 }
1632
1633 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1634 {
1635 case tcc_comparison:
1636 find_interesting_uses_cond (data, stmt,
1637 &GIMPLE_STMT_OPERAND (stmt, 1));
1638 return;
1639
1640 case tcc_reference:
1641 find_interesting_uses_address (data, stmt,
1642 &GIMPLE_STMT_OPERAND (stmt, 1));
1643 if (REFERENCE_CLASS_P (lhs))
1644 find_interesting_uses_address (data, stmt,
1645 &GIMPLE_STMT_OPERAND (stmt, 0));
1646 return;
1647
1648 default: ;
1649 }
1650
1651 if (REFERENCE_CLASS_P (lhs)
1652 && is_gimple_val (rhs))
1653 {
1654 find_interesting_uses_address (data, stmt,
1655 &GIMPLE_STMT_OPERAND (stmt, 0));
1656 find_interesting_uses_op (data, rhs);
1657 return;
1658 }
1659
1660 /* TODO -- we should also handle address uses of type
1661
1662 memory = call (whatever);
1663
1664 and
1665
1666 call (memory). */
1667 }
1668
1669 if (TREE_CODE (stmt) == PHI_NODE
1670 && bb_for_stmt (stmt) == data->current_loop->header)
1671 {
1672 lhs = PHI_RESULT (stmt);
1673 iv = get_iv (data, lhs);
1674
1675 if (iv && !integer_zerop (iv->step))
1676 return;
1677 }
1678
1679 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1680 {
1681 op = USE_FROM_PTR (use_p);
1682
1683 if (TREE_CODE (op) != SSA_NAME)
1684 continue;
1685
1686 iv = get_iv (data, op);
1687 if (!iv)
1688 continue;
1689
1690 find_interesting_uses_op (data, op);
1691 }
1692 }
1693
1694 /* Finds interesting uses of induction variables outside of loops
1695 on loop exit edge EXIT. */
1696
1697 static void
1698 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1699 {
1700 tree phi, def;
1701
1702 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1703 {
1704 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1705 if (is_gimple_reg (def))
1706 find_interesting_uses_op (data, def);
1707 }
1708 }
1709
1710 /* Finds uses of the induction variables that are interesting. */
1711
1712 static void
1713 find_interesting_uses (struct ivopts_data *data)
1714 {
1715 basic_block bb;
1716 block_stmt_iterator bsi;
1717 tree phi;
1718 basic_block *body = get_loop_body (data->current_loop);
1719 unsigned i;
1720 struct version_info *info;
1721 edge e;
1722
1723 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "Uses:\n\n");
1725
1726 for (i = 0; i < data->current_loop->num_nodes; i++)
1727 {
1728 edge_iterator ei;
1729 bb = body[i];
1730
1731 FOR_EACH_EDGE (e, ei, bb->succs)
1732 if (e->dest != EXIT_BLOCK_PTR
1733 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1734 find_interesting_uses_outside (data, e);
1735
1736 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1737 find_interesting_uses_stmt (data, phi);
1738 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1739 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1740 }
1741
1742 if (dump_file && (dump_flags & TDF_DETAILS))
1743 {
1744 bitmap_iterator bi;
1745
1746 fprintf (dump_file, "\n");
1747
1748 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1749 {
1750 info = ver_info (data, i);
1751 if (info->inv_id)
1752 {
1753 fprintf (dump_file, " ");
1754 print_generic_expr (dump_file, info->name, TDF_SLIM);
1755 fprintf (dump_file, " is invariant (%d)%s\n",
1756 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1757 }
1758 }
1759
1760 fprintf (dump_file, "\n");
1761 }
1762
1763 free (body);
1764 }
1765
1766 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1767 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1768 we are at the top-level of the processed address. */
1769
1770 static tree
1771 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1772 unsigned HOST_WIDE_INT *offset)
1773 {
1774 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1775 enum tree_code code;
1776 tree type, orig_type = TREE_TYPE (expr);
1777 unsigned HOST_WIDE_INT off0, off1, st;
1778 tree orig_expr = expr;
1779
1780 STRIP_NOPS (expr);
1781
1782 type = TREE_TYPE (expr);
1783 code = TREE_CODE (expr);
1784 *offset = 0;
1785
1786 switch (code)
1787 {
1788 case INTEGER_CST:
1789 if (!cst_and_fits_in_hwi (expr)
1790 || integer_zerop (expr))
1791 return orig_expr;
1792
1793 *offset = int_cst_value (expr);
1794 return build_int_cst (orig_type, 0);
1795
1796 case PLUS_EXPR:
1797 case MINUS_EXPR:
1798 op0 = TREE_OPERAND (expr, 0);
1799 op1 = TREE_OPERAND (expr, 1);
1800
1801 op0 = strip_offset_1 (op0, false, false, &off0);
1802 op1 = strip_offset_1 (op1, false, false, &off1);
1803
1804 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1805 if (op0 == TREE_OPERAND (expr, 0)
1806 && op1 == TREE_OPERAND (expr, 1))
1807 return orig_expr;
1808
1809 if (integer_zerop (op1))
1810 expr = op0;
1811 else if (integer_zerop (op0))
1812 {
1813 if (code == PLUS_EXPR)
1814 expr = op1;
1815 else
1816 expr = fold_build1 (NEGATE_EXPR, type, op1);
1817 }
1818 else
1819 expr = fold_build2 (code, type, op0, op1);
1820
1821 return fold_convert (orig_type, expr);
1822
1823 case ARRAY_REF:
1824 if (!inside_addr)
1825 return orig_expr;
1826
1827 step = array_ref_element_size (expr);
1828 if (!cst_and_fits_in_hwi (step))
1829 break;
1830
1831 st = int_cst_value (step);
1832 op1 = TREE_OPERAND (expr, 1);
1833 op1 = strip_offset_1 (op1, false, false, &off1);
1834 *offset = off1 * st;
1835
1836 if (top_compref
1837 && integer_zerop (op1))
1838 {
1839 /* Strip the component reference completely. */
1840 op0 = TREE_OPERAND (expr, 0);
1841 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1842 *offset += off0;
1843 return op0;
1844 }
1845 break;
1846
1847 case COMPONENT_REF:
1848 if (!inside_addr)
1849 return orig_expr;
1850
1851 tmp = component_ref_field_offset (expr);
1852 if (top_compref
1853 && cst_and_fits_in_hwi (tmp))
1854 {
1855 /* Strip the component reference completely. */
1856 op0 = TREE_OPERAND (expr, 0);
1857 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1858 *offset = off0 + int_cst_value (tmp);
1859 return op0;
1860 }
1861 break;
1862
1863 case ADDR_EXPR:
1864 op0 = TREE_OPERAND (expr, 0);
1865 op0 = strip_offset_1 (op0, true, true, &off0);
1866 *offset += off0;
1867
1868 if (op0 == TREE_OPERAND (expr, 0))
1869 return orig_expr;
1870
1871 expr = build_fold_addr_expr (op0);
1872 return fold_convert (orig_type, expr);
1873
1874 case INDIRECT_REF:
1875 inside_addr = false;
1876 break;
1877
1878 default:
1879 return orig_expr;
1880 }
1881
1882 /* Default handling of expressions for which we want to recurse into
1883 the first operand. */
1884 op0 = TREE_OPERAND (expr, 0);
1885 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1886 *offset += off0;
1887
1888 if (op0 == TREE_OPERAND (expr, 0)
1889 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1890 return orig_expr;
1891
1892 expr = copy_node (expr);
1893 TREE_OPERAND (expr, 0) = op0;
1894 if (op1)
1895 TREE_OPERAND (expr, 1) = op1;
1896
1897 /* Inside an address, we might strip the top-level component references,
1898    thus changing the type of the expression.  Handling of ADDR_EXPR
1899 will fix that. */
1900 expr = fold_convert (orig_type, expr);
1901
1902 return expr;
1903 }
1904
1905 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1906
1907 static tree
1908 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1909 {
1910 return strip_offset_1 (expr, false, false, offset);
1911 }
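
/* E.g. (a sketch): for EXPR "p + 8" this returns "p" and sets *OFFSET to 8;
   for "&a[5]" with 4-byte elements, the array reference is stripped
   completely, yielding the address of "a" with *OFFSET set to 20.  */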
1912
1913 /* Returns a variant of TYPE that can be used as a base for different uses.
1914    We return an unsigned type with the same precision, which avoids problems
1915    with overflows. */
1916
1917 static tree
1918 generic_type_for (tree type)
1919 {
1920 if (POINTER_TYPE_P (type))
1921 return unsigned_type_for (type);
1922
1923 if (TYPE_UNSIGNED (type))
1924 return type;
1925
1926 return unsigned_type_for (type);
1927 }
1928
1929 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1930    the bitmap in which we should store them. */
1931
1932 static struct ivopts_data *fd_ivopts_data;
1933 static tree
1934 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1935 {
1936 bitmap *depends_on = data;
1937 struct version_info *info;
1938
1939 if (TREE_CODE (*expr_p) != SSA_NAME)
1940 return NULL_TREE;
1941 info = name_info (fd_ivopts_data, *expr_p);
1942
1943 if (!info->inv_id || info->has_nonlin_use)
1944 return NULL_TREE;
1945
1946 if (!*depends_on)
1947 *depends_on = BITMAP_ALLOC (NULL);
1948 bitmap_set_bit (*depends_on, info->inv_id);
1949
1950 return NULL_TREE;
1951 }
1952
1953 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1954 position to POS. If USE is not NULL, the candidate is set as related to
1955 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1956 replacement of the final value of the iv by a direct computation. */
1957
1958 static struct iv_cand *
1959 add_candidate_1 (struct ivopts_data *data,
1960 tree base, tree step, bool important, enum iv_position pos,
1961 struct iv_use *use, tree incremented_at)
1962 {
1963 unsigned i;
1964 struct iv_cand *cand = NULL;
1965 tree type, orig_type;
1966
1967 if (base)
1968 {
1969 orig_type = TREE_TYPE (base);
1970 type = generic_type_for (orig_type);
1971 if (type != orig_type)
1972 {
1973 base = fold_convert (type, base);
1974 step = fold_convert (type, step);
1975 }
1976 }
1977
1978 for (i = 0; i < n_iv_cands (data); i++)
1979 {
1980 cand = iv_cand (data, i);
1981
1982 if (cand->pos != pos)
1983 continue;
1984
1985 if (cand->incremented_at != incremented_at)
1986 continue;
1987
1988 if (!cand->iv)
1989 {
1990 if (!base && !step)
1991 break;
1992
1993 continue;
1994 }
1995
1996 if (!base && !step)
1997 continue;
1998
1999 if (operand_equal_p (base, cand->iv->base, 0)
2000 && operand_equal_p (step, cand->iv->step, 0))
2001 break;
2002 }
2003
2004 if (i == n_iv_cands (data))
2005 {
2006 cand = XCNEW (struct iv_cand);
2007 cand->id = i;
2008
2009 if (!base && !step)
2010 cand->iv = NULL;
2011 else
2012 cand->iv = alloc_iv (base, step);
2013
2014 cand->pos = pos;
2015 if (pos != IP_ORIGINAL && cand->iv)
2016 {
2017 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2018 cand->var_after = cand->var_before;
2019 }
2020 cand->important = important;
2021 cand->incremented_at = incremented_at;
2022 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2023
2024 if (step
2025 && TREE_CODE (step) != INTEGER_CST)
2026 {
2027 fd_ivopts_data = data;
2028 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2029 }
2030
2031 if (dump_file && (dump_flags & TDF_DETAILS))
2032 dump_cand (dump_file, cand);
2033 }
2034
2035 if (important && !cand->important)
2036 {
2037 cand->important = true;
2038 if (dump_file && (dump_flags & TDF_DETAILS))
2039 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2040 }
2041
2042 if (use)
2043 {
2044 bitmap_set_bit (use->related_cands, i);
2045 if (dump_file && (dump_flags & TDF_DETAILS))
2046 fprintf (dump_file, "Candidate %d is related to use %d\n",
2047 cand->id, use->id);
2048 }
2049
2050 return cand;
2051 }
2052
2053 /* Returns true if incrementing the induction variable at the end of the LOOP
2054 is allowed.
2055
2056 The purpose is to avoid splitting latch edge with a biv increment, thus
2057 creating a jump, possibly confusing other optimization passes and leaving
2058 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2059 is not available (so we do not have a better alternative), or if the latch
2060 edge is already nonempty. */
2061
2062 static bool
2063 allow_ip_end_pos_p (struct loop *loop)
2064 {
2065 if (!ip_normal_pos (loop))
2066 return true;
2067
2068 if (!empty_block_p (ip_end_pos (loop)))
2069 return true;
2070
2071 return false;
2072 }
2073
2074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2075 position to POS. If USE is not NULL, the candidate is set as related to
2076 it. The candidate computation is scheduled on all available positions. */
2077
2078 static void
2079 add_candidate (struct ivopts_data *data,
2080 tree base, tree step, bool important, struct iv_use *use)
2081 {
2082 if (ip_normal_pos (data->current_loop))
2083 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2084 if (ip_end_pos (data->current_loop)
2085 && allow_ip_end_pos_p (data->current_loop))
2086 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2087 }
2088
2089 /* Add a standard "0 + 1 * iteration" iv candidate for a
2090 type with SIZE bits. */
2091
2092 static void
2093 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2094 unsigned int size)
2095 {
2096 tree type = lang_hooks.types.type_for_size (size, true);
2097 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2098 true, NULL);
2099 }
2100
2101 /* Adds standard iv candidates. */
2102
2103 static void
2104 add_standard_iv_candidates (struct ivopts_data *data)
2105 {
2106 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2107
2108 /* The same for a double-integer type if it is still fast enough. */
2109 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2110 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2111 }
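
/* For instance, on a target with 32-bit int and 64-bit registers this
   typically adds the candidate {0, +, 1} both in a 32-bit and in a 64-bit
   unsigned type (type_for_size is called with unsignedp set).  */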
2112
2113
/* Adds candidates based on the old induction variable IV.  */
2115
2116 static void
2117 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2118 {
2119 tree phi, def;
2120 struct iv_cand *cand;
2121
2122 add_candidate (data, iv->base, iv->step, true, NULL);
2123
2124 /* The same, but with initial value zero. */
2125 add_candidate (data,
2126 build_int_cst (TREE_TYPE (iv->base), 0),
2127 iv->step, true, NULL);
2128
2129 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2130 if (TREE_CODE (phi) == PHI_NODE)
2131 {
2132 /* Additionally record the possibility of leaving the original iv
2133 untouched. */
2134 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2135 cand = add_candidate_1 (data,
2136 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2137 SSA_NAME_DEF_STMT (def));
2138 cand->var_before = iv->ssa_name;
2139 cand->var_after = def;
2140 }
2141 }
2142
2143 /* Adds candidates based on the old induction variables. */
2144
2145 static void
2146 add_old_ivs_candidates (struct ivopts_data *data)
2147 {
2148 unsigned i;
2149 struct iv *iv;
2150 bitmap_iterator bi;
2151
2152 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2153 {
2154 iv = ver_info (data, i)->iv;
2155 if (iv && iv->biv_p && !integer_zerop (iv->step))
2156 add_old_iv_candidates (data, iv);
2157 }
2158 }
2159
2160 /* Adds candidates based on the value of the induction variable IV and USE. */
2161
2162 static void
2163 add_iv_value_candidates (struct ivopts_data *data,
2164 struct iv *iv, struct iv_use *use)
2165 {
2166 unsigned HOST_WIDE_INT offset;
2167 tree base;
2168
2169 add_candidate (data, iv->base, iv->step, false, use);
2170
  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough that many uses may possibly be based
     on it.  */
2174 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2175 iv->step, true, use);
2176
2177 /* Third, try removing the constant offset. */
2178 base = strip_offset (iv->base, &offset);
2179 if (offset)
2180 add_candidate (data, base, iv->step, false, use);
2181 }
2182
2183 /* Adds candidates based on the uses. */
2184
2185 static void
2186 add_derived_ivs_candidates (struct ivopts_data *data)
2187 {
2188 unsigned i;
2189
2190 for (i = 0; i < n_iv_uses (data); i++)
2191 {
2192 struct iv_use *use = iv_use (data, i);
2193
2194 if (!use)
2195 continue;
2196
2197 switch (use->type)
2198 {
2199 case USE_NONLINEAR_EXPR:
2200 case USE_COMPARE:
2201 case USE_ADDRESS:
2202 /* Just add the ivs based on the value of the iv used here. */
2203 add_iv_value_candidates (data, use->iv, use);
2204 break;
2205
2206 default:
2207 gcc_unreachable ();
2208 }
2209 }
2210 }
2211
2212 /* Record important candidates and add them to related_cands bitmaps
2213 if needed. */
2214
2215 static void
2216 record_important_candidates (struct ivopts_data *data)
2217 {
2218 unsigned i;
2219 struct iv_use *use;
2220
2221 for (i = 0; i < n_iv_cands (data); i++)
2222 {
2223 struct iv_cand *cand = iv_cand (data, i);
2224
2225 if (cand->important)
2226 bitmap_set_bit (data->important_candidates, i);
2227 }
2228
2229 data->consider_all_candidates = (n_iv_cands (data)
2230 <= CONSIDER_ALL_CANDIDATES_BOUND);
2231
2232 if (data->consider_all_candidates)
2233 {
2234 /* We will not need "related_cands" bitmaps in this case,
2235 so release them to decrease peak memory consumption. */
2236 for (i = 0; i < n_iv_uses (data); i++)
2237 {
2238 use = iv_use (data, i);
2239 BITMAP_FREE (use->related_cands);
2240 }
2241 }
2242 else
2243 {
2244 /* Add important candidates to the related_cands bitmaps. */
2245 for (i = 0; i < n_iv_uses (data); i++)
2246 bitmap_ior_into (iv_use (data, i)->related_cands,
2247 data->important_candidates);
2248 }
2249 }
2250
2251 /* Finds the candidates for the induction variables. */
2252
2253 static void
2254 find_iv_candidates (struct ivopts_data *data)
2255 {
2256 /* Add commonly used ivs. */
2257 add_standard_iv_candidates (data);
2258
2259 /* Add old induction variables. */
2260 add_old_ivs_candidates (data);
2261
2262 /* Add induction variables derived from uses. */
2263 add_derived_ivs_candidates (data);
2264
2265 /* Record the important candidates. */
2266 record_important_candidates (data);
2267 }
2268
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array indexed
   directly by the candidate id; otherwise we allocate a small hash map for
   every use.  */
2272
2273 static void
2274 alloc_use_cost_map (struct ivopts_data *data)
2275 {
2276 unsigned i, size, s, j;
2277
2278 for (i = 0; i < n_iv_uses (data); i++)
2279 {
2280 struct iv_use *use = iv_use (data, i);
2281 bitmap_iterator bi;
2282
2283 if (data->consider_all_candidates)
2284 size = n_iv_cands (data);
2285 else
2286 {
2287 s = 0;
2288 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2289 {
2290 s++;
2291 }
2292
	  /* Round up to a power of two, so that computing the modulus is fast.  */
2294 for (size = 1; size < s; size <<= 1)
2295 continue;
2296 }
2297
2298 use->n_map_members = size;
2299 use->cost_map = XCNEWVEC (struct cost_pair, size);
2300 }
2301 }
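
/* For example, a use with five related candidates gets a cost map of size
   eight, so that set_use_iv_cost and get_use_iv_cost below can reduce a
   candidate id to a starting slot with "id & (n_map_members - 1)" and probe
   the following slots linearly, instead of dividing.  */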
2302
/* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
   depends on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */
2306
2307 static void
2308 set_use_iv_cost (struct ivopts_data *data,
2309 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2310 bitmap depends_on, tree value)
2311 {
2312 unsigned i, s;
2313
2314 if (cost == INFTY)
2315 {
2316 BITMAP_FREE (depends_on);
2317 return;
2318 }
2319
2320 if (data->consider_all_candidates)
2321 {
2322 use->cost_map[cand->id].cand = cand;
2323 use->cost_map[cand->id].cost = cost;
2324 use->cost_map[cand->id].depends_on = depends_on;
2325 use->cost_map[cand->id].value = value;
2326 return;
2327 }
2328
2329 /* n_map_members is a power of two, so this computes modulo. */
2330 s = cand->id & (use->n_map_members - 1);
2331 for (i = s; i < use->n_map_members; i++)
2332 if (!use->cost_map[i].cand)
2333 goto found;
2334 for (i = 0; i < s; i++)
2335 if (!use->cost_map[i].cand)
2336 goto found;
2337
2338 gcc_unreachable ();
2339
2340 found:
2341 use->cost_map[i].cand = cand;
2342 use->cost_map[i].cost = cost;
2343 use->cost_map[i].depends_on = depends_on;
2344 use->cost_map[i].value = value;
2345 }
2346
2347 /* Gets cost of (USE, CANDIDATE) pair. */
2348
2349 static struct cost_pair *
2350 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2351 struct iv_cand *cand)
2352 {
2353 unsigned i, s;
2354 struct cost_pair *ret;
2355
2356 if (!cand)
2357 return NULL;
2358
2359 if (data->consider_all_candidates)
2360 {
2361 ret = use->cost_map + cand->id;
2362 if (!ret->cand)
2363 return NULL;
2364
2365 return ret;
2366 }
2367
2368 /* n_map_members is a power of two, so this computes modulo. */
2369 s = cand->id & (use->n_map_members - 1);
2370 for (i = s; i < use->n_map_members; i++)
2371 if (use->cost_map[i].cand == cand)
2372 return use->cost_map + i;
2373
2374 for (i = 0; i < s; i++)
2375 if (use->cost_map[i].cand == cand)
2376 return use->cost_map + i;
2377
2378 return NULL;
2379 }
2380
/* Returns an estimate of the cost of computing SEQ.  */
2382
2383 static unsigned
2384 seq_cost (rtx seq)
2385 {
2386 unsigned cost = 0;
2387 rtx set;
2388
2389 for (; seq; seq = NEXT_INSN (seq))
2390 {
2391 set = single_set (seq);
2392 if (set)
2393 cost += rtx_cost (set, SET);
2394 else
2395 cost++;
2396 }
2397
2398 return cost;
2399 }
2400
/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

2402 static rtx
2403 produce_memory_decl_rtl (tree obj, int *regno)
2404 {
2405 rtx x;
2406
2407 gcc_assert (obj);
2408 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2409 {
2410 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2411 x = gen_rtx_SYMBOL_REF (Pmode, name);
2412 }
2413 else
2414 x = gen_raw_REG (Pmode, (*regno)++);
2415
2416 return gen_rtx_MEM (DECL_MODE (obj), x);
2417 }
2418
/* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */
2421
2422 static tree
2423 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2424 {
2425 tree obj = NULL_TREE;
2426 rtx x = NULL_RTX;
2427 int *regno = data;
2428
2429 switch (TREE_CODE (*expr_p))
2430 {
2431 case ADDR_EXPR:
2432 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2433 handled_component_p (*expr_p);
2434 expr_p = &TREE_OPERAND (*expr_p, 0))
2435 continue;
2436 obj = *expr_p;
2437 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2438 x = produce_memory_decl_rtl (obj, regno);
2439 break;
2440
2441 case SSA_NAME:
2442 *ws = 0;
2443 obj = SSA_NAME_VAR (*expr_p);
2444 if (!DECL_RTL_SET_P (obj))
2445 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2446 break;
2447
2448 case VAR_DECL:
2449 case PARM_DECL:
2450 case RESULT_DECL:
2451 *ws = 0;
2452 obj = *expr_p;
2453
2454 if (DECL_RTL_SET_P (obj))
2455 break;
2456
2457 if (DECL_MODE (obj) == BLKmode)
2458 x = produce_memory_decl_rtl (obj, regno);
2459 else
2460 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2461
2462 break;
2463
2464 default:
2465 break;
2466 }
2467
2468 if (x)
2469 {
2470 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2471 SET_DECL_RTL (obj, x);
2472 }
2473
2474 return NULL_TREE;
2475 }
2476
2477 /* Determines cost of the computation of EXPR. */
2478
2479 static unsigned
2480 computation_cost (tree expr)
2481 {
2482 rtx seq, rslt;
2483 tree type = TREE_TYPE (expr);
2484 unsigned cost;
2485 /* Avoid using hard regs in ways which may be unsupported. */
2486 int regno = LAST_VIRTUAL_REGISTER + 1;
2487
2488 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2489 start_sequence ();
2490 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2491 seq = get_insns ();
2492 end_sequence ();
2493
2494 cost = seq_cost (seq);
2495 if (MEM_P (rslt))
2496 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2497
2498 return cost;
2499 }
2500
2501 /* Returns variable containing the value of candidate CAND at statement AT. */
2502
2503 static tree
2504 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2505 {
2506 if (stmt_after_increment (loop, cand, stmt))
2507 return cand->var_after;
2508 else
2509 return cand->var_before;
2510 }
2511
2512 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2513 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2514
2515 int
2516 tree_int_cst_sign_bit (tree t)
2517 {
2518 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2519 unsigned HOST_WIDE_INT w;
2520
2521 if (bitno < HOST_BITS_PER_WIDE_INT)
2522 w = TREE_INT_CST_LOW (t);
2523 else
2524 {
2525 w = TREE_INT_CST_HIGH (t);
2526 bitno -= HOST_BITS_PER_WIDE_INT;
2527 }
2528
2529 return (w >> bitno) & 1;
2530 }
2531
2532 /* If we can prove that TOP = cst * BOT for some constant cst,
2533 store cst to MUL and return true. Otherwise return false.
2534 The returned value is always sign-extended, regardless of the
2535 signedness of TOP and BOT. */
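
/* For example, for TOP = 6 * x and BOT = x this stores 6 to MUL, and for
   TOP = 8 and BOT = 2 it stores 4; for TOP = 14 and BOT = 4 it fails,
   since 14 = 3 * 4 + 2.  */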
2536
2537 static bool
2538 constant_multiple_of (tree top, tree bot, double_int *mul)
2539 {
2540 tree mby;
2541 enum tree_code code;
2542 double_int res, p0, p1;
2543 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2544
2545 STRIP_NOPS (top);
2546 STRIP_NOPS (bot);
2547
2548 if (operand_equal_p (top, bot, 0))
2549 {
2550 *mul = double_int_one;
2551 return true;
2552 }
2553
2554 code = TREE_CODE (top);
2555 switch (code)
2556 {
2557 case MULT_EXPR:
2558 mby = TREE_OPERAND (top, 1);
2559 if (TREE_CODE (mby) != INTEGER_CST)
2560 return false;
2561
2562 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2563 return false;
2564
2565 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2566 precision);
2567 return true;
2568
2569 case PLUS_EXPR:
2570 case MINUS_EXPR:
2571 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2572 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2573 return false;
2574
2575 if (code == MINUS_EXPR)
2576 p1 = double_int_neg (p1);
2577 *mul = double_int_sext (double_int_add (p0, p1), precision);
2578 return true;
2579
2580 case INTEGER_CST:
2581 if (TREE_CODE (bot) != INTEGER_CST)
2582 return false;
2583
2584 p0 = double_int_sext (tree_to_double_int (top), precision);
2585 p1 = double_int_sext (tree_to_double_int (bot), precision);
2586 if (double_int_zero_p (p1))
2587 return false;
2588 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2589 precision);
2590 return double_int_zero_p (res);
2591
2592 default:
2593 return false;
2594 }
2595 }
2596
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision, which is at least as great as the precision of TYPE,
   stores BA to A and BB to B, and returns the type of BA.  Otherwise,
   returns the type of A and B.  */
2601
2602 static tree
2603 determine_common_wider_type (tree *a, tree *b)
2604 {
2605 tree wider_type = NULL;
2606 tree suba, subb;
2607 tree atype = TREE_TYPE (*a);
2608
2609 if ((TREE_CODE (*a) == NOP_EXPR
2610 || TREE_CODE (*a) == CONVERT_EXPR))
2611 {
2612 suba = TREE_OPERAND (*a, 0);
2613 wider_type = TREE_TYPE (suba);
2614 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2615 return atype;
2616 }
2617 else
2618 return atype;
2619
2620 if ((TREE_CODE (*b) == NOP_EXPR
2621 || TREE_CODE (*b) == CONVERT_EXPR))
2622 {
2623 subb = TREE_OPERAND (*b, 0);
2624 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2625 return atype;
2626 }
2627 else
2628 return atype;
2629
2630 *a = suba;
2631 *b = subb;
2632 return wider_type;
2633 }
2634
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in a
   decomposed form into AFF.  Returns false if USE cannot be expressed
   using CAND.  */
2638
2639 static bool
2640 get_computation_aff (struct loop *loop,
2641 struct iv_use *use, struct iv_cand *cand, tree at,
2642 struct affine_tree_combination *aff)
2643 {
2644 tree ubase = use->iv->base;
2645 tree ustep = use->iv->step;
2646 tree cbase = cand->iv->base;
2647 tree cstep = cand->iv->step, cstep_common;
2648 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2649 tree common_type, var;
2650 tree uutype;
2651 aff_tree cbase_aff, var_aff;
2652 double_int rat;
2653
2654 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2655 {
      /* The candidate type is not precise enough to express the values
	 of the use.  */
2657 return false;
2658 }
2659
2660 var = var_at_stmt (loop, cand, at);
2661 uutype = unsigned_type_for (utype);
2662
  /* If the conversion is not a noop, perform it.  */
2664 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2665 {
2666 cstep = fold_convert (uutype, cstep);
2667 cbase = fold_convert (uutype, cbase);
2668 var = fold_convert (uutype, var);
2669 }
2670
2671 if (!constant_multiple_of (ustep, cstep, &rat))
2672 return false;
2673
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and casting the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
2679 common_type = determine_common_wider_type (&ubase, &cbase);
2680
2681 /* use = ubase - ratio * cbase + ratio * var. */
2682 tree_to_aff_combination (ubase, common_type, aff);
2683 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2684 tree_to_aff_combination (var, uutype, &var_aff);
2685
2686 /* We need to shift the value if we are after the increment. */
2687 if (stmt_after_increment (loop, cand, at))
2688 {
2689 aff_tree cstep_aff;
2690
2691 if (common_type != uutype)
2692 cstep_common = fold_convert (common_type, cstep);
2693 else
2694 cstep_common = cstep;
2695
2696 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2697 aff_combination_add (&cbase_aff, &cstep_aff);
2698 }
2699
2700 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2701 aff_combination_add (aff, &cbase_aff);
2702 if (common_type != uutype)
2703 aff_combination_convert (aff, uutype);
2704
2705 aff_combination_scale (&var_aff, rat);
2706 aff_combination_add (aff, &var_aff);
2707
2708 return true;
2709 }
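
/* As an example of the decomposition above: for a use with base A + 4 and
   step 4 and a candidate with base 0 and step 1, rat is 4 and the resulting
   combination is (A + 4) - 4 * 0 + 4 * var, i.e. A + 4 + 4 * var.  */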
2710
/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The computation is unshared.  */
2713
2714 static tree
2715 get_computation_at (struct loop *loop,
2716 struct iv_use *use, struct iv_cand *cand, tree at)
2717 {
2718 aff_tree aff;
2719 tree type = TREE_TYPE (use->iv->base);
2720
2721 if (!get_computation_aff (loop, use, cand, at, &aff))
2722 return NULL_TREE;
2723 unshare_aff_combination (&aff);
2724 return fold_convert (type, aff_combination_to_tree (&aff));
2725 }
2726
/* Determines the expression by which USE is expressed from induction
   variable CAND in LOOP.  The computation is unshared.  */
2729
2730 static tree
2731 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2732 {
2733 return get_computation_at (loop, use, cand, use->stmt);
2734 }
2735
2736 /* Returns cost of addition in MODE. */
2737
2738 static unsigned
2739 add_cost (enum machine_mode mode)
2740 {
2741 static unsigned costs[NUM_MACHINE_MODES];
2742 rtx seq;
2743 unsigned cost;
2744
2745 if (costs[mode])
2746 return costs[mode];
2747
2748 start_sequence ();
2749 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2750 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2751 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2752 NULL_RTX);
2753 seq = get_insns ();
2754 end_sequence ();
2755
2756 cost = seq_cost (seq);
2757 if (!cost)
2758 cost = 1;
2759
2760 costs[mode] = cost;
2761
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file, "Addition in %s costs %d\n",
2764 GET_MODE_NAME (mode), cost);
2765 return cost;
2766 }
2767
2768 /* Entry in a hashtable of already known costs for multiplication. */
2769 struct mbc_entry
2770 {
2771 HOST_WIDE_INT cst; /* The constant to multiply by. */
2772 enum machine_mode mode; /* In mode. */
2773 unsigned cost; /* The cost. */
2774 };
2775
/* Computes the hash value for ENTRY.  */
2777
2778 static hashval_t
2779 mbc_entry_hash (const void *entry)
2780 {
2781 const struct mbc_entry *e = entry;
2782
2783 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2784 }
2785
2786 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2787
2788 static int
2789 mbc_entry_eq (const void *entry1, const void *entry2)
2790 {
2791 const struct mbc_entry *e1 = entry1;
2792 const struct mbc_entry *e2 = entry2;
2793
2794 return (e1->mode == e2->mode
2795 && e1->cst == e2->cst);
2796 }
2797
2798 /* Returns cost of multiplication by constant CST in MODE. */
2799
2800 unsigned
2801 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2802 {
2803 static htab_t costs;
2804 struct mbc_entry **cached, act;
2805 rtx seq;
2806 unsigned cost;
2807
2808 if (!costs)
2809 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2810
2811 act.mode = mode;
2812 act.cst = cst;
2813 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2814 if (*cached)
2815 return (*cached)->cost;
2816
2817 *cached = XNEW (struct mbc_entry);
2818 (*cached)->mode = mode;
2819 (*cached)->cst = cst;
2820
2821 start_sequence ();
2822 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2823 gen_int_mode (cst, mode), NULL_RTX, 0);
2824 seq = get_insns ();
2825 end_sequence ();
2826
2827 cost = seq_cost (seq);
2828
2829 if (dump_file && (dump_flags & TDF_DETAILS))
2830 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2831 (int) cst, GET_MODE_NAME (mode), cost);
2832
2833 (*cached)->cost = cost;
2834
2835 return cost;
2836 }
2837
/* Returns true if multiplying by RATIO is allowed in an address.  The
   validity is tested for a memory reference accessing memory of mode
   MODE.  */
2840
2841 bool
2842 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2843 {
2844 #define MAX_RATIO 128
2845 static sbitmap valid_mult[MAX_MACHINE_MODE];
2846
2847 if (!valid_mult[mode])
2848 {
2849 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2850 rtx addr;
2851 HOST_WIDE_INT i;
2852
2853 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2854 sbitmap_zero (valid_mult[mode]);
2855 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2856 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2857 {
2858 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2859 if (memory_address_p (mode, addr))
2860 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2861 }
2862
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2864 {
2865 fprintf (dump_file, " allowed multipliers:");
2866 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2867 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2868 fprintf (dump_file, " %d", (int) i);
2869 fprintf (dump_file, "\n");
2870 fprintf (dump_file, "\n");
2871 }
2872 }
2873
2874 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2875 return false;
2876
2877 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2878 }
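
/* For instance, on i386 the scaled index addressing modes allow ratios 1,
   2, 4 and 8, so only those bits of valid_mult end up set; targets without
   scaled addressing accept hardly any multiplier.  */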
2879
/* Returns the cost of an address in the shape symbol + var + OFFSET +
   RATIO * index.  If SYMBOL_PRESENT is false, the symbol is omitted.  If
   VAR_PRESENT is false, the variable is omitted.  The cost is computed for
   a memory reference that accesses a memory location of mode MEM_MODE.
2884
2885 TODO -- there must be some better way. This all is quite crude. */
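
/* The first loop over 16 values below enumerates all combinations of the
   four address parts in the low bits of i; e.g. i == 5 (binary 0101) has
   sym_p and off_p set, standing for an address of the shape
   symbol + offset, with no variable and no index ratio.  */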
2886
2887 static unsigned
2888 get_address_cost (bool symbol_present, bool var_present,
2889 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2890 enum machine_mode mem_mode)
2891 {
2892 static bool initialized[MAX_MACHINE_MODE];
2893 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2894 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2895 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2896 unsigned cost, acost;
2897 bool offset_p, ratio_p;
2898 HOST_WIDE_INT s_offset;
2899 unsigned HOST_WIDE_INT mask;
2900 unsigned bits;
2901
2902 if (!initialized[mem_mode])
2903 {
2904 HOST_WIDE_INT i;
2905 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2906 int old_cse_not_expected;
2907 unsigned sym_p, var_p, off_p, rat_p, add_c;
2908 rtx seq, addr, base;
2909 rtx reg0, reg1;
2910
2911 initialized[mem_mode] = true;
2912
2913 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2914
2915 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2916 for (i = start; i <= 1 << 20; i <<= 1)
2917 {
2918 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2919 if (!memory_address_p (mem_mode, addr))
2920 break;
2921 }
2922 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2923 off[mem_mode] = max_offset[mem_mode];
2924
2925 for (i = start; i <= 1 << 20; i <<= 1)
2926 {
2927 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2928 if (!memory_address_p (mem_mode, addr))
2929 break;
2930 }
2931 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2932
2933 if (dump_file && (dump_flags & TDF_DETAILS))
2934 {
2935 fprintf (dump_file, "get_address_cost:\n");
2936 fprintf (dump_file, " min offset %s %d\n",
2937 GET_MODE_NAME (mem_mode),
2938 (int) min_offset[mem_mode]);
2939 fprintf (dump_file, " max offset %s %d\n",
2940 GET_MODE_NAME (mem_mode),
2941 (int) max_offset[mem_mode]);
2942 }
2943
2944 rat[mem_mode] = 1;
2945 for (i = 2; i <= MAX_RATIO; i++)
2946 if (multiplier_allowed_in_address_p (i, mem_mode))
2947 {
2948 rat[mem_mode] = i;
2949 break;
2950 }
2951
2952 /* Compute the cost of various addressing modes. */
2953 acost = 0;
2954 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2955 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2956
2957 for (i = 0; i < 16; i++)
2958 {
2959 sym_p = i & 1;
2960 var_p = (i >> 1) & 1;
2961 off_p = (i >> 2) & 1;
2962 rat_p = (i >> 3) & 1;
2963
2964 addr = reg0;
2965 if (rat_p)
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2967 gen_int_mode (rat[mem_mode], Pmode));
2968
2969 if (var_p)
2970 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2971
2972 if (sym_p)
2973 {
2974 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2975 if (off_p)
2976 base = gen_rtx_fmt_e (CONST, Pmode,
2977 gen_rtx_fmt_ee (PLUS, Pmode,
2978 base,
2979 gen_int_mode (off[mem_mode],
2980 Pmode)));
2981 }
2982 else if (off_p)
2983 base = gen_int_mode (off[mem_mode], Pmode);
2984 else
2985 base = NULL_RTX;
2986
2987 if (base)
2988 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2989
2990 start_sequence ();
2991 /* To avoid splitting addressing modes, pretend that no cse will
2992 follow. */
2993 old_cse_not_expected = cse_not_expected;
2994 cse_not_expected = true;
2995 addr = memory_address (mem_mode, addr);
2996 cse_not_expected = old_cse_not_expected;
2997 seq = get_insns ();
2998 end_sequence ();
2999
3000 acost = seq_cost (seq);
3001 acost += address_cost (addr, mem_mode);
3002
3003 if (!acost)
3004 acost = 1;
3005 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3006 }
3007
      /* On some targets, it is quite expensive to load a symbol into a
	 register, which makes addresses that contain symbols look much more
	 expensive.  However, the symbol will have to be loaded in any case
	 before the loop (and quite likely we have it in a register already),
	 so it does not make much sense to penalize them too heavily.  Hence
	 we make some final tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with a small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with the cost of the
	 addition, and if this is the case, use it.  */
3020 add_c = add_cost (Pmode);
3021 for (i = 0; i < 8; i++)
3022 {
3023 var_p = i & 1;
3024 off_p = (i >> 1) & 1;
3025 rat_p = (i >> 2) & 1;
3026
3027 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3028 if (var_p)
3029 acost += add_c;
3030
3031 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3032 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3033 }
3034
3035 if (dump_file && (dump_flags & TDF_DETAILS))
3036 {
3037 fprintf (dump_file, "Address costs:\n");
3038
3039 for (i = 0; i < 16; i++)
3040 {
3041 sym_p = i & 1;
3042 var_p = (i >> 1) & 1;
3043 off_p = (i >> 2) & 1;
3044 rat_p = (i >> 3) & 1;
3045
3046 fprintf (dump_file, " ");
3047 if (sym_p)
3048 fprintf (dump_file, "sym + ");
3049 if (var_p)
3050 fprintf (dump_file, "var + ");
3051 if (off_p)
3052 fprintf (dump_file, "cst + ");
3053 if (rat_p)
3054 fprintf (dump_file, "rat * ");
3055
3056 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3057 fprintf (dump_file, "index costs %d\n", acost);
3058 }
3059 fprintf (dump_file, "\n");
3060 }
3061 }
3062
3063 bits = GET_MODE_BITSIZE (Pmode);
3064 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3065 offset &= mask;
3066 if ((offset >> (bits - 1) & 1))
3067 offset |= ~mask;
3068 s_offset = offset;
3069
3070 cost = 0;
3071 offset_p = (s_offset != 0
3072 && min_offset[mem_mode] <= s_offset
3073 && s_offset <= max_offset[mem_mode]);
3074 ratio_p = (ratio != 1
3075 && multiplier_allowed_in_address_p (ratio, mem_mode));
3076
3077 if (ratio != 1 && !ratio_p)
3078 cost += multiply_by_cost (ratio, Pmode);
3079
3080 if (s_offset && !offset_p && !symbol_present)
3081 cost += add_cost (Pmode);
3082
3083 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3084 return cost + acost;
3085 }
3086
3087 /* Estimates cost of forcing expression EXPR into a variable. */
3088
3089 unsigned
3090 force_expr_to_var_cost (tree expr)
3091 {
3092 static bool costs_initialized = false;
3093 static unsigned integer_cost;
3094 static unsigned symbol_cost;
3095 static unsigned address_cost;
3096 tree op0, op1;
3097 unsigned cost0, cost1, cost;
3098 enum machine_mode mode;
3099
3100 if (!costs_initialized)
3101 {
3102 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3103 rtx x = gen_rtx_MEM (DECL_MODE (var),
3104 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3105 tree addr;
3106 tree type = build_pointer_type (integer_type_node);
3107
3108 integer_cost = computation_cost (build_int_cst (integer_type_node,
3109 2000));
3110
3111 SET_DECL_RTL (var, x);
3112 TREE_STATIC (var) = 1;
3113 addr = build1 (ADDR_EXPR, type, var);
3114 symbol_cost = computation_cost (addr) + 1;
3115
3116 address_cost
3117 = computation_cost (build2 (PLUS_EXPR, type,
3118 addr,
3119 build_int_cst (type, 2000))) + 1;
3120 if (dump_file && (dump_flags & TDF_DETAILS))
3121 {
3122 fprintf (dump_file, "force_expr_to_var_cost:\n");
3123 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3124 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3125 fprintf (dump_file, " address %d\n", (int) address_cost);
3126 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3127 fprintf (dump_file, "\n");
3128 }
3129
3130 costs_initialized = true;
3131 }
3132
3133 STRIP_NOPS (expr);
3134
3135 if (SSA_VAR_P (expr))
3136 return 0;
3137
3138 if (TREE_INVARIANT (expr))
3139 {
3140 if (TREE_CODE (expr) == INTEGER_CST)
3141 return integer_cost;
3142
3143 if (TREE_CODE (expr) == ADDR_EXPR)
3144 {
3145 tree obj = TREE_OPERAND (expr, 0);
3146
3147 if (TREE_CODE (obj) == VAR_DECL
3148 || TREE_CODE (obj) == PARM_DECL
3149 || TREE_CODE (obj) == RESULT_DECL)
3150 return symbol_cost;
3151 }
3152
3153 return address_cost;
3154 }
3155
3156 switch (TREE_CODE (expr))
3157 {
3158 case PLUS_EXPR:
3159 case MINUS_EXPR:
3160 case MULT_EXPR:
3161 op0 = TREE_OPERAND (expr, 0);
3162 op1 = TREE_OPERAND (expr, 1);
3163 STRIP_NOPS (op0);
3164 STRIP_NOPS (op1);
3165
3166 if (is_gimple_val (op0))
3167 cost0 = 0;
3168 else
3169 cost0 = force_expr_to_var_cost (op0);
3170
3171 if (is_gimple_val (op1))
3172 cost1 = 0;
3173 else
3174 cost1 = force_expr_to_var_cost (op1);
3175
3176 break;
3177
3178 default:
3179 /* Just an arbitrary value, FIXME. */
3180 return target_spill_cost;
3181 }
3182
3183 mode = TYPE_MODE (TREE_TYPE (expr));
3184 switch (TREE_CODE (expr))
3185 {
3186 case PLUS_EXPR:
3187 case MINUS_EXPR:
3188 cost = add_cost (mode);
3189 break;
3190
3191 case MULT_EXPR:
3192 if (cst_and_fits_in_hwi (op0))
3193 cost = multiply_by_cost (int_cst_value (op0), mode);
3194 else if (cst_and_fits_in_hwi (op1))
3195 cost = multiply_by_cost (int_cst_value (op1), mode);
3196 else
3197 return target_spill_cost;
3198 break;
3199
3200 default:
3201 gcc_unreachable ();
3202 }
3203
3204 cost += cost0;
3205 cost += cost1;
3206
3207 /* Bound the cost by target_spill_cost. The parts of complicated
3208 computations often are either loop invariant or at least can
3209 be shared between several iv uses, so letting this grow without
3210 limits would not give reasonable results. */
3211 return cost < target_spill_cost ? cost : target_spill_cost;
3212 }
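
/* For example, for x * 4 the result is the cost of the multiplication by 4
   in the corresponding mode (bounded by target_spill_cost), while x * y,
   with neither operand constant, costs target_spill_cost.  */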
3213
3214 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3215 invariants the computation depends on. */
3216
3217 static unsigned
3218 force_var_cost (struct ivopts_data *data,
3219 tree expr, bitmap *depends_on)
3220 {
3221 if (depends_on)
3222 {
3223 fd_ivopts_data = data;
3224 walk_tree (&expr, find_depends, depends_on, NULL);
3225 }
3226
3227 return force_expr_to_var_cost (expr);
3228 }
3229
3230 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3231 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3232 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3233 invariants the computation depends on. */
3234
3235 static unsigned
3236 split_address_cost (struct ivopts_data *data,
3237 tree addr, bool *symbol_present, bool *var_present,
3238 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3239 {
3240 tree core;
3241 HOST_WIDE_INT bitsize;
3242 HOST_WIDE_INT bitpos;
3243 tree toffset;
3244 enum machine_mode mode;
3245 int unsignedp, volatilep;
3246
3247 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3248 &unsignedp, &volatilep, false);
3249
3250 if (toffset != 0
3251 || bitpos % BITS_PER_UNIT != 0
3252 || TREE_CODE (core) != VAR_DECL)
3253 {
3254 *symbol_present = false;
3255 *var_present = true;
3256 fd_ivopts_data = data;
3257 walk_tree (&addr, find_depends, depends_on, NULL);
3258 return target_spill_cost;
3259 }
3260
3261 *offset += bitpos / BITS_PER_UNIT;
3262 if (TREE_STATIC (core)
3263 || DECL_EXTERNAL (core))
3264 {
3265 *symbol_present = true;
3266 *var_present = false;
3267 return 0;
3268 }
3269
3270 *symbol_present = false;
3271 *var_present = true;
3272 return 0;
3273 }
3274
3275 /* Estimates cost of expressing difference of addresses E1 - E2 as
3276 var + symbol + offset. The value of offset is added to OFFSET,
3277 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3278 part is missing. DEPENDS_ON is a set of the invariants the computation
3279 depends on. */
3280
3281 static unsigned
3282 ptr_difference_cost (struct ivopts_data *data,
3283 tree e1, tree e2, bool *symbol_present, bool *var_present,
3284 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3285 {
3286 HOST_WIDE_INT diff = 0;
3287 unsigned cost;
3288
3289 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3290
3291 if (ptr_difference_const (e1, e2, &diff))
3292 {
3293 *offset += diff;
3294 *symbol_present = false;
3295 *var_present = false;
3296 return 0;
3297 }
3298
3299 if (e2 == integer_zero_node)
3300 return split_address_cost (data, TREE_OPERAND (e1, 0),
3301 symbol_present, var_present, offset, depends_on);
3302
3303 *symbol_present = false;
3304 *var_present = true;
3305
3306 cost = force_var_cost (data, e1, depends_on);
3307 cost += force_var_cost (data, e2, depends_on);
3308 cost += add_cost (Pmode);
3309
3310 return cost;
3311 }
3312
3313 /* Estimates cost of expressing difference E1 - E2 as
3314 var + symbol + offset. The value of offset is added to OFFSET,
3315 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3316 part is missing. DEPENDS_ON is a set of the invariants the computation
3317 depends on. */
3318
3319 static unsigned
3320 difference_cost (struct ivopts_data *data,
3321 tree e1, tree e2, bool *symbol_present, bool *var_present,
3322 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3323 {
3324 unsigned cost;
3325 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3326 unsigned HOST_WIDE_INT off1, off2;
3327
3328 e1 = strip_offset (e1, &off1);
3329 e2 = strip_offset (e2, &off2);
3330 *offset += off1 - off2;
3331
3332 STRIP_NOPS (e1);
3333 STRIP_NOPS (e2);
3334
3335 if (TREE_CODE (e1) == ADDR_EXPR)
3336 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3337 depends_on);
3338 *symbol_present = false;
3339
3340 if (operand_equal_p (e1, e2, 0))
3341 {
3342 *var_present = false;
3343 return 0;
3344 }
3345 *var_present = true;
3346 if (integer_zerop (e2))
3347 return force_var_cost (data, e1, depends_on);
3348
3349 if (integer_zerop (e1))
3350 {
3351 cost = force_var_cost (data, e2, depends_on);
3352 cost += multiply_by_cost (-1, mode);
3353
3354 return cost;
3355 }
3356
3357 cost = force_var_cost (data, e1, depends_on);
3358 cost += force_var_cost (data, e2, depends_on);
3359 cost += add_cost (mode);
3360
3361 return cost;
3362 }
3363
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.  */
3369
3370 static unsigned
3371 get_computation_cost_at (struct ivopts_data *data,
3372 struct iv_use *use, struct iv_cand *cand,
3373 bool address_p, bitmap *depends_on, tree at)
3374 {
3375 tree ubase = use->iv->base, ustep = use->iv->step;
3376 tree cbase, cstep;
3377 tree utype = TREE_TYPE (ubase), ctype;
3378 unsigned HOST_WIDE_INT cstepi, offset = 0;
3379 HOST_WIDE_INT ratio, aratio;
3380 bool var_present, symbol_present;
3381 unsigned cost = 0, n_sums;
3382 double_int rat;
3383
3384 *depends_on = NULL;
3385
3386 /* Only consider real candidates. */
3387 if (!cand->iv)
3388 return INFTY;
3389
3390 cbase = cand->iv->base;
3391 cstep = cand->iv->step;
3392 ctype = TREE_TYPE (cbase);
3393
3394 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3395 {
      /* The candidate type is not precise enough to express the values
	 of the use.  */
3397 return INFTY;
3398 }
3399
3400 if (address_p)
3401 {
      /* Do not try to express the address of an object with a computation
	 based on the address of a different object.  This may confuse
	 rtl-level alias analysis (which does not expect this to happen,
	 as it is invalid in C), and would be unlikely to be useful
	 anyway.  */
3407 if (use->iv->base_object
3408 && cand->iv->base_object
3409 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3410 return INFTY;
3411 }
3412
3413 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3414 {
3415 /* TODO -- add direct handling of this case. */
3416 goto fallback;
3417 }
3418
  /* CSTEPI is removed from the offset in case the statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses the value of the variable before the increment anyway,
     so this is not that unrealistic.  */
3425 if (cst_and_fits_in_hwi (cstep))
3426 cstepi = int_cst_value (cstep);
3427 else
3428 cstepi = 0;
3429
3430 if (!constant_multiple_of (ustep, cstep, &rat))
3431 return INFTY;
3432
3433 if (double_int_fits_in_shwi_p (rat))
3434 ratio = double_int_to_shwi (rat);
3435 else
3436 return INFTY;
3437
3438 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3439 or ratio == 1, it is better to handle this like
3440
3441 ubase - ratio * cbase + ratio * var
3442
     (this also holds in the case ratio == -1, TODO).  */
3444
3445 if (cst_and_fits_in_hwi (cbase))
3446 {
3447 offset = - ratio * int_cst_value (cbase);
3448 cost += difference_cost (data,
3449 ubase, integer_zero_node,
3450 &symbol_present, &var_present, &offset,
3451 depends_on);
3452 }
3453 else if (ratio == 1)
3454 {
3455 cost += difference_cost (data,
3456 ubase, cbase,
3457 &symbol_present, &var_present, &offset,
3458 depends_on);
3459 }
3460 else
3461 {
3462 cost += force_var_cost (data, cbase, depends_on);
3463 cost += add_cost (TYPE_MODE (ctype));
3464 cost += difference_cost (data,
3465 ubase, integer_zero_node,
3466 &symbol_present, &var_present, &offset,
3467 depends_on);
3468 }
3469
3470 /* If we are after the increment, the value of the candidate is higher by
3471 one iteration. */
3472 if (stmt_after_increment (data->current_loop, cand, at))
3473 offset -= ratio * cstepi;
3474
  /* Now the computation is in the shape symbol + var1 + const + ratio * var2
     (the symbol/var/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
3478 if (address_p)
3479 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3480 TYPE_MODE (TREE_TYPE (*use->op_p)));
3481
3482 /* Otherwise estimate the costs for computing the expression. */
3483 aratio = ratio > 0 ? ratio : -ratio;
3484 if (!symbol_present && !var_present && !offset)
3485 {
3486 if (ratio != 1)
3487 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3488
3489 return cost;
3490 }
3491
3492 if (aratio != 1)
3493 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3494
3495 n_sums = 1;
3496 if (var_present
3497 /* Symbol + offset should be compile-time computable. */
3498 && (symbol_present || offset))
3499 n_sums++;
3500
3501 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3502
3503 fallback:
3504 {
3505 /* Just get the expression, expand it and measure the cost. */
3506 tree comp = get_computation_at (data->current_loop, use, cand, at);
3507
3508 if (!comp)
3509 return INFTY;
3510
3511 if (address_p)
3512 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3513
3514 return computation_cost (comp);
3515 }
3516 }
3517
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */
3523
3524 static unsigned
3525 get_computation_cost (struct ivopts_data *data,
3526 struct iv_use *use, struct iv_cand *cand,
3527 bool address_p, bitmap *depends_on)
3528 {
3529 return get_computation_cost_at (data,
3530 use, cand, address_p, depends_on, use->stmt);
3531 }
3532
3533 /* Determines cost of basing replacement of USE on CAND in a generic
3534 expression. */
3535
3536 static bool
3537 determine_use_iv_cost_generic (struct ivopts_data *data,
3538 struct iv_use *use, struct iv_cand *cand)
3539 {
3540 bitmap depends_on;
3541 unsigned cost;
3542
  /* The simple case first -- if we need to express the value of the
     preserved original biv, the cost is 0.  This also prevents us from
     counting the cost of the increment twice -- once at this use and once
     in the cost of the candidate.  */
3547 if (cand->pos == IP_ORIGINAL
3548 && cand->incremented_at == use->stmt)
3549 {
3550 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3551 return true;
3552 }
3553
3554 cost = get_computation_cost (data, use, cand, false, &depends_on);
3555 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556
3557 return cost != INFTY;
3558 }
3559
3560 /* Determines cost of basing replacement of USE on CAND in an address. */
3561
3562 static bool
3563 determine_use_iv_cost_address (struct ivopts_data *data,
3564 struct iv_use *use, struct iv_cand *cand)
3565 {
3566 bitmap depends_on;
3567 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3568
3569 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3570
3571 return cost != INFTY;
3572 }
3573
3574 /* Computes value of candidate CAND at position AT in iteration NITER, and
3575 stores it to VAL. */
3576
3577 static void
3578 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3579 aff_tree *val)
3580 {
3581 aff_tree step, delta, nit;
3582 struct iv *iv = cand->iv;
3583 tree type = TREE_TYPE (iv->base);
3584
3585 tree_to_aff_combination (iv->step, type, &step);
3586 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3587 aff_combination_convert (&nit, type);
3588 aff_combination_mult (&nit, &step, &delta);
3589 if (stmt_after_increment (loop, cand, at))
3590 aff_combination_add (&delta, &step);
3591
3592 tree_to_aff_combination (iv->base, type, val);
3593 aff_combination_add (val, &delta);
3594 }
3595
/* Returns the period of induction variable IV.  */
3597
3598 static tree
3599 iv_period (struct iv *iv)
3600 {
3601 tree step = iv->step, period, type;
3602 tree pow2div;
3603
3604 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3605
  /* The period of the iv is the range of its type divided by the maximum
     power of two that divides the step; since the type range is a power of
     two, it suffices to count the low zero bits of the step.  The value
     built below is this period decreased by one, so that it fits in the
     type.  */
3609 pow2div = num_ending_zeros (step);
3610 type = unsigned_type_for (TREE_TYPE (step));
3611
3612 period = build_low_bits_mask (type,
3613 (TYPE_PRECISION (type)
3614 - tree_low_cst (pow2div, 1)));
3615
3616 return period;
3617 }
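
/* E.g. for an iv with step 4 in an 8-bit unsigned type, num_ending_zeros
   is 2 and the value built is the 6-bit mask 63 -- the iv takes only 64
   distinct values before wrapping.  */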
3618
3619 /* Returns the comparison operator used when eliminating the iv USE. */
3620
3621 static enum tree_code
3622 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3623 {
3624 struct loop *loop = data->current_loop;
3625 basic_block ex_bb;
3626 edge exit;
3627
3628 ex_bb = bb_for_stmt (use->stmt);
3629 exit = EDGE_SUCC (ex_bb, 0);
3630 if (flow_bb_inside_loop_p (loop, exit->dest))
3631 exit = EDGE_SUCC (ex_bb, 1);
3632
3633 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3634 }
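
/* E.g. if the loop is left when the condition is false (the exit is on the
   false edge), the eliminated test keeps iterating while the candidate is
   not equal to the bound, so NE_EXPR is used; if the true edge exits, the
   test becomes an equality with the bound.  */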
3635
/* Checks whether it is possible to express the condition in USE by a
   comparison of candidate CAND.  If so, the value to compare with is
   stored to BOUND.  */
3638
3639 static bool
3640 may_eliminate_iv (struct ivopts_data *data,
3641 struct iv_use *use, struct iv_cand *cand, tree *bound)
3642 {
3643 basic_block ex_bb;
3644 edge exit;
3645 tree nit, nit_type;
3646 tree wider_type, period, per_type;
3647 struct loop *loop = data->current_loop;
3648 aff_tree bnd;
3649
3650 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3651 return false;
3652
  /* For now this works only for exits that dominate the loop latch.
     TODO -- extend it for other conditions inside the loop body.  */
3655 ex_bb = bb_for_stmt (use->stmt);
3656 if (use->stmt != last_stmt (ex_bb)
3657 || TREE_CODE (use->stmt) != COND_EXPR)
3658 return false;
3659 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3660 return false;
3661
3662 exit = EDGE_SUCC (ex_bb, 0);
3663 if (flow_bb_inside_loop_p (loop, exit->dest))
3664 exit = EDGE_SUCC (ex_bb, 1);
3665 if (flow_bb_inside_loop_p (loop, exit->dest))
3666 return false;
3667
3668 nit = niter_for_exit (data, exit);
3669 if (!nit)
3670 return false;
3671
3672 nit_type = TREE_TYPE (nit);
3673
3674 /* Determine whether we may use the variable to test whether niter iterations
3675 elapsed. This is the case iff the period of the induction variable is
3676 greater than the number of iterations. */
3677 period = iv_period (cand->iv);
3678 if (!period)
3679 return false;
3680 per_type = TREE_TYPE (period);
3681
3683 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3684 wider_type = per_type;
3685 else
3686 wider_type = nit_type;
3687
3688 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3689 fold_convert (wider_type, period),
3690 fold_convert (wider_type, nit))))
3691 return false;
3692
3693 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3694 *bound = aff_combination_to_tree (&bnd);
3695 return true;
3696 }
3697
3698 /* Determines cost of basing replacement of USE on CAND in a condition. */
3699
3700 static bool
3701 determine_use_iv_cost_condition (struct ivopts_data *data,
3702 struct iv_use *use, struct iv_cand *cand)
3703 {
3704 tree bound = NULL_TREE;
3705 struct iv *cmp_iv;
3706 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3707 unsigned elim_cost, express_cost, cost;
3708 bool ok;
3709
3710 /* Only consider real candidates. */
3711 if (!cand->iv)
3712 {
3713 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3714 return false;
3715 }
3716
3717 /* Try iv elimination. */
3718 if (may_eliminate_iv (data, use, cand, &bound))
3719 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3720 else
3721 elim_cost = INFTY;
3722
3723 /* Try expressing the original giv. If it is compared with an invariant,
3724 note that we cannot get rid of it. */
3725 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3726 gcc_assert (ok);
3727
3728 express_cost = get_computation_cost (data, use, cand, false,
3729 &depends_on_express);
3730 fd_ivopts_data = data;
3731 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3732
3733 /* Choose the better approach. */
3734 if (elim_cost < express_cost)
3735 {
3736 cost = elim_cost;
3737 depends_on = depends_on_elim;
3738 depends_on_elim = NULL;
3739 }
3740 else
3741 {
3742 cost = express_cost;
3743 depends_on = depends_on_express;
3744 depends_on_express = NULL;
3745 bound = NULL_TREE;
3746 }
3747
3748 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3749
3750 if (depends_on_elim)
3751 BITMAP_FREE (depends_on_elim);
3752 if (depends_on_express)
3753 BITMAP_FREE (depends_on_express);
3754
3755 return cost != INFTY;
3756 }
3757
3758 /* Determines cost of basing replacement of USE on CAND. Returns false
3759 if USE cannot be based on CAND. */
3760
3761 static bool
3762 determine_use_iv_cost (struct ivopts_data *data,
3763 struct iv_use *use, struct iv_cand *cand)
3764 {
3765 switch (use->type)
3766 {
3767 case USE_NONLINEAR_EXPR:
3768 return determine_use_iv_cost_generic (data, use, cand);
3769
3770 case USE_ADDRESS:
3771 return determine_use_iv_cost_address (data, use, cand);
3772
3773 case USE_COMPARE:
3774 return determine_use_iv_cost_condition (data, use, cand);
3775
3776 default:
3777 gcc_unreachable ();
3778 }
3779 }
3780
/* Determines the costs of basing the uses on the iv candidates.  */
3782
3783 static void
3784 determine_use_iv_costs (struct ivopts_data *data)
3785 {
3786 unsigned i, j;
3787 struct iv_use *use;
3788 struct iv_cand *cand;
3789 bitmap to_clear = BITMAP_ALLOC (NULL);
3790
3791 alloc_use_cost_map (data);
3792
3793 for (i = 0; i < n_iv_uses (data); i++)
3794 {
3795 use = iv_use (data, i);
3796
3797 if (data->consider_all_candidates)
3798 {
3799 for (j = 0; j < n_iv_cands (data); j++)
3800 {
3801 cand = iv_cand (data, j);
3802 determine_use_iv_cost (data, use, cand);
3803 }
3804 }
3805 else
3806 {
3807 bitmap_iterator bi;
3808
3809 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3810 {
3811 cand = iv_cand (data, j);
3812 if (!determine_use_iv_cost (data, use, cand))
3813 bitmap_set_bit (to_clear, j);
3814 }
3815
	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
3818 bitmap_and_compl_into (use->related_cands, to_clear);
3819 bitmap_clear (to_clear);
3820 }
3821 }
3822
3823 BITMAP_FREE (to_clear);
3824
3825 if (dump_file && (dump_flags & TDF_DETAILS))
3826 {
3827 fprintf (dump_file, "Use-candidate costs:\n");
3828
3829 for (i = 0; i < n_iv_uses (data); i++)
3830 {
3831 use = iv_use (data, i);
3832
3833 fprintf (dump_file, "Use %d:\n", i);
3834 fprintf (dump_file, " cand\tcost\tdepends on\n");
3835 for (j = 0; j < use->n_map_members; j++)
3836 {
3837 if (!use->cost_map[j].cand
3838 || use->cost_map[j].cost == INFTY)
3839 continue;
3840
3841 fprintf (dump_file, " %d\t%d\t",
3842 use->cost_map[j].cand->id,
3843 use->cost_map[j].cost);
3844 if (use->cost_map[j].depends_on)
3845 bitmap_print (dump_file,
3846 use->cost_map[j].depends_on, "","");
3847 fprintf (dump_file, "\n");
3848 }
3849
3850 fprintf (dump_file, "\n");
3851 }
3852 fprintf (dump_file, "\n");
3853 }
3854 }
3855
3856 /* Determines cost of the candidate CAND. */
3857
3858 static void
3859 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3860 {
3861 unsigned cost_base, cost_step;
3862 tree base;
3863
3864 if (!cand->iv)
3865 {
3866 cand->cost = 0;
3867 return;
3868 }
3869
  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we give it only a very small weight.  */
3873
3874 base = cand->iv->base;
3875 cost_base = force_var_cost (data, base, NULL);
3876 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3877
3878 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3879
3880 /* Prefer the original iv unless we may gain something by replacing it;
3881 this is not really relevant for artificial ivs created by other
3882 passes. */
3883 if (cand->pos == IP_ORIGINAL
3884 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3885 cand->cost--;
3886
  /* Prefer not to insert statements into the latch unless there are some
     already (so that we do not create unnecessary jumps).  */
3889 if (cand->pos == IP_END
3890 && empty_block_p (ip_end_pos (data->current_loop)))
3891 cand->cost++;
3892 }
3893
3894 /* Determines costs of computation of the candidates. */
3895
3896 static void
3897 determine_iv_costs (struct ivopts_data *data)
3898 {
3899 unsigned i;
3900
3901 if (dump_file && (dump_flags & TDF_DETAILS))
3902 {
3903 fprintf (dump_file, "Candidate costs:\n");
3904 fprintf (dump_file, " cand\tcost\n");
3905 }
3906
3907 for (i = 0; i < n_iv_cands (data); i++)
3908 {
3909 struct iv_cand *cand = iv_cand (data, i);
3910
3911 determine_iv_cost (data, cand);
3912
3913 if (dump_file && (dump_flags & TDF_DETAILS))
3914 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3915 }
3916
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, "\n");
3919 }
3920
/* Calculates the cost of having SIZE induction variables.  */
3922
3923 static unsigned
3924 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3925 {
3926 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3927 }
3928
3929 /* For each size of the induction variable set determine the penalty. */
3930
3931 static void
3932 determine_set_costs (struct ivopts_data *data)
3933 {
3934 unsigned j, n;
3935 tree phi, op;
3936 struct loop *loop = data->current_loop;
3937 bitmap_iterator bi;
3938
  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), call it A.

     We estimate the number of registers used by the loop, call it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
	encourage creating a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST.
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.

     A worked example follows this comment.  */
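
  /* A small worked example with made-up numbers: let A = 10, R = 3 and
     U = 4.  Three ivs satisfy U + I + R = 10 <= A and cost 3 * SMALL_COST;
     five ivs give A - R = 7 < U + I = 9 <= A and cost 5 * PRES_COST; eight
     ivs make U + I = 12 > A, adding SPILL_COST * (12 - 10) / 12 per use on
     top of 8 * PRES_COST.  */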
3958
3959 if (dump_file && (dump_flags & TDF_DETAILS))
3960 {
3961 fprintf (dump_file, "Global costs:\n");
3962 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3963 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3964 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3965 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3966 }
3967
3968 n = 0;
3969 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3970 {
3971 op = PHI_RESULT (phi);
3972
3973 if (!is_gimple_reg (op))
3974 continue;
3975
3976 if (get_iv (data, op))
3977 continue;
3978
3979 n++;
3980 }
3981
3982 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3983 {
3984 struct version_info *info = ver_info (data, j);
3985
3986 if (info->inv_id && info->has_nonlin_use)
3987 n++;
3988 }
3989
3990 data->regs_used = n;
3991 if (dump_file && (dump_flags & TDF_DETAILS))
3992 fprintf (dump_file, " regs_used %d\n", n);
3993
3994 if (dump_file && (dump_flags & TDF_DETAILS))
3995 {
3996 fprintf (dump_file, " cost for size:\n");
3997 fprintf (dump_file, " ivs\tcost\n");
3998 for (j = 0; j <= 2 * target_avail_regs; j++)
3999 fprintf (dump_file, " %d\t%d\n", j,
4000 ivopts_global_cost_for_size (data, j));
4001 fprintf (dump_file, "\n");
4002 }
4003 }
4004
4005 /* Returns true if A is a cheaper cost pair than B. */
4006
4007 static bool
4008 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4009 {
4010 if (!a)
4011 return false;
4012
4013 if (!b)
4014 return true;
4015
4016 if (a->cost < b->cost)
4017 return true;
4018
4019 if (a->cost > b->cost)
4020 return false;
4021
4022 /* In case the costs are the same, prefer the cheaper candidate. */
4023 if (a->cand->cost < b->cand->cost)
4024 return true;
4025
4026 return false;
4027 }
4028
4029 /* Computes the cost field of IVS structure. */
4030
4031 static void
4032 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4033 {
4034 unsigned cost = 0;
4035
4036 cost += ivs->cand_use_cost;
4037 cost += ivs->cand_cost;
4038 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4039
4040 ivs->cost = cost;
4041 }
4042
/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates a record of the change from expressing USE by OLD_CP to expressing
   it by NEW_CP, and chains it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the cost pair through which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */
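/* For instance, given a hypothetical delta (use0: A -> B) -> (use1: C -> D),
   committing with FORWARD true makes use0 expressed by cost pair B and use1
   by D, while committing it with FORWARD false restores A and C.  */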

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

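  /* Reversing again restores both the order of the list and the old_cp /
     new_cp fields, so DELTA is left exactly as the caller passed it in.  */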
  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}

/* Try changing the candidate used by each use in IVS to CAND.  Return the
   cost of the new set, and store the differences in DELTA.  The number of
   induction variables in the new set is stored in N_IVS.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

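  /* Commit the delta only to measure the cost of the would-be set, then
     roll it back; IVS is unchanged on return and the caller decides whether
     to apply DELTA for real.  */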
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other than
   EXCEPT_CAND from it.  Return the cost of the new set, and store the
   differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if that fails, try the
     use-specific ones.  Rationale -- in loops with many variables the best
     choice often is to use just one generic biv.  If we added many ivs
     specific to the uses here, the optimization algorithm later would be
     likely to get stuck in a local minimum, thus causing us to create too
     many ivs.  Going from a few ivs to more seems more likely to be
     successful -- starting from a few ivs, replacing an expensive use by
     a specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
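/* An illustrative run (costs invented for this sketch): starting from an
   initial solution {i_1} of cost 20, a try_improve_iv_set step may find that
   extending the set by candidate i_4 and pruning i_1 yields cost 16; the
   loop below then continues from {i_4} until no single extension or pruning
   lowers the cost any further.  */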

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

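  /* The candidate's position determines where the increment of the new iv
     is placed: IP_NORMAL puts it just before the exit test, IP_END at the
     end of the latch block, and IP_ORIGINAL reuses the original biv and its
     increment.  */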
  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      remove_phi_node (stmt, NULL_TREE, including_defined_name);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
      release_defs (stmt);
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to introduce
     a new computation (that might also need casting the variable to
     unsigned and back).  */
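  /* E.g. for a hypothetical original increment i_4 = i_2 + 1, expressing
     i_4 by the candidate based on the original biv itself means the
     statement can stay exactly as it is.  */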
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
    }
}

/* Replaces the ssa name at index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extract the alias analysis info for the memory reference REF.  There are
   several ways this information may be stored, and its precise semantics
   depend on the type of the reference, but there is always one _DECL node
   hidden somewhere that is used to determine the set of virtual operands
   for the reference.  The code below deciphers this jungle and extracts
   that single useful piece of information.  */
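/* In outline: for a dereference of an ssa name pointer we prefer the name
   memory tag of the pointer and fall back to the symbol memory tag of its
   base variable; for a plain _DECL base we use its symbol memory tag if it
   has one, and the _DECL itself otherwise.  */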

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
	break;
    }

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
	 tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the operands is an iv) using
   candidate CAND.  */
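/* For instance (hypothetical names): when the candidate allows eliminating
   the iv from the exit test, a condition like "i < 100" may be replaced by
   a comparison of the candidate against a precomputed bound, e.g.
   "p != base + 400"; if elimination is not possible, the iv operand is
   simply re-expressed in terms of the candidate.  */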

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}