basic-block.h (FOR_ALL_BB_FN): New macro.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
93 /* The infinite cost. */
94 #define INFTY 10000000
95
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
99
100
101 /* Representation of the induction variable. */
102 struct iv
103 {
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
111 };
112
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
115 {
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
122 };
123
124 /* Information attached to loop. */
125 struct loop_data
126 {
127 unsigned regs_used; /* Number of registers used. */
128 };
129
130 /* Types of uses. */
131 enum use_type
132 {
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
137 };
138
139 /* The candidate - cost pair. */
140 struct cost_pair
141 {
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
149 };
150
151 /* Use. */
152 struct iv_use
153 {
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
161
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
165
166 struct iv_cand *selected;
167 /* The selected candidate. */
168 };
169
170 /* The position where the iv is computed. */
171 enum iv_position
172 {
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
176 };
177
178 /* The induction variable candidate. */
179 struct iv_cand
180 {
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
196 };
197
198 /* The data used by the induction variable optimizations. */
199
200 typedef struct iv_use *iv_use_p;
201 DEF_VEC_P(iv_use_p);
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
203
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
207
208 struct ivopts_data
209 {
210 /* The currently optimized loop. */
211 struct loop *current_loop;
212
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
215
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
218
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
221
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
224
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
227
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
230
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
233
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
236
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
240 };
241
242 /* An assignment of iv candidates to uses. */
243
244 struct iv_ca
245 {
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
248
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
251
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
254
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
257
258 /* The candidates used. */
259 bitmap cands;
260
261 /* The number of candidates in the set. */
262 unsigned n_cands;
263
264 /* Total number of registers needed. */
265 unsigned n_regs;
266
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
269
270 /* Total cost of candidates. */
271 unsigned cand_cost;
272
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
275
276 /* Total cost of the assignment. */
277 unsigned cost;
278 };
279
280 /* Difference of two iv candidate assignments. */
281
282 struct iv_ca_delta
283 {
284 /* Changed use. */
285 struct iv_use *use;
286
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
289
290 /* A new assignment. */
291 struct cost_pair *new_cp;
292
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
295 };
296
297 /* Bound on number of candidates below that all candidates are considered. */
298
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
304
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
310
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313
314 /* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
316
317 static VEC(tree,heap) *decl_rtl_to_reset;
318
319 /* Number of uses recorded in DATA. */
320
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
323 {
324 return VEC_length (iv_use_p, data->iv_uses);
325 }
326
327 /* Ith use recorded in DATA. */
328
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
331 {
332 return VEC_index (iv_use_p, data->iv_uses, i);
333 }
334
335 /* Number of candidates recorded in DATA. */
336
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
339 {
340 return VEC_length (iv_cand_p, data->iv_candidates);
341 }
342
343 /* Ith candidate recorded in DATA. */
344
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
347 {
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
349 }
350
351 /* The data for LOOP. */
352
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
355 {
356 return loop->aux;
357 }
358
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
360
361 static edge
362 single_dom_exit (struct loop *loop)
363 {
364 edge exit = loop->single_exit;
365
366 if (!exit)
367 return NULL;
368
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
371
372 return exit;
373 }
374
375 /* Dumps information about the induction variable IV to FILE. */
376
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
380 {
381 if (iv->ssa_name)
382 {
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
386 }
387
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
391
392 if (iv->step)
393 {
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
397
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
401 }
402 else
403 {
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
407 }
408
409 if (iv->base_object)
410 {
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
418 }
419
420 /* Dumps information about the USE to FILE. */
421
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
425 {
426 fprintf (file, "use %d\n", use->id);
427
428 switch (use->type)
429 {
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
433
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
437
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
441
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
445
446 default:
447 gcc_unreachable ();
448 }
449
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
453
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
458
459 dump_iv (file, use->iv);
460
461 if (use->related_cands)
462 {
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
465 }
466 }
467
468 /* Dumps information about the uses to FILE. */
469
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
473 {
474 unsigned i;
475 struct iv_use *use;
476
477 for (i = 0; i < n_iv_uses (data); i++)
478 {
479 use = iv_use (data, i);
480
481 dump_use (file, use);
482 fprintf (file, "\n");
483 }
484 }
485
486 /* Dumps information about induction variable candidate CAND to FILE. */
487
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
491 {
492 struct iv *iv = cand->iv;
493
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
496
497 if (cand->depends_on)
498 {
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
501 }
502
503 if (!iv)
504 {
505 fprintf (file, " final value replacement\n");
506 return;
507 }
508
509 switch (cand->pos)
510 {
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
514
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
518
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
522 }
523
524 dump_iv (file, iv);
525 }
526
527 /* Returns the info for ssa version VER. */
528
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
531 {
532 return data->version_info + ver;
533 }
534
535 /* Returns the info for ssa name NAME. */
536
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
539 {
540 return ver_info (data, SSA_NAME_VERSION (name));
541 }
542
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
545
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
549 {
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
553
554 a &= mask;
555 b &= mask;
556
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
559 {
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
564 }
565
566 if (!(b & 1))
567 {
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
570 }
571
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
577 {
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
580 }
581
582 val = (a * inv) & mask;
583
584 gcc_assert (((val * b) & mask) == a);
585
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
588
589 *x = val;
590
591 return true;
592 }
593
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
596
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
599 {
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
601
602 gcc_assert (bb);
603
604 if (sbb == loop->latch)
605 return true;
606
607 if (sbb != bb)
608 return false;
609
610 return stmt == last_stmt (bb);
611 }
612
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
615
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
618 {
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
622
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
625
626 if (stmt_bb != cand_bb)
627 return true;
628
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
632 {
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
637 }
638 }
639
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
642
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
645 {
646 switch (cand->pos)
647 {
648 case IP_END:
649 return false;
650
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
653
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
656
657 default:
658 gcc_unreachable ();
659 }
660 }
661
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
664
665 struct nfe_cache_elt
666 {
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
669
670 /* True if the # of iterations was successfully determined. */
671 bool valid_p;
672
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
675 };
676
677 /* Hash function for nfe_cache_elt E. */
678
679 static hashval_t
680 nfe_hash (const void *e)
681 {
682 const struct nfe_cache_elt *elt = e;
683
684 return htab_hash_pointer (elt->exit);
685 }
686
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
688
689 static int
690 nfe_eq (const void *e1, const void *e2)
691 {
692 const struct nfe_cache_elt *elt1 = e1;
693
694 return elt1->exit == e2;
695 }
696
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
699
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
702 {
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
705
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
709
710 if (!*slot)
711 {
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
716 *slot = nfe_desc;
717 }
718 else
719 nfe_desc = *slot;
720
721 if (!nfe_desc->valid_p)
722 return NULL;
723
724 return &nfe_desc->niter;
725 }
726
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
729 goes wrong. */
730
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
733 {
734 edge exit = single_dom_exit (data->current_loop);
735
736 if (!exit)
737 return NULL;
738
739 return niter_for_exit (data, exit);
740 }
741
742 /* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
744
745 static void
746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
747 {
748 unsigned i;
749
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
755 data->max_inv_id = 0;
756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
757
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
761
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
765 }
766
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
769
770 static tree
771 determine_base_object (tree expr)
772 {
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
775
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
777 return NULL_TREE;
778
779 switch (code)
780 {
781 case INTEGER_CST:
782 return NULL_TREE;
783
784 case ADDR_EXPR:
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
787
788 if (!base)
789 return expr;
790
791 if (TREE_CODE (base) == INDIRECT_REF)
792 return determine_base_object (TREE_OPERAND (base, 0));
793
794 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
795
796 case PLUS_EXPR:
797 case MINUS_EXPR:
798 op0 = determine_base_object (TREE_OPERAND (expr, 0));
799 op1 = determine_base_object (TREE_OPERAND (expr, 1));
800
801 if (!op1)
802 return op0;
803
804 if (!op0)
805 return (code == PLUS_EXPR
806 ? op1
807 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
808
809 return fold (build (code, ptr_type_node, op0, op1));
810
811 case NOP_EXPR:
812 case CONVERT_EXPR:
813 return determine_base_object (TREE_OPERAND (expr, 0));
814
815 default:
816 return fold_convert (ptr_type_node, expr);
817 }
818 }
819
820 /* Allocates an induction variable with given initial value BASE and step STEP
821 for loop LOOP. */
822
823 static struct iv *
824 alloc_iv (tree base, tree step)
825 {
826 struct iv *iv = xcalloc (1, sizeof (struct iv));
827
828 if (step && integer_zerop (step))
829 step = NULL_TREE;
830
831 iv->base = base;
832 iv->base_object = determine_base_object (base);
833 iv->step = step;
834 iv->biv_p = false;
835 iv->have_use_for = false;
836 iv->use_id = 0;
837 iv->ssa_name = NULL_TREE;
838
839 return iv;
840 }
841
842 /* Sets STEP and BASE for induction variable IV. */
843
844 static void
845 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
846 {
847 struct version_info *info = name_info (data, iv);
848
849 gcc_assert (!info->iv);
850
851 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
852 info->iv = alloc_iv (base, step);
853 info->iv->ssa_name = iv;
854 }
855
856 /* Finds induction variable declaration for VAR. */
857
858 static struct iv *
859 get_iv (struct ivopts_data *data, tree var)
860 {
861 basic_block bb;
862
863 if (!name_info (data, var)->iv)
864 {
865 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
866
867 if (!bb
868 || !flow_bb_inside_loop_p (data->current_loop, bb))
869 set_iv (data, var, var, NULL_TREE);
870 }
871
872 return name_info (data, var)->iv;
873 }
874
875 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
876 not define a simple affine biv with nonzero step. */
877
878 static tree
879 determine_biv_step (tree phi)
880 {
881 struct loop *loop = bb_for_stmt (phi)->loop_father;
882 tree name = PHI_RESULT (phi), base, step;
883
884 if (!is_gimple_reg (name))
885 return NULL_TREE;
886
887 if (!simple_iv (loop, phi, name, &base, &step, true))
888 return NULL_TREE;
889
890 if (zero_p (step))
891 return NULL_TREE;
892
893 return step;
894 }
895
896 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
897
898 static bool
899 abnormal_ssa_name_p (tree exp)
900 {
901 if (!exp)
902 return false;
903
904 if (TREE_CODE (exp) != SSA_NAME)
905 return false;
906
907 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
908 }
909
910 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
911 abnormal phi node. Callback for for_each_index. */
912
913 static bool
914 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
915 void *data ATTRIBUTE_UNUSED)
916 {
917 if (TREE_CODE (base) == ARRAY_REF)
918 {
919 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
920 return false;
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
922 return false;
923 }
924
925 return !abnormal_ssa_name_p (*index);
926 }
927
928 /* Returns true if EXPR contains a ssa name that occurs in an
929 abnormal phi node. */
930
931 static bool
932 contains_abnormal_ssa_name_p (tree expr)
933 {
934 enum tree_code code;
935 enum tree_code_class class;
936
937 if (!expr)
938 return false;
939
940 code = TREE_CODE (expr);
941 class = TREE_CODE_CLASS (code);
942
943 if (code == SSA_NAME)
944 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
945
946 if (code == INTEGER_CST
947 || is_gimple_min_invariant (expr))
948 return false;
949
950 if (code == ADDR_EXPR)
951 return !for_each_index (&TREE_OPERAND (expr, 0),
952 idx_contains_abnormal_ssa_name_p,
953 NULL);
954
955 switch (class)
956 {
957 case tcc_binary:
958 case tcc_comparison:
959 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
960 return true;
961
962 /* Fallthru. */
963 case tcc_unary:
964 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
965 return true;
966
967 break;
968
969 default:
970 gcc_unreachable ();
971 }
972
973 return false;
974 }
975
976 /* Finds basic ivs. */
977
978 static bool
979 find_bivs (struct ivopts_data *data)
980 {
981 tree phi, step, type, base;
982 bool found = false;
983 struct loop *loop = data->current_loop;
984
985 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
986 {
987 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
988 continue;
989
990 step = determine_biv_step (phi);
991 if (!step)
992 continue;
993
994 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
995 if (contains_abnormal_ssa_name_p (base)
996 || contains_abnormal_ssa_name_p (step))
997 continue;
998
999 type = TREE_TYPE (PHI_RESULT (phi));
1000 base = fold_convert (type, base);
1001 if (step)
1002 step = fold_convert (type, step);
1003
1004 set_iv (data, PHI_RESULT (phi), base, step);
1005 found = true;
1006 }
1007
1008 return found;
1009 }
1010
1011 /* Marks basic ivs. */
1012
1013 static void
1014 mark_bivs (struct ivopts_data *data)
1015 {
1016 tree phi, var;
1017 struct iv *iv, *incr_iv;
1018 struct loop *loop = data->current_loop;
1019 basic_block incr_bb;
1020
1021 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1022 {
1023 iv = get_iv (data, PHI_RESULT (phi));
1024 if (!iv)
1025 continue;
1026
1027 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1028 incr_iv = get_iv (data, var);
1029 if (!incr_iv)
1030 continue;
1031
1032 /* If the increment is in the subloop, ignore it. */
1033 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1034 if (incr_bb->loop_father != data->current_loop
1035 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1036 continue;
1037
1038 iv->biv_p = true;
1039 incr_iv->biv_p = true;
1040 }
1041 }
1042
1043 /* Checks whether STMT defines a linear induction variable and stores its
1044 parameters to BASE and STEP. */
1045
1046 static bool
1047 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1048 tree *base, tree *step)
1049 {
1050 tree lhs;
1051 struct loop *loop = data->current_loop;
1052
1053 *base = NULL_TREE;
1054 *step = NULL_TREE;
1055
1056 if (TREE_CODE (stmt) != MODIFY_EXPR)
1057 return false;
1058
1059 lhs = TREE_OPERAND (stmt, 0);
1060 if (TREE_CODE (lhs) != SSA_NAME)
1061 return false;
1062
1063 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1064 return false;
1065
1066 if (contains_abnormal_ssa_name_p (*base)
1067 || contains_abnormal_ssa_name_p (*step))
1068 return false;
1069
1070 return true;
1071 }
1072
1073 /* Finds general ivs in statement STMT. */
1074
1075 static void
1076 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1077 {
1078 tree base, step;
1079
1080 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1081 return;
1082
1083 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1084 }
1085
1086 /* Finds general ivs in basic block BB. */
1087
1088 static void
1089 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1090 {
1091 block_stmt_iterator bsi;
1092
1093 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1094 find_givs_in_stmt (data, bsi_stmt (bsi));
1095 }
1096
1097 /* Finds general ivs. */
1098
1099 static void
1100 find_givs (struct ivopts_data *data)
1101 {
1102 struct loop *loop = data->current_loop;
1103 basic_block *body = get_loop_body_in_dom_order (loop);
1104 unsigned i;
1105
1106 for (i = 0; i < loop->num_nodes; i++)
1107 find_givs_in_bb (data, body[i]);
1108 free (body);
1109 }
1110
1111 /* For each ssa name defined in LOOP determines whether it is an induction
1112 variable and if so, its initial value and step. */
1113
1114 static bool
1115 find_induction_variables (struct ivopts_data *data)
1116 {
1117 unsigned i;
1118 bitmap_iterator bi;
1119
1120 if (!find_bivs (data))
1121 return false;
1122
1123 find_givs (data);
1124 mark_bivs (data);
1125
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1127 {
1128 struct tree_niter_desc *niter;
1129
1130 niter = niter_for_single_dom_exit (data);
1131
1132 if (niter)
1133 {
1134 fprintf (dump_file, " number of iterations ");
1135 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1136 fprintf (dump_file, "\n");
1137
1138 fprintf (dump_file, " may be zero if ");
1139 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1140 fprintf (dump_file, "\n");
1141 fprintf (dump_file, "\n");
1142 };
1143
1144 fprintf (dump_file, "Induction variables:\n\n");
1145
1146 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1147 {
1148 if (ver_info (data, i)->iv)
1149 dump_iv (dump_file, ver_info (data, i)->iv);
1150 }
1151 }
1152
1153 return true;
1154 }
1155
1156 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1157
1158 static struct iv_use *
1159 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1160 tree stmt, enum use_type use_type)
1161 {
1162 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1163
1164 use->id = n_iv_uses (data);
1165 use->type = use_type;
1166 use->iv = iv;
1167 use->stmt = stmt;
1168 use->op_p = use_p;
1169 use->related_cands = BITMAP_ALLOC (NULL);
1170
1171 /* To avoid showing ssa name in the dumps, if it was not reset by the
1172 caller. */
1173 iv->ssa_name = NULL_TREE;
1174
1175 if (dump_file && (dump_flags & TDF_DETAILS))
1176 dump_use (dump_file, use);
1177
1178 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1179
1180 return use;
1181 }
1182
1183 /* Checks whether OP is a loop-level invariant and if so, records it.
1184 NONLINEAR_USE is true if the invariant is used in a way we do not
1185 handle specially. */
1186
1187 static void
1188 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1189 {
1190 basic_block bb;
1191 struct version_info *info;
1192
1193 if (TREE_CODE (op) != SSA_NAME
1194 || !is_gimple_reg (op))
1195 return;
1196
1197 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1198 if (bb
1199 && flow_bb_inside_loop_p (data->current_loop, bb))
1200 return;
1201
1202 info = name_info (data, op);
1203 info->name = op;
1204 info->has_nonlin_use |= nonlinear_use;
1205 if (!info->inv_id)
1206 info->inv_id = ++data->max_inv_id;
1207 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1208 }
1209
1210 /* Checks whether the use OP is interesting and if so, records it
1211 as TYPE. */
1212
1213 static struct iv_use *
1214 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1215 enum use_type type)
1216 {
1217 struct iv *iv;
1218 struct iv *civ;
1219 tree stmt;
1220 struct iv_use *use;
1221
1222 if (TREE_CODE (op) != SSA_NAME)
1223 return NULL;
1224
1225 iv = get_iv (data, op);
1226 if (!iv)
1227 return NULL;
1228
1229 if (iv->have_use_for)
1230 {
1231 use = iv_use (data, iv->use_id);
1232
1233 gcc_assert (use->type == USE_NONLINEAR_EXPR
1234 || use->type == USE_OUTER);
1235
1236 if (type == USE_NONLINEAR_EXPR)
1237 use->type = USE_NONLINEAR_EXPR;
1238 return use;
1239 }
1240
1241 if (zero_p (iv->step))
1242 {
1243 record_invariant (data, op, true);
1244 return NULL;
1245 }
1246 iv->have_use_for = true;
1247
1248 civ = xmalloc (sizeof (struct iv));
1249 *civ = *iv;
1250
1251 stmt = SSA_NAME_DEF_STMT (op);
1252 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1253 || TREE_CODE (stmt) == MODIFY_EXPR);
1254
1255 use = record_use (data, NULL, civ, stmt, type);
1256 iv->use_id = use->id;
1257
1258 return use;
1259 }
1260
1261 /* Checks whether the use OP is interesting and if so, records it. */
1262
1263 static struct iv_use *
1264 find_interesting_uses_op (struct ivopts_data *data, tree op)
1265 {
1266 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1267 }
1268
1269 /* Records a definition of induction variable OP that is used outside of the
1270 loop. */
1271
1272 static struct iv_use *
1273 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1274 {
1275 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1276 }
1277
1278 /* Checks whether the condition *COND_P in STMT is interesting
1279 and if so, records it. */
1280
1281 static void
1282 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1283 {
1284 tree *op0_p;
1285 tree *op1_p;
1286 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1287 struct iv const_iv;
1288 tree zero = integer_zero_node;
1289
1290 const_iv.step = NULL_TREE;
1291
1292 if (TREE_CODE (*cond_p) != SSA_NAME
1293 && !COMPARISON_CLASS_P (*cond_p))
1294 return;
1295
1296 if (TREE_CODE (*cond_p) == SSA_NAME)
1297 {
1298 op0_p = cond_p;
1299 op1_p = &zero;
1300 }
1301 else
1302 {
1303 op0_p = &TREE_OPERAND (*cond_p, 0);
1304 op1_p = &TREE_OPERAND (*cond_p, 1);
1305 }
1306
1307 if (TREE_CODE (*op0_p) == SSA_NAME)
1308 iv0 = get_iv (data, *op0_p);
1309 else
1310 iv0 = &const_iv;
1311
1312 if (TREE_CODE (*op1_p) == SSA_NAME)
1313 iv1 = get_iv (data, *op1_p);
1314 else
1315 iv1 = &const_iv;
1316
1317 if (/* When comparing with non-invariant value, we may not do any senseful
1318 induction variable elimination. */
1319 (!iv0 || !iv1)
1320 /* Eliminating condition based on two ivs would be nontrivial.
1321 ??? TODO -- it is not really important to handle this case. */
1322 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1323 {
1324 find_interesting_uses_op (data, *op0_p);
1325 find_interesting_uses_op (data, *op1_p);
1326 return;
1327 }
1328
1329 if (zero_p (iv0->step) && zero_p (iv1->step))
1330 {
1331 /* If both are invariants, this is a work for unswitching. */
1332 return;
1333 }
1334
1335 civ = xmalloc (sizeof (struct iv));
1336 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1337 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1338 }
1339
1340 /* Returns true if expression EXPR is obviously invariant in LOOP,
1341 i.e. if all its operands are defined outside of the LOOP. */
1342
1343 bool
1344 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1345 {
1346 basic_block def_bb;
1347 unsigned i, len;
1348
1349 if (is_gimple_min_invariant (expr))
1350 return true;
1351
1352 if (TREE_CODE (expr) == SSA_NAME)
1353 {
1354 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1355 if (def_bb
1356 && flow_bb_inside_loop_p (loop, def_bb))
1357 return false;
1358
1359 return true;
1360 }
1361
1362 if (!EXPR_P (expr))
1363 return false;
1364
1365 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1366 for (i = 0; i < len; i++)
1367 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1368 return false;
1369
1370 return true;
1371 }
1372
1373 /* Cumulates the steps of indices into DATA and replaces their values with the
1374 initial ones. Returns false when the value of the index cannot be determined.
1375 Callback for for_each_index. */
1376
1377 struct ifs_ivopts_data
1378 {
1379 struct ivopts_data *ivopts_data;
1380 tree stmt;
1381 tree *step_p;
1382 };
1383
1384 static bool
1385 idx_find_step (tree base, tree *idx, void *data)
1386 {
1387 struct ifs_ivopts_data *dta = data;
1388 struct iv *iv;
1389 tree step, type, iv_type, iv_step, lbound, off;
1390 struct loop *loop = dta->ivopts_data->current_loop;
1391
1392 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1393 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1394 return false;
1395
1396 /* If base is a component ref, require that the offset of the reference
1397 be invariant. */
1398 if (TREE_CODE (base) == COMPONENT_REF)
1399 {
1400 off = component_ref_field_offset (base);
1401 return expr_invariant_in_loop_p (loop, off);
1402 }
1403
1404 /* If base is array, first check whether we will be able to move the
1405 reference out of the loop (in order to take its address in strength
1406 reduction). In order for this to work we need both lower bound
1407 and step to be loop invariants. */
1408 if (TREE_CODE (base) == ARRAY_REF)
1409 {
1410 step = array_ref_element_size (base);
1411 lbound = array_ref_low_bound (base);
1412
1413 if (!expr_invariant_in_loop_p (loop, step)
1414 || !expr_invariant_in_loop_p (loop, lbound))
1415 return false;
1416 }
1417
1418 if (TREE_CODE (*idx) != SSA_NAME)
1419 return true;
1420
1421 iv = get_iv (dta->ivopts_data, *idx);
1422 if (!iv)
1423 return false;
1424
1425 *idx = iv->base;
1426
1427 if (!iv->step)
1428 return true;
1429
1430 iv_type = TREE_TYPE (iv->base);
1431 type = build_pointer_type (TREE_TYPE (base));
1432 if (TREE_CODE (base) == ARRAY_REF)
1433 {
1434 step = array_ref_element_size (base);
1435
1436 /* We only handle addresses whose step is an integer constant. */
1437 if (TREE_CODE (step) != INTEGER_CST)
1438 return false;
1439 }
1440 else
1441 /* The step for pointer arithmetics already is 1 byte. */
1442 step = build_int_cst (type, 1);
1443
1444 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1445 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1446 type, iv->base, iv->step, dta->stmt);
1447 else
1448 iv_step = fold_convert (iv_type, iv->step);
1449
1450 if (!iv_step)
1451 {
1452 /* The index might wrap. */
1453 return false;
1454 }
1455
1456 step = fold_build2 (MULT_EXPR, type, step, iv_step);
1457
1458 if (!*dta->step_p)
1459 *dta->step_p = step;
1460 else
1461 *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);
1462
1463 return true;
1464 }
1465
1466 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1467 object is passed to it in DATA. */
1468
1469 static bool
1470 idx_record_use (tree base, tree *idx,
1471 void *data)
1472 {
1473 find_interesting_uses_op (data, *idx);
1474 if (TREE_CODE (base) == ARRAY_REF)
1475 {
1476 find_interesting_uses_op (data, array_ref_element_size (base));
1477 find_interesting_uses_op (data, array_ref_low_bound (base));
1478 }
1479 return true;
1480 }
1481
1482 /* Returns true if memory reference REF may be unaligned. */
1483
1484 static bool
1485 may_be_unaligned_p (tree ref)
1486 {
1487 tree base;
1488 tree base_type;
1489 HOST_WIDE_INT bitsize;
1490 HOST_WIDE_INT bitpos;
1491 tree toffset;
1492 enum machine_mode mode;
1493 int unsignedp, volatilep;
1494 unsigned base_align;
1495
1496 /* The test below is basically copy of what expr.c:normal_inner_ref
1497 does to check whether the object must be loaded by parts when
1498 STRICT_ALIGNMENT is true. */
1499 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1500 &unsignedp, &volatilep, true);
1501 base_type = TREE_TYPE (base);
1502 base_align = TYPE_ALIGN (base_type);
1503
1504 if (mode != BLKmode
1505 && (base_align < GET_MODE_ALIGNMENT (mode)
1506 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1507 || bitpos % BITS_PER_UNIT != 0))
1508 return true;
1509
1510 return false;
1511 }
1512
1513 /* Builds ADDR_EXPR of object OBJ. If OBJ is an INDIRECT_REF, the indirect_ref
1514 is stripped instead. */
1515
1516 static tree
1517 build_addr_strip_iref (tree obj)
1518 {
1519 tree type;
1520
1521 if (TREE_CODE (obj) == INDIRECT_REF)
1522 {
1523 type = build_pointer_type (TREE_TYPE (obj));
1524 obj = fold_convert (type, TREE_OPERAND (obj, 0));
1525 }
1526 else
1527 obj = build_addr (obj);
1528
1529 return obj;
1530 }
1531
1532 /* Finds addresses in *OP_P inside STMT. */
1533
1534 static void
1535 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1536 {
1537 tree base = unshare_expr (*op_p), step = NULL;
1538 struct iv *civ;
1539 struct ifs_ivopts_data ifs_ivopts_data;
1540
1541 /* Do not play with volatile memory references. A bit too conservative,
1542 perhaps, but safe. */
1543 if (stmt_ann (stmt)->has_volatile_ops)
1544 goto fail;
1545
1546 /* Ignore bitfields for now. Not really something terribly complicated
1547 to handle. TODO. */
1548 if (TREE_CODE (base) == COMPONENT_REF
1549 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1550 goto fail;
1551
1552 if (STRICT_ALIGNMENT
1553 && may_be_unaligned_p (base))
1554 goto fail;
1555
1556 ifs_ivopts_data.ivopts_data = data;
1557 ifs_ivopts_data.stmt = stmt;
1558 ifs_ivopts_data.step_p = &step;
1559 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1560 || zero_p (step))
1561 goto fail;
1562
1563 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1564 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1565
1566 base = build_addr_strip_iref (base);
1567
1568 civ = alloc_iv (base, step);
1569 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1570 return;
1571
1572 fail:
1573 for_each_index (op_p, idx_record_use, data);
1574 }
1575
1576 /* Finds and records invariants used in STMT. */
1577
1578 static void
1579 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1580 {
1581 ssa_op_iter iter;
1582 use_operand_p use_p;
1583 tree op;
1584
1585 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1586 {
1587 op = USE_FROM_PTR (use_p);
1588 record_invariant (data, op, false);
1589 }
1590 }
1591
1592 /* Finds interesting uses of induction variables in the statement STMT. */
1593
1594 static void
1595 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1596 {
1597 struct iv *iv;
1598 tree op, lhs, rhs;
1599 ssa_op_iter iter;
1600 use_operand_p use_p;
1601
1602 find_invariants_stmt (data, stmt);
1603
1604 if (TREE_CODE (stmt) == COND_EXPR)
1605 {
1606 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1607 return;
1608 }
1609
1610 if (TREE_CODE (stmt) == MODIFY_EXPR)
1611 {
1612 lhs = TREE_OPERAND (stmt, 0);
1613 rhs = TREE_OPERAND (stmt, 1);
1614
1615 if (TREE_CODE (lhs) == SSA_NAME)
1616 {
1617 /* If the statement defines an induction variable, the uses are not
1618 interesting by themselves. */
1619
1620 iv = get_iv (data, lhs);
1621
1622 if (iv && !zero_p (iv->step))
1623 return;
1624 }
1625
1626 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1627 {
1628 case tcc_comparison:
1629 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1630 return;
1631
1632 case tcc_reference:
1633 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1634 if (REFERENCE_CLASS_P (lhs))
1635 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1636 return;
1637
1638 default: ;
1639 }
1640
1641 if (REFERENCE_CLASS_P (lhs)
1642 && is_gimple_val (rhs))
1643 {
1644 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1645 find_interesting_uses_op (data, rhs);
1646 return;
1647 }
1648
1649 /* TODO -- we should also handle address uses of type
1650
1651 memory = call (whatever);
1652
1653 and
1654
1655 call (memory). */
1656 }
1657
1658 if (TREE_CODE (stmt) == PHI_NODE
1659 && bb_for_stmt (stmt) == data->current_loop->header)
1660 {
1661 lhs = PHI_RESULT (stmt);
1662 iv = get_iv (data, lhs);
1663
1664 if (iv && !zero_p (iv->step))
1665 return;
1666 }
1667
1668 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1669 {
1670 op = USE_FROM_PTR (use_p);
1671
1672 if (TREE_CODE (op) != SSA_NAME)
1673 continue;
1674
1675 iv = get_iv (data, op);
1676 if (!iv)
1677 continue;
1678
1679 find_interesting_uses_op (data, op);
1680 }
1681 }
1682
1683 /* Finds interesting uses of induction variables outside of loops
1684 on loop exit edge EXIT. */
1685
1686 static void
1687 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1688 {
1689 tree phi, def;
1690
1691 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1692 {
1693 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1694 find_interesting_uses_outer (data, def);
1695 }
1696 }
1697
1698 /* Finds uses of the induction variables that are interesting. */
1699
1700 static void
1701 find_interesting_uses (struct ivopts_data *data)
1702 {
1703 basic_block bb;
1704 block_stmt_iterator bsi;
1705 tree phi;
1706 basic_block *body = get_loop_body (data->current_loop);
1707 unsigned i;
1708 struct version_info *info;
1709 edge e;
1710
1711 if (dump_file && (dump_flags & TDF_DETAILS))
1712 fprintf (dump_file, "Uses:\n\n");
1713
1714 for (i = 0; i < data->current_loop->num_nodes; i++)
1715 {
1716 edge_iterator ei;
1717 bb = body[i];
1718
1719 FOR_EACH_EDGE (e, ei, bb->succs)
1720 if (e->dest != EXIT_BLOCK_PTR
1721 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1722 find_interesting_uses_outside (data, e);
1723
1724 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1725 find_interesting_uses_stmt (data, phi);
1726 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1727 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1728 }
1729
1730 if (dump_file && (dump_flags & TDF_DETAILS))
1731 {
1732 bitmap_iterator bi;
1733
1734 fprintf (dump_file, "\n");
1735
1736 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1737 {
1738 info = ver_info (data, i);
1739 if (info->inv_id)
1740 {
1741 fprintf (dump_file, " ");
1742 print_generic_expr (dump_file, info->name, TDF_SLIM);
1743 fprintf (dump_file, " is invariant (%d)%s\n",
1744 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1745 }
1746 }
1747
1748 fprintf (dump_file, "\n");
1749 }
1750
1751 free (body);
1752 }
1753
1754 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1755 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1756 we are at the top-level of the processed address. */
1757
1758 static tree
1759 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1760 unsigned HOST_WIDE_INT *offset)
1761 {
1762 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1763 enum tree_code code;
1764 tree type, orig_type = TREE_TYPE (expr);
1765 unsigned HOST_WIDE_INT off0, off1, st;
1766 tree orig_expr = expr;
1767
1768 STRIP_NOPS (expr);
1769
1770 type = TREE_TYPE (expr);
1771 code = TREE_CODE (expr);
1772 *offset = 0;
1773
1774 switch (code)
1775 {
1776 case INTEGER_CST:
1777 if (!cst_and_fits_in_hwi (expr)
1778 || zero_p (expr))
1779 return orig_expr;
1780
1781 *offset = int_cst_value (expr);
1782 return build_int_cst_type (orig_type, 0);
1783
1784 case PLUS_EXPR:
1785 case MINUS_EXPR:
1786 op0 = TREE_OPERAND (expr, 0);
1787 op1 = TREE_OPERAND (expr, 1);
1788
1789 op0 = strip_offset_1 (op0, false, false, &off0);
1790 op1 = strip_offset_1 (op1, false, false, &off1);
1791
1792 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1793 if (op0 == TREE_OPERAND (expr, 0)
1794 && op1 == TREE_OPERAND (expr, 1))
1795 return orig_expr;
1796
1797 if (zero_p (op1))
1798 expr = op0;
1799 else if (zero_p (op0))
1800 {
1801 if (code == PLUS_EXPR)
1802 expr = op1;
1803 else
1804 expr = fold_build1 (NEGATE_EXPR, type, op1);
1805 }
1806 else
1807 expr = fold_build2 (code, type, op0, op1);
1808
1809 return fold_convert (orig_type, expr);
1810
1811 case ARRAY_REF:
1812 if (!inside_addr)
1813 return orig_expr;
1814
1815 step = array_ref_element_size (expr);
1816 if (!cst_and_fits_in_hwi (step))
1817 break;
1818
1819 st = int_cst_value (step);
1820 op1 = TREE_OPERAND (expr, 1);
1821 op1 = strip_offset_1 (op1, false, false, &off1);
1822 *offset = off1 * st;
1823
1824 if (top_compref
1825 && zero_p (op1))
1826 {
1827 /* Strip the component reference completely. */
1828 op0 = TREE_OPERAND (expr, 0);
1829 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1830 *offset += off0;
1831 return op0;
1832 }
1833 break;
1834
1835 case COMPONENT_REF:
1836 if (!inside_addr)
1837 return orig_expr;
1838
1839 tmp = component_ref_field_offset (expr);
1840 if (top_compref
1841 && cst_and_fits_in_hwi (tmp))
1842 {
1843 /* Strip the component reference completely. */
1844 op0 = TREE_OPERAND (expr, 0);
1845 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1846 *offset = off0 + int_cst_value (tmp);
1847 return op0;
1848 }
1849 break;
1850
1851 case ADDR_EXPR:
1852 op0 = TREE_OPERAND (expr, 0);
1853 op0 = strip_offset_1 (op0, true, true, &off0);
1854 *offset += off0;
1855
1856 if (op0 == TREE_OPERAND (expr, 0))
1857 return orig_expr;
1858
1859 expr = build_addr_strip_iref (op0);
1860 return fold_convert (orig_type, expr);
1861
1862 case INDIRECT_REF:
1863 inside_addr = false;
1864 break;
1865
1866 default:
1867 return orig_expr;
1868 }
1869
1870 /* Default handling of expressions for that we want to recurse into
1871 the first operand. */
1872 op0 = TREE_OPERAND (expr, 0);
1873 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1874 *offset += off0;
1875
1876 if (op0 == TREE_OPERAND (expr, 0)
1877 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1878 return orig_expr;
1879
1880 expr = copy_node (expr);
1881 TREE_OPERAND (expr, 0) = op0;
1882 if (op1)
1883 TREE_OPERAND (expr, 1) = op1;
1884
1885 /* Inside address, we might strip the top level component references,
1886 thus changing type of the expresion. Handling of ADDR_EXPR
1887 will fix that. */
1888 expr = fold_convert (orig_type, expr);
1889
1890 return expr;
1891 }
1892
1893 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1894
1895 static tree
1896 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1897 {
1898 return strip_offset_1 (expr, false, false, offset);
1899 }
1900
1901 /* Returns variant of TYPE that can be used as base for different uses.
1902 For integer types, we return unsigned variant of the type, which
1903 avoids problems with overflows. For pointer types, we return void *. */
1904
1905 static tree
1906 generic_type_for (tree type)
1907 {
1908 if (POINTER_TYPE_P (type))
1909 return ptr_type_node;
1910
1911 if (TYPE_UNSIGNED (type))
1912 return type;
1913
1914 return unsigned_type_for (type);
1915 }
1916
1917 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1918 the bitmap to that we should store it. */
1919
1920 static struct ivopts_data *fd_ivopts_data;
1921 static tree
1922 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1923 {
1924 bitmap *depends_on = data;
1925 struct version_info *info;
1926
1927 if (TREE_CODE (*expr_p) != SSA_NAME)
1928 return NULL_TREE;
1929 info = name_info (fd_ivopts_data, *expr_p);
1930
1931 if (!info->inv_id || info->has_nonlin_use)
1932 return NULL_TREE;
1933
1934 if (!*depends_on)
1935 *depends_on = BITMAP_ALLOC (NULL);
1936 bitmap_set_bit (*depends_on, info->inv_id);
1937
1938 return NULL_TREE;
1939 }
1940
1941 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1942 position to POS. If USE is not NULL, the candidate is set as related to
1943 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1944 replacement of the final value of the iv by a direct computation. */
1945
1946 static struct iv_cand *
1947 add_candidate_1 (struct ivopts_data *data,
1948 tree base, tree step, bool important, enum iv_position pos,
1949 struct iv_use *use, tree incremented_at)
1950 {
1951 unsigned i;
1952 struct iv_cand *cand = NULL;
1953 tree type, orig_type;
1954
1955 if (base)
1956 {
1957 orig_type = TREE_TYPE (base);
1958 type = generic_type_for (orig_type);
1959 if (type != orig_type)
1960 {
1961 base = fold_convert (type, base);
1962 if (step)
1963 step = fold_convert (type, step);
1964 }
1965 }
1966
1967 for (i = 0; i < n_iv_cands (data); i++)
1968 {
1969 cand = iv_cand (data, i);
1970
1971 if (cand->pos != pos)
1972 continue;
1973
1974 if (cand->incremented_at != incremented_at)
1975 continue;
1976
1977 if (!cand->iv)
1978 {
1979 if (!base && !step)
1980 break;
1981
1982 continue;
1983 }
1984
1985 if (!base && !step)
1986 continue;
1987
1988 if (!operand_equal_p (base, cand->iv->base, 0))
1989 continue;
1990
1991 if (zero_p (cand->iv->step))
1992 {
1993 if (zero_p (step))
1994 break;
1995 }
1996 else
1997 {
1998 if (step && operand_equal_p (step, cand->iv->step, 0))
1999 break;
2000 }
2001 }
2002
2003 if (i == n_iv_cands (data))
2004 {
2005 cand = xcalloc (1, sizeof (struct iv_cand));
2006 cand->id = i;
2007
2008 if (!base && !step)
2009 cand->iv = NULL;
2010 else
2011 cand->iv = alloc_iv (base, step);
2012
2013 cand->pos = pos;
2014 if (pos != IP_ORIGINAL && cand->iv)
2015 {
2016 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2017 cand->var_after = cand->var_before;
2018 }
2019 cand->important = important;
2020 cand->incremented_at = incremented_at;
2021 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2022
2023 if (step
2024 && TREE_CODE (step) != INTEGER_CST)
2025 {
2026 fd_ivopts_data = data;
2027 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2028 }
2029
2030 if (dump_file && (dump_flags & TDF_DETAILS))
2031 dump_cand (dump_file, cand);
2032 }
2033
2034 if (important && !cand->important)
2035 {
2036 cand->important = true;
2037 if (dump_file && (dump_flags & TDF_DETAILS))
2038 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2039 }
2040
2041 if (use)
2042 {
2043 bitmap_set_bit (use->related_cands, i);
2044 if (dump_file && (dump_flags & TDF_DETAILS))
2045 fprintf (dump_file, "Candidate %d is related to use %d\n",
2046 cand->id, use->id);
2047 }
2048
2049 return cand;
2050 }
2051
2052 /* Returns true if incrementing the induction variable at the end of the LOOP
2053 is allowed.
2054
2055 The purpose is to avoid splitting latch edge with a biv increment, thus
2056 creating a jump, possibly confusing other optimization passes and leaving
2057 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2058 is not available (so we do not have a better alternative), or if the latch
2059 edge is already nonempty. */
2060
2061 static bool
2062 allow_ip_end_pos_p (struct loop *loop)
2063 {
2064 if (!ip_normal_pos (loop))
2065 return true;
2066
2067 if (!empty_block_p (ip_end_pos (loop)))
2068 return true;
2069
2070 return false;
2071 }
2072
2073 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2074 position to POS. If USE is not NULL, the candidate is set as related to
2075 it. The candidate computation is scheduled on all available positions. */
2076
2077 static void
2078 add_candidate (struct ivopts_data *data,
2079 tree base, tree step, bool important, struct iv_use *use)
2080 {
2081 if (ip_normal_pos (data->current_loop))
2082 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2083 if (ip_end_pos (data->current_loop)
2084 && allow_ip_end_pos_p (data->current_loop))
2085 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2086 }
2087
2088 /* Add a standard "0 + 1 * iteration" iv candidate for a
2089 type with SIZE bits. */
2090
2091 static void
2092 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2093 unsigned int size)
2094 {
2095 tree type = lang_hooks.types.type_for_size (size, true);
2096 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2097 true, NULL);
2098 }
2099
2100 /* Adds standard iv candidates. */
2101
2102 static void
2103 add_standard_iv_candidates (struct ivopts_data *data)
2104 {
2105 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2106
2107 /* The same for a double-integer type if it is still fast enough. */
2108 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2109 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2110 }
2111
2112
2113 /* Adds candidates bases on the old induction variable IV. */
2114
2115 static void
2116 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2117 {
2118 tree phi, def;
2119 struct iv_cand *cand;
2120
2121 add_candidate (data, iv->base, iv->step, true, NULL);
2122
2123 /* The same, but with initial value zero. */
2124 add_candidate (data,
2125 build_int_cst (TREE_TYPE (iv->base), 0),
2126 iv->step, true, NULL);
2127
2128 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2129 if (TREE_CODE (phi) == PHI_NODE)
2130 {
2131 /* Additionally record the possibility of leaving the original iv
2132 untouched. */
2133 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2134 cand = add_candidate_1 (data,
2135 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2136 SSA_NAME_DEF_STMT (def));
2137 cand->var_before = iv->ssa_name;
2138 cand->var_after = def;
2139 }
2140 }
2141
2142 /* Adds candidates based on the old induction variables. */
2143
2144 static void
2145 add_old_ivs_candidates (struct ivopts_data *data)
2146 {
2147 unsigned i;
2148 struct iv *iv;
2149 bitmap_iterator bi;
2150
2151 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2152 {
2153 iv = ver_info (data, i)->iv;
2154 if (iv && iv->biv_p && !zero_p (iv->step))
2155 add_old_iv_candidates (data, iv);
2156 }
2157 }
2158
2159 /* Adds candidates based on the value of the induction variable IV and USE. */
2160
2161 static void
2162 add_iv_value_candidates (struct ivopts_data *data,
2163 struct iv *iv, struct iv_use *use)
2164 {
2165 unsigned HOST_WIDE_INT offset;
2166 tree base;
2167
2168 add_candidate (data, iv->base, iv->step, false, use);
2169
2170 /* The same, but with initial value zero. Make such variable important,
2171 since it is generic enough so that possibly many uses may be based
2172 on it. */
2173 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2174 iv->step, true, use);
2175
2176 /* Third, try removing the constant offset. */
2177 base = strip_offset (iv->base, &offset);
2178 if (offset)
2179 add_candidate (data, base, iv->step, false, use);
2180 }
2181
2182 /* Possibly adds pseudocandidate for replacing the final value of USE by
2183 a direct computation. */
2184
2185 static void
2186 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2187 {
2188 struct tree_niter_desc *niter;
2189
2190 /* We must know where we exit the loop and how many times does it roll. */
2191 niter = niter_for_single_dom_exit (data);
2192 if (!niter
2193 || !zero_p (niter->may_be_zero))
2194 return;
2195
2196 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2197 }
2198
2199 /* Adds candidates based on the uses. */
2200
2201 static void
2202 add_derived_ivs_candidates (struct ivopts_data *data)
2203 {
2204 unsigned i;
2205
2206 for (i = 0; i < n_iv_uses (data); i++)
2207 {
2208 struct iv_use *use = iv_use (data, i);
2209
2210 if (!use)
2211 continue;
2212
2213 switch (use->type)
2214 {
2215 case USE_NONLINEAR_EXPR:
2216 case USE_COMPARE:
2217 case USE_ADDRESS:
2218 /* Just add the ivs based on the value of the iv used here. */
2219 add_iv_value_candidates (data, use->iv, use);
2220 break;
2221
2222 case USE_OUTER:
2223 add_iv_value_candidates (data, use->iv, use);
2224
2225 /* Additionally, add the pseudocandidate for the possibility to
2226 replace the final value by a direct computation. */
2227 add_iv_outer_candidates (data, use);
2228 break;
2229
2230 default:
2231 gcc_unreachable ();
2232 }
2233 }
2234 }
2235
2236 /* Record important candidates and add them to related_cands bitmaps
2237 if needed. */
2238
2239 static void
2240 record_important_candidates (struct ivopts_data *data)
2241 {
2242 unsigned i;
2243 struct iv_use *use;
2244
2245 for (i = 0; i < n_iv_cands (data); i++)
2246 {
2247 struct iv_cand *cand = iv_cand (data, i);
2248
2249 if (cand->important)
2250 bitmap_set_bit (data->important_candidates, i);
2251 }
2252
2253 data->consider_all_candidates = (n_iv_cands (data)
2254 <= CONSIDER_ALL_CANDIDATES_BOUND);
2255
2256 if (data->consider_all_candidates)
2257 {
2258 /* We will not need "related_cands" bitmaps in this case,
2259 so release them to decrease peak memory consumption. */
2260 for (i = 0; i < n_iv_uses (data); i++)
2261 {
2262 use = iv_use (data, i);
2263 BITMAP_FREE (use->related_cands);
2264 }
2265 }
2266 else
2267 {
2268 /* Add important candidates to the related_cands bitmaps. */
2269 for (i = 0; i < n_iv_uses (data); i++)
2270 bitmap_ior_into (iv_use (data, i)->related_cands,
2271 data->important_candidates);
2272 }
2273 }
2274
2275 /* Finds the candidates for the induction variables. */
2276
2277 static void
2278 find_iv_candidates (struct ivopts_data *data)
2279 {
2280 /* Add commonly used ivs. */
2281 add_standard_iv_candidates (data);
2282
2283 /* Add old induction variables. */
2284 add_old_ivs_candidates (data);
2285
2286 /* Add induction variables derived from uses. */
2287 add_derived_ivs_candidates (data);
2288
2289 /* Record the important candidates. */
2290 record_important_candidates (data);
2291 }
2292
2293 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2294 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2295 we allocate a simple list to every use. */
2296
2297 static void
2298 alloc_use_cost_map (struct ivopts_data *data)
2299 {
2300 unsigned i, size, s, j;
2301
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 {
2304 struct iv_use *use = iv_use (data, i);
2305 bitmap_iterator bi;
2306
2307 if (data->consider_all_candidates)
2308 size = n_iv_cands (data);
2309 else
2310 {
2311 s = 0;
2312 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2313 {
2314 s++;
2315 }
2316
2317 /* Round up to the power of two, so that moduling by it is fast. */
2318 for (size = 1; size < s; size <<= 1)
2319 continue;
2320 }
2321
2322 use->n_map_members = size;
2323 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2324 }
2325 }
2326
2327 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2328 on invariants DEPENDS_ON and that the value used in expressing it
2329 is VALUE.*/
2330
2331 static void
2332 set_use_iv_cost (struct ivopts_data *data,
2333 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2334 bitmap depends_on, tree value)
2335 {
2336 unsigned i, s;
2337
2338 if (cost == INFTY)
2339 {
2340 BITMAP_FREE (depends_on);
2341 return;
2342 }
2343
2344 if (data->consider_all_candidates)
2345 {
2346 use->cost_map[cand->id].cand = cand;
2347 use->cost_map[cand->id].cost = cost;
2348 use->cost_map[cand->id].depends_on = depends_on;
2349 use->cost_map[cand->id].value = value;
2350 return;
2351 }
2352
2353 /* n_map_members is a power of two, so this computes modulo. */
2354 s = cand->id & (use->n_map_members - 1);
2355 for (i = s; i < use->n_map_members; i++)
2356 if (!use->cost_map[i].cand)
2357 goto found;
2358 for (i = 0; i < s; i++)
2359 if (!use->cost_map[i].cand)
2360 goto found;
2361
2362 gcc_unreachable ();
2363
2364 found:
2365 use->cost_map[i].cand = cand;
2366 use->cost_map[i].cost = cost;
2367 use->cost_map[i].depends_on = depends_on;
2368 use->cost_map[i].value = value;
2369 }
2370
2371 /* Gets cost of (USE, CANDIDATE) pair. */
2372
2373 static struct cost_pair *
2374 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2375 struct iv_cand *cand)
2376 {
2377 unsigned i, s;
2378 struct cost_pair *ret;
2379
2380 if (!cand)
2381 return NULL;
2382
2383 if (data->consider_all_candidates)
2384 {
2385 ret = use->cost_map + cand->id;
2386 if (!ret->cand)
2387 return NULL;
2388
2389 return ret;
2390 }
2391
2392 /* n_map_members is a power of two, so this computes modulo. */
2393 s = cand->id & (use->n_map_members - 1);
2394 for (i = s; i < use->n_map_members; i++)
2395 if (use->cost_map[i].cand == cand)
2396 return use->cost_map + i;
2397
2398 for (i = 0; i < s; i++)
2399 if (use->cost_map[i].cand == cand)
2400 return use->cost_map + i;
2401
2402 return NULL;
2403 }
2404
2405 /* Returns estimate on cost of computing SEQ. */
2406
2407 static unsigned
2408 seq_cost (rtx seq)
2409 {
2410 unsigned cost = 0;
2411 rtx set;
2412
2413 for (; seq; seq = NEXT_INSN (seq))
2414 {
2415 set = single_set (seq);
2416 if (set)
2417 cost += rtx_cost (set, SET);
2418 else
2419 cost++;
2420 }
2421
2422 return cost;
2423 }
2424
2425 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2426 static rtx
2427 produce_memory_decl_rtl (tree obj, int *regno)
2428 {
2429 rtx x;
2430
2431 gcc_assert (obj);
2432 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2433 {
2434 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2435 x = gen_rtx_SYMBOL_REF (Pmode, name);
2436 }
2437 else
2438 x = gen_raw_REG (Pmode, (*regno)++);
2439
2440 return gen_rtx_MEM (DECL_MODE (obj), x);
2441 }
2442
2443 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2444 walk_tree. DATA contains the actual fake register number. */
2445
2446 static tree
2447 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2448 {
2449 tree obj = NULL_TREE;
2450 rtx x = NULL_RTX;
2451 int *regno = data;
2452
2453 switch (TREE_CODE (*expr_p))
2454 {
2455 case ADDR_EXPR:
2456 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2457 handled_component_p (*expr_p);
2458 expr_p = &TREE_OPERAND (*expr_p, 0))
2459 continue;
2460 obj = *expr_p;
2461 if (DECL_P (obj))
2462 x = produce_memory_decl_rtl (obj, regno);
2463 break;
2464
2465 case SSA_NAME:
2466 *ws = 0;
2467 obj = SSA_NAME_VAR (*expr_p);
2468 if (!DECL_RTL_SET_P (obj))
2469 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2470 break;
2471
2472 case VAR_DECL:
2473 case PARM_DECL:
2474 case RESULT_DECL:
2475 *ws = 0;
2476 obj = *expr_p;
2477
2478 if (DECL_RTL_SET_P (obj))
2479 break;
2480
2481 if (DECL_MODE (obj) == BLKmode)
2482 x = produce_memory_decl_rtl (obj, regno);
2483 else
2484 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2485
2486 break;
2487
2488 default:
2489 break;
2490 }
2491
2492 if (x)
2493 {
2494 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2495 SET_DECL_RTL (obj, x);
2496 }
2497
2498 return NULL_TREE;
2499 }
2500
2501 /* Determines cost of the computation of EXPR. */
2502
2503 static unsigned
2504 computation_cost (tree expr)
2505 {
2506 rtx seq, rslt;
2507 tree type = TREE_TYPE (expr);
2508 unsigned cost;
2509 /* Avoid using hard regs in ways which may be unsupported. */
2510 int regno = LAST_VIRTUAL_REGISTER + 1;
2511
2512 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2513 start_sequence ();
2514 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2515 seq = get_insns ();
2516 end_sequence ();
2517
2518 cost = seq_cost (seq);
2519 if (MEM_P (rslt))
2520 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2521
2522 return cost;
2523 }
2524
2525 /* Returns variable containing the value of candidate CAND at statement AT. */
2526
2527 static tree
2528 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2529 {
2530 if (stmt_after_increment (loop, cand, stmt))
2531 return cand->var_after;
2532 else
2533 return cand->var_before;
2534 }
2535
2536 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2537 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2538
2539 static int
2540 tree_int_cst_sign_bit (tree t)
2541 {
2542 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2543 unsigned HOST_WIDE_INT w;
2544
2545 if (bitno < HOST_BITS_PER_WIDE_INT)
2546 w = TREE_INT_CST_LOW (t);
2547 else
2548 {
2549 w = TREE_INT_CST_HIGH (t);
2550 bitno -= HOST_BITS_PER_WIDE_INT;
2551 }
2552
2553 return (w >> bitno) & 1;
2554 }
2555
2556 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2557 return cst. Otherwise return NULL_TREE. */
2558
2559 static tree
2560 constant_multiple_of (tree type, tree top, tree bot)
2561 {
2562 tree res, mby, p0, p1;
2563 enum tree_code code;
2564 bool negate;
2565
2566 STRIP_NOPS (top);
2567 STRIP_NOPS (bot);
2568
2569 if (operand_equal_p (top, bot, 0))
2570 return build_int_cst (type, 1);
2571
2572 code = TREE_CODE (top);
2573 switch (code)
2574 {
2575 case MULT_EXPR:
2576 mby = TREE_OPERAND (top, 1);
2577 if (TREE_CODE (mby) != INTEGER_CST)
2578 return NULL_TREE;
2579
2580 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2581 if (!res)
2582 return NULL_TREE;
2583
2584 return fold_binary_to_constant (MULT_EXPR, type, res,
2585 fold_convert (type, mby));
2586
2587 case PLUS_EXPR:
2588 case MINUS_EXPR:
2589 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2590 if (!p0)
2591 return NULL_TREE;
2592 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2593 if (!p1)
2594 return NULL_TREE;
2595
2596 return fold_binary_to_constant (code, type, p0, p1);
2597
2598 case INTEGER_CST:
2599 if (TREE_CODE (bot) != INTEGER_CST)
2600 return NULL_TREE;
2601
2602 bot = fold_convert (type, bot);
2603 top = fold_convert (type, top);
2604
2605 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2606 the result afterwards. */
2607 if (tree_int_cst_sign_bit (bot))
2608 {
2609 negate = true;
2610 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2611 }
2612 else
2613 negate = false;
2614
2615 /* Ditto for TOP. */
2616 if (tree_int_cst_sign_bit (top))
2617 {
2618 negate = !negate;
2619 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2620 }
2621
2622 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2623 return NULL_TREE;
2624
2625 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2626 if (negate)
2627 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2628 return res;
2629
2630 default:
2631 return NULL_TREE;
2632 }
2633 }
2634
2635 /* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
2636 to make things simpler; this is sufficient in most cases. */
2637
2638 #define MAX_AFF_ELTS 8
2639
2640 struct affine_tree_combination
2641 {
2642 /* Type of the result of the combination. */
2643 tree type;
2644
2645 /* Mask modulo that the operations are performed. */
2646 unsigned HOST_WIDE_INT mask;
2647
2648 /* Constant offset. */
2649 unsigned HOST_WIDE_INT offset;
2650
2651 /* Number of elements of the combination. */
2652 unsigned n;
2653
2654 /* Elements and their coefficients. */
2655 tree elts[MAX_AFF_ELTS];
2656 unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2657
2658 /* Remainder of the expression. */
2659 tree rest;
2660 };
2661
2662 /* Sets COMB to CST. */
2663
2664 static void
2665 aff_combination_const (struct affine_tree_combination *comb, tree type,
2666 unsigned HOST_WIDE_INT cst)
2667 {
2668 unsigned prec = TYPE_PRECISION (type);
2669
2670 comb->type = type;
2671 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2672
2673 comb->n = 0;
2674 comb->rest = NULL_TREE;
2675 comb->offset = cst & comb->mask;
2676 }
2677
2678 /* Sets COMB to single element ELT. */
2679
2680 static void
2681 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2682 {
2683 unsigned prec = TYPE_PRECISION (type);
2684
2685 comb->type = type;
2686 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2687
2688 comb->n = 1;
2689 comb->elts[0] = elt;
2690 comb->coefs[0] = 1;
2691 comb->rest = NULL_TREE;
2692 comb->offset = 0;
2693 }
2694
2695 /* Scales COMB by SCALE. */
2696
2697 static void
2698 aff_combination_scale (struct affine_tree_combination *comb,
2699 unsigned HOST_WIDE_INT scale)
2700 {
2701 unsigned i, j;
2702
2703 if (scale == 1)
2704 return;
2705
2706 if (scale == 0)
2707 {
2708 aff_combination_const (comb, comb->type, 0);
2709 return;
2710 }
2711
2712 comb->offset = (scale * comb->offset) & comb->mask;
2713 for (i = 0, j = 0; i < comb->n; i++)
2714 {
2715 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2716 comb->elts[j] = comb->elts[i];
2717 if (comb->coefs[j] != 0)
2718 j++;
2719 }
2720 comb->n = j;
2721
2722 if (comb->rest)
2723 {
2724 if (comb->n < MAX_AFF_ELTS)
2725 {
2726 comb->coefs[comb->n] = scale;
2727 comb->elts[comb->n] = comb->rest;
2728 comb->rest = NULL_TREE;
2729 comb->n++;
2730 }
2731 else
2732 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2733 build_int_cst_type (comb->type, scale));
2734 }
2735 }
2736
2737 /* Adds ELT * SCALE to COMB. */
2738
2739 static void
2740 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2741 unsigned HOST_WIDE_INT scale)
2742 {
2743 unsigned i;
2744
2745 if (scale == 0)
2746 return;
2747
2748 for (i = 0; i < comb->n; i++)
2749 if (operand_equal_p (comb->elts[i], elt, 0))
2750 {
2751 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2752 if (comb->coefs[i])
2753 return;
2754
2755 comb->n--;
2756 comb->coefs[i] = comb->coefs[comb->n];
2757 comb->elts[i] = comb->elts[comb->n];
2758 return;
2759 }
2760 if (comb->n < MAX_AFF_ELTS)
2761 {
2762 comb->coefs[comb->n] = scale;
2763 comb->elts[comb->n] = elt;
2764 comb->n++;
2765 return;
2766 }
2767
2768 if (scale == 1)
2769 elt = fold_convert (comb->type, elt);
2770 else
2771 elt = fold_build2 (MULT_EXPR, comb->type,
2772 fold_convert (comb->type, elt),
2773 build_int_cst_type (comb->type, scale));
2774
2775 if (comb->rest)
2776 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2777 else
2778 comb->rest = elt;
2779 }
2780
2781 /* Adds COMB2 to COMB1. */
2782
2783 static void
2784 aff_combination_add (struct affine_tree_combination *comb1,
2785 struct affine_tree_combination *comb2)
2786 {
2787 unsigned i;
2788
2789 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2790 for (i = 0; i < comb2-> n; i++)
2791 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2792 if (comb2->rest)
2793 aff_combination_add_elt (comb1, comb2->rest, 1);
2794 }
2795
2796 /* Splits EXPR into an affine combination of parts. */
2797
2798 static void
2799 tree_to_aff_combination (tree expr, tree type,
2800 struct affine_tree_combination *comb)
2801 {
2802 struct affine_tree_combination tmp;
2803 enum tree_code code;
2804 tree cst, core, toffset;
2805 HOST_WIDE_INT bitpos, bitsize;
2806 enum machine_mode mode;
2807 int unsignedp, volatilep;
2808
2809 STRIP_NOPS (expr);
2810
2811 code = TREE_CODE (expr);
2812 switch (code)
2813 {
2814 case INTEGER_CST:
2815 aff_combination_const (comb, type, int_cst_value (expr));
2816 return;
2817
2818 case PLUS_EXPR:
2819 case MINUS_EXPR:
2820 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2821 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2822 if (code == MINUS_EXPR)
2823 aff_combination_scale (&tmp, -1);
2824 aff_combination_add (comb, &tmp);
2825 return;
2826
2827 case MULT_EXPR:
2828 cst = TREE_OPERAND (expr, 1);
2829 if (TREE_CODE (cst) != INTEGER_CST)
2830 break;
2831 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2832 aff_combination_scale (comb, int_cst_value (cst));
2833 return;
2834
2835 case NEGATE_EXPR:
2836 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2837 aff_combination_scale (comb, -1);
2838 return;
2839
2840 case ADDR_EXPR:
2841 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2842 &toffset, &mode, &unsignedp, &volatilep,
2843 false);
2844 if (bitpos % BITS_PER_UNIT != 0)
2845 break;
2846 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2847 core = build_addr_strip_iref (core);
2848 if (TREE_CODE (core) == ADDR_EXPR)
2849 aff_combination_add_elt (comb, core, 1);
2850 else
2851 {
2852 tree_to_aff_combination (core, type, &tmp);
2853 aff_combination_add (comb, &tmp);
2854 }
2855 if (toffset)
2856 {
2857 tree_to_aff_combination (toffset, type, &tmp);
2858 aff_combination_add (comb, &tmp);
2859 }
2860 return;
2861
2862 default:
2863 break;
2864 }
2865
2866 aff_combination_elt (comb, type, expr);
2867 }
2868
2869 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2870
2871 static tree
2872 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2873 unsigned HOST_WIDE_INT mask)
2874 {
2875 enum tree_code code;
2876
2877 scale &= mask;
2878 elt = fold_convert (type, elt);
2879
2880 if (scale == 1)
2881 {
2882 if (!expr)
2883 return elt;
2884
2885 return fold_build2 (PLUS_EXPR, type, expr, elt);
2886 }
2887
2888 if (scale == mask)
2889 {
2890 if (!expr)
2891 return fold_build1 (NEGATE_EXPR, type, elt);
2892
2893 return fold_build2 (MINUS_EXPR, type, expr, elt);
2894 }
2895
2896 if (!expr)
2897 return fold_build2 (MULT_EXPR, type, elt,
2898 build_int_cst_type (type, scale));
2899
2900 if ((scale | (mask >> 1)) == mask)
2901 {
2902 /* Scale is negative. */
2903 code = MINUS_EXPR;
2904 scale = (-scale) & mask;
2905 }
2906 else
2907 code = PLUS_EXPR;
2908
2909 elt = fold_build2 (MULT_EXPR, type, elt,
2910 build_int_cst_type (type, scale));
2911 return fold_build2 (code, type, expr, elt);
2912 }
2913
2914 /* Makes tree from the affine combination COMB. */
2915
2916 static tree
2917 aff_combination_to_tree (struct affine_tree_combination *comb)
2918 {
2919 tree type = comb->type;
2920 tree expr = comb->rest;
2921 unsigned i;
2922 unsigned HOST_WIDE_INT off, sgn;
2923
2924 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2925
2926 for (i = 0; i < comb->n; i++)
2927 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2928 comb->mask);
2929
2930 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2931 {
2932 /* Offset is negative. */
2933 off = (-comb->offset) & comb->mask;
2934 sgn = comb->mask;
2935 }
2936 else
2937 {
2938 off = comb->offset;
2939 sgn = 1;
2940 }
2941 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2942 comb->mask);
2943 }
2944
2945 /* Folds X + RATIO * Y in TYPE. */
2946
2947 static tree
2948 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2949 {
2950 enum tree_code code;
2951 tree cst;
2952 struct affine_tree_combination cx, cy;
2953
2954 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2955 {
2956 if (ratio == 1)
2957 return fold_build2 (PLUS_EXPR, type, x, y);
2958 if (ratio == -1)
2959 return fold_build2 (MINUS_EXPR, type, x, y);
2960
2961 if (ratio < 0)
2962 {
2963 code = MINUS_EXPR;
2964 ratio = -ratio;
2965 }
2966 else
2967 code = PLUS_EXPR;
2968
2969 cst = build_int_cst_type (type, ratio);
2970 y = fold_build2 (MULT_EXPR, type, y, cst);
2971 return fold_build2 (code, type, x, y);
2972 }
2973
2974 tree_to_aff_combination (x, type, &cx);
2975 tree_to_aff_combination (y, type, &cy);
2976 aff_combination_scale (&cy, ratio);
2977 aff_combination_add (&cx, &cy);
2978
2979 return aff_combination_to_tree (&cx);
2980 }
2981
2982 /* Determines the expression by that USE is expressed from induction variable
2983 CAND at statement AT in LOOP. */
2984
2985 static tree
2986 get_computation_at (struct loop *loop,
2987 struct iv_use *use, struct iv_cand *cand, tree at)
2988 {
2989 tree ubase = use->iv->base;
2990 tree ustep = use->iv->step;
2991 tree cbase = cand->iv->base;
2992 tree cstep = cand->iv->step;
2993 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2994 tree uutype;
2995 tree expr, delta;
2996 tree ratio;
2997 unsigned HOST_WIDE_INT ustepi, cstepi;
2998 HOST_WIDE_INT ratioi;
2999
3000 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3001 {
3002 /* We do not have a precision to express the values of use. */
3003 return NULL_TREE;
3004 }
3005
3006 expr = var_at_stmt (loop, cand, at);
3007
3008 if (TREE_TYPE (expr) != ctype)
3009 {
3010 /* This may happen with the original ivs. */
3011 expr = fold_convert (ctype, expr);
3012 }
3013
3014 if (TYPE_UNSIGNED (utype))
3015 uutype = utype;
3016 else
3017 {
3018 uutype = unsigned_type_for (utype);
3019 ubase = fold_convert (uutype, ubase);
3020 ustep = fold_convert (uutype, ustep);
3021 }
3022
3023 if (uutype != ctype)
3024 {
3025 expr = fold_convert (uutype, expr);
3026 cbase = fold_convert (uutype, cbase);
3027 cstep = fold_convert (uutype, cstep);
3028 }
3029
3030 if (cst_and_fits_in_hwi (cstep)
3031 && cst_and_fits_in_hwi (ustep))
3032 {
3033 ustepi = int_cst_value (ustep);
3034 cstepi = int_cst_value (cstep);
3035
3036 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3037 {
3038 /* TODO maybe consider case when ustep divides cstep and the ratio is
3039 a power of 2 (so that the division is fast to execute)? We would
3040 need to be much more careful with overflows etc. then. */
3041 return NULL_TREE;
3042 }
3043
3044 ratio = build_int_cst_type (uutype, ratioi);
3045 }
3046 else
3047 {
3048 ratio = constant_multiple_of (uutype, ustep, cstep);
3049 if (!ratio)
3050 return NULL_TREE;
3051
3052 /* Ratioi is only used to detect special cases when the multiplicative
3053 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3054 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3055 to integer_onep/integer_all_onesp, since the former ignores
3056 TREE_OVERFLOW. */
3057 if (cst_and_fits_in_hwi (ratio))
3058 ratioi = int_cst_value (ratio);
3059 else if (integer_onep (ratio))
3060 ratioi = 1;
3061 else if (integer_all_onesp (ratio))
3062 ratioi = -1;
3063 else
3064 ratioi = 0;
3065 }
3066
3067 /* We may need to shift the value if we are after the increment. */
3068 if (stmt_after_increment (loop, cand, at))
3069 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3070
3071 /* use = ubase - ratio * cbase + ratio * var.
3072
3073 In general case ubase + ratio * (var - cbase) could be better (one less
3074 multiplication), but often it is possible to eliminate redundant parts
3075 of computations from (ubase - ratio * cbase) term, and if it does not
3076 happen, fold is able to apply the distributive law to obtain this form
3077 anyway. */
3078
3079 if (ratioi == 1)
3080 {
3081 delta = fold_affine_sum (uutype, ubase, cbase, -1);
3082 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3083 }
3084 else if (ratioi == -1)
3085 {
3086 delta = fold_affine_sum (uutype, ubase, cbase, 1);
3087 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3088 }
3089 else
3090 {
3091 if (ratioi)
3092 delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3093 else
3094 {
3095 delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3096 delta = fold_affine_sum (uutype, ubase, delta, -1);
3097 }
3098 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3099 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3100 }
3101
3102 return fold_convert (utype, expr);
3103 }
3104
3105 /* Determines the expression by that USE is expressed from induction variable
3106 CAND in LOOP. */
3107
3108 static tree
3109 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3110 {
3111 return get_computation_at (loop, use, cand, use->stmt);
3112 }
3113
3114 /* Returns cost of addition in MODE. */
3115
3116 static unsigned
3117 add_cost (enum machine_mode mode)
3118 {
3119 static unsigned costs[NUM_MACHINE_MODES];
3120 rtx seq;
3121 unsigned cost;
3122
3123 if (costs[mode])
3124 return costs[mode];
3125
3126 start_sequence ();
3127 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3128 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3129 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3130 NULL_RTX);
3131 seq = get_insns ();
3132 end_sequence ();
3133
3134 cost = seq_cost (seq);
3135 if (!cost)
3136 cost = 1;
3137
3138 costs[mode] = cost;
3139
3140 if (dump_file && (dump_flags & TDF_DETAILS))
3141 fprintf (dump_file, "Addition in %s costs %d\n",
3142 GET_MODE_NAME (mode), cost);
3143 return cost;
3144 }
3145
3146 /* Entry in a hashtable of already known costs for multiplication. */
3147 struct mbc_entry
3148 {
3149 HOST_WIDE_INT cst; /* The constant to multiply by. */
3150 enum machine_mode mode; /* In mode. */
3151 unsigned cost; /* The cost. */
3152 };
3153
3154 /* Counts hash value for the ENTRY. */
3155
3156 static hashval_t
3157 mbc_entry_hash (const void *entry)
3158 {
3159 const struct mbc_entry *e = entry;
3160
3161 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3162 }
3163
3164 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3165
3166 static int
3167 mbc_entry_eq (const void *entry1, const void *entry2)
3168 {
3169 const struct mbc_entry *e1 = entry1;
3170 const struct mbc_entry *e2 = entry2;
3171
3172 return (e1->mode == e2->mode
3173 && e1->cst == e2->cst);
3174 }
3175
3176 /* Returns cost of multiplication by constant CST in MODE. */
3177
3178 static unsigned
3179 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3180 {
3181 static htab_t costs;
3182 struct mbc_entry **cached, act;
3183 rtx seq;
3184 unsigned cost;
3185
3186 if (!costs)
3187 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3188
3189 act.mode = mode;
3190 act.cst = cst;
3191 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3192 if (*cached)
3193 return (*cached)->cost;
3194
3195 *cached = xmalloc (sizeof (struct mbc_entry));
3196 (*cached)->mode = mode;
3197 (*cached)->cst = cst;
3198
3199 start_sequence ();
3200 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3201 NULL_RTX, 0);
3202 seq = get_insns ();
3203 end_sequence ();
3204
3205 cost = seq_cost (seq);
3206
3207 if (dump_file && (dump_flags & TDF_DETAILS))
3208 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3209 (int) cst, GET_MODE_NAME (mode), cost);
3210
3211 (*cached)->cost = cost;
3212
3213 return cost;
3214 }
3215
3216 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3217 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3218 variable is omitted. The created memory accesses MODE.
3219
3220 TODO -- there must be some better way. This all is quite crude. */
3221
3222 static unsigned
3223 get_address_cost (bool symbol_present, bool var_present,
3224 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3225 {
3226 #define MAX_RATIO 128
3227 static sbitmap valid_mult;
3228 static HOST_WIDE_INT rat, off;
3229 static HOST_WIDE_INT min_offset, max_offset;
3230 static unsigned costs[2][2][2][2];
3231 unsigned cost, acost;
3232 rtx seq, addr, base;
3233 bool offset_p, ratio_p;
3234 rtx reg1;
3235 HOST_WIDE_INT s_offset;
3236 unsigned HOST_WIDE_INT mask;
3237 unsigned bits;
3238
3239 if (!valid_mult)
3240 {
3241 HOST_WIDE_INT i;
3242
3243 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3244
3245 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3246 for (i = 1; i <= 1 << 20; i <<= 1)
3247 {
3248 XEXP (addr, 1) = GEN_INT (i);
3249 if (!memory_address_p (Pmode, addr))
3250 break;
3251 }
3252 max_offset = i >> 1;
3253 off = max_offset;
3254
3255 for (i = 1; i <= 1 << 20; i <<= 1)
3256 {
3257 XEXP (addr, 1) = GEN_INT (-i);
3258 if (!memory_address_p (Pmode, addr))
3259 break;
3260 }
3261 min_offset = -(i >> 1);
3262
3263 if (dump_file && (dump_flags & TDF_DETAILS))
3264 {
3265 fprintf (dump_file, "get_address_cost:\n");
3266 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3267 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3268 }
3269
3270 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3271 sbitmap_zero (valid_mult);
3272 rat = 1;
3273 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3274 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3275 {
3276 XEXP (addr, 1) = GEN_INT (i);
3277 if (memory_address_p (Pmode, addr))
3278 {
3279 SET_BIT (valid_mult, i + MAX_RATIO);
3280 rat = i;
3281 }
3282 }
3283
3284 if (dump_file && (dump_flags & TDF_DETAILS))
3285 {
3286 fprintf (dump_file, " allowed multipliers:");
3287 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3288 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3289 fprintf (dump_file, " %d", (int) i);
3290 fprintf (dump_file, "\n");
3291 fprintf (dump_file, "\n");
3292 }
3293 }
3294
3295 bits = GET_MODE_BITSIZE (Pmode);
3296 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3297 offset &= mask;
3298 if ((offset >> (bits - 1) & 1))
3299 offset |= ~mask;
3300 s_offset = offset;
3301
3302 cost = 0;
3303 offset_p = (s_offset != 0
3304 && min_offset <= s_offset && s_offset <= max_offset);
3305 ratio_p = (ratio != 1
3306 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3307 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3308
3309 if (ratio != 1 && !ratio_p)
3310 cost += multiply_by_cost (ratio, Pmode);
3311
3312 if (s_offset && !offset_p && !symbol_present)
3313 {
3314 cost += add_cost (Pmode);
3315 var_present = true;
3316 }
3317
3318 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3319 if (!acost)
3320 {
3321 acost = 0;
3322
3323 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3324 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3325 if (ratio_p)
3326 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3327
3328 if (var_present)
3329 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3330
3331 if (symbol_present)
3332 {
3333 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3334 if (offset_p)
3335 base = gen_rtx_fmt_e (CONST, Pmode,
3336 gen_rtx_fmt_ee (PLUS, Pmode,
3337 base,
3338 GEN_INT (off)));
3339 }
3340 else if (offset_p)
3341 base = GEN_INT (off);
3342 else
3343 base = NULL_RTX;
3344
3345 if (base)
3346 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3347
3348 start_sequence ();
3349 addr = memory_address (Pmode, addr);
3350 seq = get_insns ();
3351 end_sequence ();
3352
3353 acost = seq_cost (seq);
3354 acost += address_cost (addr, Pmode);
3355
3356 if (!acost)
3357 acost = 1;
3358 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3359 }
3360
3361 return cost + acost;
3362 }
3363 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3364 invariants the computation depends on. */
3365
3366 static unsigned
3367 force_var_cost (struct ivopts_data *data,
3368 tree expr, bitmap *depends_on)
3369 {
3370 static bool costs_initialized = false;
3371 static unsigned integer_cost;
3372 static unsigned symbol_cost;
3373 static unsigned address_cost;
3374 tree op0, op1;
3375 unsigned cost0, cost1, cost;
3376 enum machine_mode mode;
3377
3378 if (!costs_initialized)
3379 {
3380 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3381 rtx x = gen_rtx_MEM (DECL_MODE (var),
3382 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3383 tree addr;
3384 tree type = build_pointer_type (integer_type_node);
3385
3386 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3387 2000));
3388
3389 SET_DECL_RTL (var, x);
3390 TREE_STATIC (var) = 1;
3391 addr = build1 (ADDR_EXPR, type, var);
3392 symbol_cost = computation_cost (addr) + 1;
3393
3394 address_cost
3395 = computation_cost (build2 (PLUS_EXPR, type,
3396 addr,
3397 build_int_cst_type (type, 2000))) + 1;
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3399 {
3400 fprintf (dump_file, "force_var_cost:\n");
3401 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3402 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3403 fprintf (dump_file, " address %d\n", (int) address_cost);
3404 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3405 fprintf (dump_file, "\n");
3406 }
3407
3408 costs_initialized = true;
3409 }
3410
3411 STRIP_NOPS (expr);
3412
3413 if (depends_on)
3414 {
3415 fd_ivopts_data = data;
3416 walk_tree (&expr, find_depends, depends_on, NULL);
3417 }
3418
3419 if (SSA_VAR_P (expr))
3420 return 0;
3421
3422 if (TREE_INVARIANT (expr))
3423 {
3424 if (TREE_CODE (expr) == INTEGER_CST)
3425 return integer_cost;
3426
3427 if (TREE_CODE (expr) == ADDR_EXPR)
3428 {
3429 tree obj = TREE_OPERAND (expr, 0);
3430
3431 if (TREE_CODE (obj) == VAR_DECL
3432 || TREE_CODE (obj) == PARM_DECL
3433 || TREE_CODE (obj) == RESULT_DECL)
3434 return symbol_cost;
3435 }
3436
3437 return address_cost;
3438 }
3439
3440 switch (TREE_CODE (expr))
3441 {
3442 case PLUS_EXPR:
3443 case MINUS_EXPR:
3444 case MULT_EXPR:
3445 op0 = TREE_OPERAND (expr, 0);
3446 op1 = TREE_OPERAND (expr, 1);
3447 STRIP_NOPS (op0);
3448 STRIP_NOPS (op1);
3449
3450 if (is_gimple_val (op0))
3451 cost0 = 0;
3452 else
3453 cost0 = force_var_cost (data, op0, NULL);
3454
3455 if (is_gimple_val (op1))
3456 cost1 = 0;
3457 else
3458 cost1 = force_var_cost (data, op1, NULL);
3459
3460 break;
3461
3462 default:
3463 /* Just an arbitrary value, FIXME. */
3464 return target_spill_cost;
3465 }
3466
3467 mode = TYPE_MODE (TREE_TYPE (expr));
3468 switch (TREE_CODE (expr))
3469 {
3470 case PLUS_EXPR:
3471 case MINUS_EXPR:
3472 cost = add_cost (mode);
3473 break;
3474
3475 case MULT_EXPR:
3476 if (cst_and_fits_in_hwi (op0))
3477 cost = multiply_by_cost (int_cst_value (op0), mode);
3478 else if (cst_and_fits_in_hwi (op1))
3479 cost = multiply_by_cost (int_cst_value (op1), mode);
3480 else
3481 return target_spill_cost;
3482 break;
3483
3484 default:
3485 gcc_unreachable ();
3486 }
3487
3488 cost += cost0;
3489 cost += cost1;
3490
3491 /* Bound the cost by target_spill_cost. The parts of complicated
3492 computations often are either loop invariant or at least can
3493 be shared between several iv uses, so letting this grow without
3494 limits would not give reasonable results. */
3495 return cost < target_spill_cost ? cost : target_spill_cost;
3496 }
3497
3498 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3499 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3500 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3501 invariants the computation depends on. */
3502
3503 static unsigned
3504 split_address_cost (struct ivopts_data *data,
3505 tree addr, bool *symbol_present, bool *var_present,
3506 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3507 {
3508 tree core;
3509 HOST_WIDE_INT bitsize;
3510 HOST_WIDE_INT bitpos;
3511 tree toffset;
3512 enum machine_mode mode;
3513 int unsignedp, volatilep;
3514
3515 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3516 &unsignedp, &volatilep, false);
3517
3518 if (toffset != 0
3519 || bitpos % BITS_PER_UNIT != 0
3520 || TREE_CODE (core) != VAR_DECL)
3521 {
3522 *symbol_present = false;
3523 *var_present = true;
3524 fd_ivopts_data = data;
3525 walk_tree (&addr, find_depends, depends_on, NULL);
3526 return target_spill_cost;
3527 }
3528
3529 *offset += bitpos / BITS_PER_UNIT;
3530 if (TREE_STATIC (core)
3531 || DECL_EXTERNAL (core))
3532 {
3533 *symbol_present = true;
3534 *var_present = false;
3535 return 0;
3536 }
3537
3538 *symbol_present = false;
3539 *var_present = true;
3540 return 0;
3541 }
3542
3543 /* Estimates cost of expressing difference of addresses E1 - E2 as
3544 var + symbol + offset. The value of offset is added to OFFSET,
3545 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3546 part is missing. DEPENDS_ON is a set of the invariants the computation
3547 depends on. */
3548
3549 static unsigned
3550 ptr_difference_cost (struct ivopts_data *data,
3551 tree e1, tree e2, bool *symbol_present, bool *var_present,
3552 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3553 {
3554 HOST_WIDE_INT diff = 0;
3555 unsigned cost;
3556
3557 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3558
3559 if (ptr_difference_const (e1, e2, &diff))
3560 {
3561 *offset += diff;
3562 *symbol_present = false;
3563 *var_present = false;
3564 return 0;
3565 }
3566
3567 if (e2 == integer_zero_node)
3568 return split_address_cost (data, TREE_OPERAND (e1, 0),
3569 symbol_present, var_present, offset, depends_on);
3570
3571 *symbol_present = false;
3572 *var_present = true;
3573
3574 cost = force_var_cost (data, e1, depends_on);
3575 cost += force_var_cost (data, e2, depends_on);
3576 cost += add_cost (Pmode);
3577
3578 return cost;
3579 }
3580
3581 /* Estimates cost of expressing difference E1 - E2 as
3582 var + symbol + offset. The value of offset is added to OFFSET,
3583 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3584 part is missing. DEPENDS_ON is a set of the invariants the computation
3585 depends on. */
3586
3587 static unsigned
3588 difference_cost (struct ivopts_data *data,
3589 tree e1, tree e2, bool *symbol_present, bool *var_present,
3590 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3591 {
3592 unsigned cost;
3593 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3594 unsigned HOST_WIDE_INT off1, off2;
3595
3596 e1 = strip_offset (e1, &off1);
3597 e2 = strip_offset (e2, &off2);
3598 *offset += off1 - off2;
3599
3600 STRIP_NOPS (e1);
3601 STRIP_NOPS (e2);
3602
3603 if (TREE_CODE (e1) == ADDR_EXPR)
3604 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3605 depends_on);
3606 *symbol_present = false;
3607
3608 if (operand_equal_p (e1, e2, 0))
3609 {
3610 *var_present = false;
3611 return 0;
3612 }
3613 *var_present = true;
3614 if (zero_p (e2))
3615 return force_var_cost (data, e1, depends_on);
3616
3617 if (zero_p (e1))
3618 {
3619 cost = force_var_cost (data, e2, depends_on);
3620 cost += multiply_by_cost (-1, mode);
3621
3622 return cost;
3623 }
3624
3625 cost = force_var_cost (data, e1, depends_on);
3626 cost += force_var_cost (data, e2, depends_on);
3627 cost += add_cost (mode);
3628
3629 return cost;
3630 }
3631
3632 /* Determines the cost of the computation by that USE is expressed
3633 from induction variable CAND. If ADDRESS_P is true, we just need
3634 to create an address from it, otherwise we want to get it into
3635 register. A set of invariants we depend on is stored in
3636 DEPENDS_ON. AT is the statement at that the value is computed. */
3637
3638 static unsigned
3639 get_computation_cost_at (struct ivopts_data *data,
3640 struct iv_use *use, struct iv_cand *cand,
3641 bool address_p, bitmap *depends_on, tree at)
3642 {
3643 tree ubase = use->iv->base, ustep = use->iv->step;
3644 tree cbase, cstep;
3645 tree utype = TREE_TYPE (ubase), ctype;
3646 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3647 HOST_WIDE_INT ratio, aratio;
3648 bool var_present, symbol_present;
3649 unsigned cost = 0, n_sums;
3650
3651 *depends_on = NULL;
3652
3653 /* Only consider real candidates. */
3654 if (!cand->iv)
3655 return INFTY;
3656
3657 cbase = cand->iv->base;
3658 cstep = cand->iv->step;
3659 ctype = TREE_TYPE (cbase);
3660
3661 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3662 {
3663 /* We do not have a precision to express the values of use. */
3664 return INFTY;
3665 }
3666
3667 if (address_p)
3668 {
3669 /* Do not try to express address of an object with computation based
3670 on address of a different object. This may cause problems in rtl
3671 level alias analysis (that does not expect this to be happening,
3672 as this is illegal in C), and would be unlikely to be useful
3673 anyway. */
3674 if (use->iv->base_object
3675 && cand->iv->base_object
3676 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3677 return INFTY;
3678 }
3679
3680 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3681 {
3682 /* TODO -- add direct handling of this case. */
3683 goto fallback;
3684 }
3685
3686 /* CSTEPI is removed from the offset in case statement is after the
3687 increment. If the step is not constant, we use zero instead.
3688 This is a bit imprecise (there is the extra addition), but
3689 redundancy elimination is likely to transform the code so that
3690 it uses value of the variable before increment anyway,
3691 so it is not that much unrealistic. */
3692 if (cst_and_fits_in_hwi (cstep))
3693 cstepi = int_cst_value (cstep);
3694 else
3695 cstepi = 0;
3696
3697 if (cst_and_fits_in_hwi (ustep)
3698 && cst_and_fits_in_hwi (cstep))
3699 {
3700 ustepi = int_cst_value (ustep);
3701
3702 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3703 return INFTY;
3704 }
3705 else
3706 {
3707 tree rat;
3708
3709 rat = constant_multiple_of (utype, ustep, cstep);
3710
3711 if (!rat)
3712 return INFTY;
3713
3714 if (cst_and_fits_in_hwi (rat))
3715 ratio = int_cst_value (rat);
3716 else if (integer_onep (rat))
3717 ratio = 1;
3718 else if (integer_all_onesp (rat))
3719 ratio = -1;
3720 else
3721 return INFTY;
3722 }
3723
3724 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3725 or ratio == 1, it is better to handle this like
3726
3727 ubase - ratio * cbase + ratio * var
3728
3729 (also holds in the case ratio == -1, TODO. */
3730
3731 if (cst_and_fits_in_hwi (cbase))
3732 {
3733 offset = - ratio * int_cst_value (cbase);
3734 cost += difference_cost (data,
3735 ubase, integer_zero_node,
3736 &symbol_present, &var_present, &offset,
3737 depends_on);
3738 }
3739 else if (ratio == 1)
3740 {
3741 cost += difference_cost (data,
3742 ubase, cbase,
3743 &symbol_present, &var_present, &offset,
3744 depends_on);
3745 }
3746 else
3747 {
3748 cost += force_var_cost (data, cbase, depends_on);
3749 cost += add_cost (TYPE_MODE (ctype));
3750 cost += difference_cost (data,
3751 ubase, integer_zero_node,
3752 &symbol_present, &var_present, &offset,
3753 depends_on);
3754 }
3755
3756 /* If we are after the increment, the value of the candidate is higher by
3757 one iteration. */
3758 if (stmt_after_increment (data->current_loop, cand, at))
3759 offset -= ratio * cstepi;
3760
3761 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3762 (symbol/var/const parts may be omitted). If we are looking for an address,
3763 find the cost of addressing this. */
3764 if (address_p)
3765 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3766
3767 /* Otherwise estimate the costs for computing the expression. */
3768 aratio = ratio > 0 ? ratio : -ratio;
3769 if (!symbol_present && !var_present && !offset)
3770 {
3771 if (ratio != 1)
3772 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3773
3774 return cost;
3775 }
3776
3777 if (aratio != 1)
3778 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3779
3780 n_sums = 1;
3781 if (var_present
3782 /* Symbol + offset should be compile-time computable. */
3783 && (symbol_present || offset))
3784 n_sums++;
3785
3786 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3787
3788 fallback:
3789 {
3790 /* Just get the expression, expand it and measure the cost. */
3791 tree comp = get_computation_at (data->current_loop, use, cand, at);
3792
3793 if (!comp)
3794 return INFTY;
3795
3796 if (address_p)
3797 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3798
3799 return computation_cost (comp);
3800 }
3801 }
3802
3803 /* Determines the cost of the computation by that USE is expressed
3804 from induction variable CAND. If ADDRESS_P is true, we just need
3805 to create an address from it, otherwise we want to get it into
3806 register. A set of invariants we depend on is stored in
3807 DEPENDS_ON. */
3808
3809 static unsigned
3810 get_computation_cost (struct ivopts_data *data,
3811 struct iv_use *use, struct iv_cand *cand,
3812 bool address_p, bitmap *depends_on)
3813 {
3814 return get_computation_cost_at (data,
3815 use, cand, address_p, depends_on, use->stmt);
3816 }
3817
3818 /* Determines cost of basing replacement of USE on CAND in a generic
3819 expression. */
3820
3821 static bool
3822 determine_use_iv_cost_generic (struct ivopts_data *data,
3823 struct iv_use *use, struct iv_cand *cand)
3824 {
3825 bitmap depends_on;
3826 unsigned cost;
3827
3828 /* The simple case first -- if we need to express value of the preserved
3829 original biv, the cost is 0. This also prevents us from counting the
3830 cost of increment twice -- once at this use and once in the cost of
3831 the candidate. */
3832 if (cand->pos == IP_ORIGINAL
3833 && cand->incremented_at == use->stmt)
3834 {
3835 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3836 return true;
3837 }
3838
3839 cost = get_computation_cost (data, use, cand, false, &depends_on);
3840 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3841
3842 return cost != INFTY;
3843 }
3844
3845 /* Determines cost of basing replacement of USE on CAND in an address. */
3846
3847 static bool
3848 determine_use_iv_cost_address (struct ivopts_data *data,
3849 struct iv_use *use, struct iv_cand *cand)
3850 {
3851 bitmap depends_on;
3852 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3853
3854 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3855
3856 return cost != INFTY;
3857 }
3858
3859 /* Computes value of induction variable IV in iteration NITER. */
3860
3861 static tree
3862 iv_value (struct iv *iv, tree niter)
3863 {
3864 tree val;
3865 tree type = TREE_TYPE (iv->base);
3866
3867 niter = fold_convert (type, niter);
3868 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3869
3870 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3871 }
3872
3873 /* Computes value of candidate CAND at position AT in iteration NITER. */
3874
3875 static tree
3876 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3877 {
3878 tree val = iv_value (cand->iv, niter);
3879 tree type = TREE_TYPE (cand->iv->base);
3880
3881 if (stmt_after_increment (loop, cand, at))
3882 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3883
3884 return val;
3885 }
3886
3887 /* Returns period of induction variable iv. */
3888
3889 static tree
3890 iv_period (struct iv *iv)
3891 {
3892 tree step = iv->step, period, type;
3893 tree pow2div;
3894
3895 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3896
3897 /* Period of the iv is gcd (step, type range). Since type range is power
3898 of two, it suffices to determine the maximum power of two that divides
3899 step. */
3900 pow2div = num_ending_zeros (step);
3901 type = unsigned_type_for (TREE_TYPE (step));
3902
3903 period = build_low_bits_mask (type,
3904 (TYPE_PRECISION (type)
3905 - tree_low_cst (pow2div, 1)));
3906
3907 return period;
3908 }
3909
3910 /* Returns the comparison operator used when eliminating the iv USE. */
3911
3912 static enum tree_code
3913 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3914 {
3915 struct loop *loop = data->current_loop;
3916 basic_block ex_bb;
3917 edge exit;
3918
3919 ex_bb = bb_for_stmt (use->stmt);
3920 exit = EDGE_SUCC (ex_bb, 0);
3921 if (flow_bb_inside_loop_p (loop, exit->dest))
3922 exit = EDGE_SUCC (ex_bb, 1);
3923
3924 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3925 }
3926
3927 /* Check whether it is possible to express the condition in USE by comparison
3928 of candidate CAND. If so, store the value compared with to BOUND. */
3929
3930 static bool
3931 may_eliminate_iv (struct ivopts_data *data,
3932 struct iv_use *use, struct iv_cand *cand, tree *bound)
3933 {
3934 basic_block ex_bb;
3935 edge exit;
3936 struct tree_niter_desc *niter;
3937 tree nit, nit_type;
3938 tree wider_type, period, per_type;
3939 struct loop *loop = data->current_loop;
3940
3941 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3942 return false;
3943
3944 /* For now works only for exits that dominate the loop latch. TODO -- extend
3945 for other conditions inside loop body. */
3946 ex_bb = bb_for_stmt (use->stmt);
3947 if (use->stmt != last_stmt (ex_bb)
3948 || TREE_CODE (use->stmt) != COND_EXPR)
3949 return false;
3950 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3951 return false;
3952
3953 exit = EDGE_SUCC (ex_bb, 0);
3954 if (flow_bb_inside_loop_p (loop, exit->dest))
3955 exit = EDGE_SUCC (ex_bb, 1);
3956 if (flow_bb_inside_loop_p (loop, exit->dest))
3957 return false;
3958
3959 niter = niter_for_exit (data, exit);
3960 if (!niter
3961 || !zero_p (niter->may_be_zero))
3962 return false;
3963
3964 nit = niter->niter;
3965 nit_type = TREE_TYPE (nit);
3966
3967 /* Determine whether we may use the variable to test whether niter iterations
3968 elapsed. This is the case iff the period of the induction variable is
3969 greater than the number of iterations. */
3970 period = iv_period (cand->iv);
3971 if (!period)
3972 return false;
3973 per_type = TREE_TYPE (period);
3974
3975 wider_type = TREE_TYPE (period);
3976 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3977 wider_type = per_type;
3978 else
3979 wider_type = nit_type;
3980
3981 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3982 fold_convert (wider_type, period),
3983 fold_convert (wider_type, nit)))))
3984 return false;
3985
3986 *bound = cand_value_at (loop, cand, use->stmt, nit);
3987 return true;
3988 }
3989
3990 /* Determines cost of basing replacement of USE on CAND in a condition. */
3991
3992 static bool
3993 determine_use_iv_cost_condition (struct ivopts_data *data,
3994 struct iv_use *use, struct iv_cand *cand)
3995 {
3996 tree bound = NULL_TREE, op, cond;
3997 bitmap depends_on = NULL;
3998 unsigned cost;
3999
4000 /* Only consider real candidates. */
4001 if (!cand->iv)
4002 {
4003 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4004 return false;
4005 }
4006
4007 if (may_eliminate_iv (data, use, cand, &bound))
4008 {
4009 cost = force_var_cost (data, bound, &depends_on);
4010
4011 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4012 return cost != INFTY;
4013 }
4014
4015 /* The induction variable elimination failed; just express the original
4016 giv. If it is compared with an invariant, note that we cannot get
4017 rid of it. */
4018 cost = get_computation_cost (data, use, cand, false, &depends_on);
4019
4020 cond = *use->op_p;
4021 if (TREE_CODE (cond) != SSA_NAME)
4022 {
4023 op = TREE_OPERAND (cond, 0);
4024 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4025 op = TREE_OPERAND (cond, 1);
4026 if (TREE_CODE (op) == SSA_NAME)
4027 {
4028 op = get_iv (data, op)->base;
4029 fd_ivopts_data = data;
4030 walk_tree (&op, find_depends, &depends_on, NULL);
4031 }
4032 }
4033
4034 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4035 return cost != INFTY;
4036 }
4037
4038 /* Checks whether it is possible to replace the final value of USE by
4039 a direct computation. If so, the formula is stored to *VALUE. */
4040
4041 static bool
4042 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4043 tree *value)
4044 {
4045 struct loop *loop = data->current_loop;
4046 edge exit;
4047 struct tree_niter_desc *niter;
4048
4049 exit = single_dom_exit (loop);
4050 if (!exit)
4051 return false;
4052
4053 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4054 bb_for_stmt (use->stmt)));
4055
4056 niter = niter_for_single_dom_exit (data);
4057 if (!niter
4058 || !zero_p (niter->may_be_zero))
4059 return false;
4060
4061 *value = iv_value (use->iv, niter->niter);
4062
4063 return true;
4064 }
4065
4066 /* Determines cost of replacing final value of USE using CAND. */
4067
4068 static bool
4069 determine_use_iv_cost_outer (struct ivopts_data *data,
4070 struct iv_use *use, struct iv_cand *cand)
4071 {
4072 bitmap depends_on;
4073 unsigned cost;
4074 edge exit;
4075 tree value = NULL_TREE;
4076 struct loop *loop = data->current_loop;
4077
4078 /* The simple case first -- if we need to express value of the preserved
4079 original biv, the cost is 0. This also prevents us from counting the
4080 cost of increment twice -- once at this use and once in the cost of
4081 the candidate. */
4082 if (cand->pos == IP_ORIGINAL
4083 && cand->incremented_at == use->stmt)
4084 {
4085 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4086 return true;
4087 }
4088
4089 if (!cand->iv)
4090 {
4091 if (!may_replace_final_value (data, use, &value))
4092 {
4093 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4094 return false;
4095 }
4096
4097 depends_on = NULL;
4098 cost = force_var_cost (data, value, &depends_on);
4099
4100 cost /= AVG_LOOP_NITER (loop);
4101
4102 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4103 return cost != INFTY;
4104 }
4105
4106 exit = single_dom_exit (loop);
4107 if (exit)
4108 {
4109 /* If there is just a single exit, we may use value of the candidate
4110 after we take it to determine the value of use. */
4111 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4112 last_stmt (exit->src));
4113 if (cost != INFTY)
4114 cost /= AVG_LOOP_NITER (loop);
4115 }
4116 else
4117 {
4118 /* Otherwise we just need to compute the iv. */
4119 cost = get_computation_cost (data, use, cand, false, &depends_on);
4120 }
4121
4122 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4123
4124 return cost != INFTY;
4125 }
4126
4127 /* Determines cost of basing replacement of USE on CAND. Returns false
4128 if USE cannot be based on CAND. */
4129
4130 static bool
4131 determine_use_iv_cost (struct ivopts_data *data,
4132 struct iv_use *use, struct iv_cand *cand)
4133 {
4134 switch (use->type)
4135 {
4136 case USE_NONLINEAR_EXPR:
4137 return determine_use_iv_cost_generic (data, use, cand);
4138
4139 case USE_OUTER:
4140 return determine_use_iv_cost_outer (data, use, cand);
4141
4142 case USE_ADDRESS:
4143 return determine_use_iv_cost_address (data, use, cand);
4144
4145 case USE_COMPARE:
4146 return determine_use_iv_cost_condition (data, use, cand);
4147
4148 default:
4149 gcc_unreachable ();
4150 }
4151 }
4152
4153 /* Determines costs of basing the use of the iv on an iv candidate. */
4154
4155 static void
4156 determine_use_iv_costs (struct ivopts_data *data)
4157 {
4158 unsigned i, j;
4159 struct iv_use *use;
4160 struct iv_cand *cand;
4161 bitmap to_clear = BITMAP_ALLOC (NULL);
4162
4163 alloc_use_cost_map (data);
4164
4165 for (i = 0; i < n_iv_uses (data); i++)
4166 {
4167 use = iv_use (data, i);
4168
4169 if (data->consider_all_candidates)
4170 {
4171 for (j = 0; j < n_iv_cands (data); j++)
4172 {
4173 cand = iv_cand (data, j);
4174 determine_use_iv_cost (data, use, cand);
4175 }
4176 }
4177 else
4178 {
4179 bitmap_iterator bi;
4180
4181 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4182 {
4183 cand = iv_cand (data, j);
4184 if (!determine_use_iv_cost (data, use, cand))
4185 bitmap_set_bit (to_clear, j);
4186 }
4187
4188 /* Remove the candidates for that the cost is infinite from
4189 the list of related candidates. */
4190 bitmap_and_compl_into (use->related_cands, to_clear);
4191 bitmap_clear (to_clear);
4192 }
4193 }
4194
4195 BITMAP_FREE (to_clear);
4196
4197 if (dump_file && (dump_flags & TDF_DETAILS))
4198 {
4199 fprintf (dump_file, "Use-candidate costs:\n");
4200
4201 for (i = 0; i < n_iv_uses (data); i++)
4202 {
4203 use = iv_use (data, i);
4204
4205 fprintf (dump_file, "Use %d:\n", i);
4206 fprintf (dump_file, " cand\tcost\tdepends on\n");
4207 for (j = 0; j < use->n_map_members; j++)
4208 {
4209 if (!use->cost_map[j].cand
4210 || use->cost_map[j].cost == INFTY)
4211 continue;
4212
4213 fprintf (dump_file, " %d\t%d\t",
4214 use->cost_map[j].cand->id,
4215 use->cost_map[j].cost);
4216 if (use->cost_map[j].depends_on)
4217 bitmap_print (dump_file,
4218 use->cost_map[j].depends_on, "","");
4219 fprintf (dump_file, "\n");
4220 }
4221
4222 fprintf (dump_file, "\n");
4223 }
4224 fprintf (dump_file, "\n");
4225 }
4226 }
4227
4228 /* Determines cost of the candidate CAND. */
4229
4230 static void
4231 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4232 {
4233 unsigned cost_base, cost_step;
4234 tree base;
4235
4236 if (!cand->iv)
4237 {
4238 cand->cost = 0;
4239 return;
4240 }
4241
4242 /* There are two costs associated with the candidate -- its increment
4243 and its initialization. The second is almost negligible for any loop
4244 that rolls enough, so we take it just very little into account. */
4245
4246 base = cand->iv->base;
4247 cost_base = force_var_cost (data, base, NULL);
4248 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4249
4250 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4251
4252 /* Prefer the original iv unless we may gain something by replacing it;
4253 this is not really relevant for artificial ivs created by other
4254 passes. */
4255 if (cand->pos == IP_ORIGINAL
4256 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4257 cand->cost--;
4258
4259 /* Prefer not to insert statements into latch unless there are some
4260 already (so that we do not create unnecessary jumps). */
4261 if (cand->pos == IP_END
4262 && empty_block_p (ip_end_pos (data->current_loop)))
4263 cand->cost++;
4264 }
4265
4266 /* Determines costs of computation of the candidates. */
4267
4268 static void
4269 determine_iv_costs (struct ivopts_data *data)
4270 {
4271 unsigned i;
4272
4273 if (dump_file && (dump_flags & TDF_DETAILS))
4274 {
4275 fprintf (dump_file, "Candidate costs:\n");
4276 fprintf (dump_file, " cand\tcost\n");
4277 }
4278
4279 for (i = 0; i < n_iv_cands (data); i++)
4280 {
4281 struct iv_cand *cand = iv_cand (data, i);
4282
4283 determine_iv_cost (data, cand);
4284
4285 if (dump_file && (dump_flags & TDF_DETAILS))
4286 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4287 }
4288
4289 if (dump_file && (dump_flags & TDF_DETAILS))
4290 fprintf (dump_file, "\n");
4291 }
4292
4293 /* Calculates cost for having SIZE induction variables. */
4294
4295 static unsigned
4296 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4297 {
4298 return global_cost_for_size (size,
4299 loop_data (data->current_loop)->regs_used,
4300 n_iv_uses (data));
4301 }
4302
4303 /* For each size of the induction variable set determine the penalty. */
4304
4305 static void
4306 determine_set_costs (struct ivopts_data *data)
4307 {
4308 unsigned j, n;
4309 tree phi, op;
4310 struct loop *loop = data->current_loop;
4311 bitmap_iterator bi;
4312
4313 /* We use the following model (definitely improvable, especially the
4314 cost function -- TODO):
4315
4316 We estimate the number of registers available (using MD data), name it A.
4317
4318 We estimate the number of registers used by the loop, name it U. This
4319 number is obtained as the number of loop phi nodes (not counting virtual
4320 registers and bivs) + the number of variables from outside of the loop.
4321
4322 We set a reserve R (free regs that are used for temporary computations,
4323 etc.). For now the reserve is a constant 3.
4324
4325 Let I be the number of induction variables.
4326
4327 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4328 make a lot of ivs without a reason).
4329 -- if A - R < U + I <= A, the cost is I * PRES_COST
4330 -- if U + I > A, the cost is I * PRES_COST and
4331 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4332
4333 if (dump_file && (dump_flags & TDF_DETAILS))
4334 {
4335 fprintf (dump_file, "Global costs:\n");
4336 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4337 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4338 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4339 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4340 }
4341
4342 n = 0;
4343 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4344 {
4345 op = PHI_RESULT (phi);
4346
4347 if (!is_gimple_reg (op))
4348 continue;
4349
4350 if (get_iv (data, op))
4351 continue;
4352
4353 n++;
4354 }
4355
4356 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4357 {
4358 struct version_info *info = ver_info (data, j);
4359
4360 if (info->inv_id && info->has_nonlin_use)
4361 n++;
4362 }
4363
4364 loop_data (loop)->regs_used = n;
4365 if (dump_file && (dump_flags & TDF_DETAILS))
4366 fprintf (dump_file, " regs_used %d\n", n);
4367
4368 if (dump_file && (dump_flags & TDF_DETAILS))
4369 {
4370 fprintf (dump_file, " cost for size:\n");
4371 fprintf (dump_file, " ivs\tcost\n");
4372 for (j = 0; j <= 2 * target_avail_regs; j++)
4373 fprintf (dump_file, " %d\t%d\n", j,
4374 ivopts_global_cost_for_size (data, j));
4375 fprintf (dump_file, "\n");
4376 }
4377 }
4378
4379 /* Returns true if A is a cheaper cost pair than B. */
4380
4381 static bool
4382 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4383 {
4384 if (!a)
4385 return false;
4386
4387 if (!b)
4388 return true;
4389
4390 if (a->cost < b->cost)
4391 return true;
4392
4393 if (a->cost > b->cost)
4394 return false;
4395
4396 /* In case the costs are the same, prefer the cheaper candidate. */
4397 if (a->cand->cost < b->cand->cost)
4398 return true;
4399
4400 return false;
4401 }
4402
4403 /* Computes the cost field of IVS structure. */
4404
4405 static void
4406 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4407 {
4408 unsigned cost = 0;
4409
4410 cost += ivs->cand_use_cost;
4411 cost += ivs->cand_cost;
4412 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4413
4414 ivs->cost = cost;
4415 }
4416
4417 /* Remove invariants in set INVS to set IVS. */
4418
4419 static void
4420 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4421 {
4422 bitmap_iterator bi;
4423 unsigned iid;
4424
4425 if (!invs)
4426 return;
4427
4428 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4429 {
4430 ivs->n_invariant_uses[iid]--;
4431 if (ivs->n_invariant_uses[iid] == 0)
4432 ivs->n_regs--;
4433 }
4434 }
4435
4436 /* Set USE not to be expressed by any candidate in IVS. */
4437
4438 static void
4439 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4440 struct iv_use *use)
4441 {
4442 unsigned uid = use->id, cid;
4443 struct cost_pair *cp;
4444
4445 cp = ivs->cand_for_use[uid];
4446 if (!cp)
4447 return;
4448 cid = cp->cand->id;
4449
4450 ivs->bad_uses++;
4451 ivs->cand_for_use[uid] = NULL;
4452 ivs->n_cand_uses[cid]--;
4453
4454 if (ivs->n_cand_uses[cid] == 0)
4455 {
4456 bitmap_clear_bit (ivs->cands, cid);
4457 /* Do not count the pseudocandidates. */
4458 if (cp->cand->iv)
4459 ivs->n_regs--;
4460 ivs->n_cands--;
4461 ivs->cand_cost -= cp->cand->cost;
4462
4463 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4464 }
4465
4466 ivs->cand_use_cost -= cp->cost;
4467
4468 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4469 iv_ca_recount_cost (data, ivs);
4470 }
4471
4472 /* Add invariants in set INVS to set IVS. */
4473
4474 static void
4475 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4476 {
4477 bitmap_iterator bi;
4478 unsigned iid;
4479
4480 if (!invs)
4481 return;
4482
4483 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4484 {
4485 ivs->n_invariant_uses[iid]++;
4486 if (ivs->n_invariant_uses[iid] == 1)
4487 ivs->n_regs++;
4488 }
4489 }
4490
4491 /* Set cost pair for USE in set IVS to CP. */
4492
4493 static void
4494 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4495 struct iv_use *use, struct cost_pair *cp)
4496 {
4497 unsigned uid = use->id, cid;
4498
4499 if (ivs->cand_for_use[uid] == cp)
4500 return;
4501
4502 if (ivs->cand_for_use[uid])
4503 iv_ca_set_no_cp (data, ivs, use);
4504
4505 if (cp)
4506 {
4507 cid = cp->cand->id;
4508
4509 ivs->bad_uses--;
4510 ivs->cand_for_use[uid] = cp;
4511 ivs->n_cand_uses[cid]++;
4512 if (ivs->n_cand_uses[cid] == 1)
4513 {
4514 bitmap_set_bit (ivs->cands, cid);
4515 /* Do not count the pseudocandidates. */
4516 if (cp->cand->iv)
4517 ivs->n_regs++;
4518 ivs->n_cands++;
4519 ivs->cand_cost += cp->cand->cost;
4520
4521 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4522 }
4523
4524 ivs->cand_use_cost += cp->cost;
4525 iv_ca_set_add_invariants (ivs, cp->depends_on);
4526 iv_ca_recount_cost (data, ivs);
4527 }
4528 }
4529
4530 /* Extend set IVS by expressing USE by some of the candidates in it
4531 if possible. */
4532
4533 static void
4534 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4535 struct iv_use *use)
4536 {
4537 struct cost_pair *best_cp = NULL, *cp;
4538 bitmap_iterator bi;
4539 unsigned i;
4540
4541 gcc_assert (ivs->upto >= use->id);
4542
4543 if (ivs->upto == use->id)
4544 {
4545 ivs->upto++;
4546 ivs->bad_uses++;
4547 }
4548
4549 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4550 {
4551 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4552
4553 if (cheaper_cost_pair (cp, best_cp))
4554 best_cp = cp;
4555 }
4556
4557 iv_ca_set_cp (data, ivs, use, best_cp);
4558 }
4559
4560 /* Get cost for assignment IVS. */
4561
4562 static unsigned
4563 iv_ca_cost (struct iv_ca *ivs)
4564 {
4565 return (ivs->bad_uses ? INFTY : ivs->cost);
4566 }
4567
4568 /* Returns true if all dependences of CP are among invariants in IVS. */
4569
4570 static bool
4571 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4572 {
4573 unsigned i;
4574 bitmap_iterator bi;
4575
4576 if (!cp->depends_on)
4577 return true;
4578
4579 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4580 {
4581 if (ivs->n_invariant_uses[i] == 0)
4582 return false;
4583 }
4584
4585 return true;
4586 }
4587
4588 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4589 it before NEXT_CHANGE. */
4590
4591 static struct iv_ca_delta *
4592 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4593 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4594 {
4595 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4596
4597 change->use = use;
4598 change->old_cp = old_cp;
4599 change->new_cp = new_cp;
4600 change->next_change = next_change;
4601
4602 return change;
4603 }
4604
4605 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4606 are rewritten. */
4607
4608 static struct iv_ca_delta *
4609 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4610 {
4611 struct iv_ca_delta *last;
4612
4613 if (!l2)
4614 return l1;
4615
4616 if (!l1)
4617 return l2;
4618
4619 for (last = l1; last->next_change; last = last->next_change)
4620 continue;
4621 last->next_change = l2;
4622
4623 return l1;
4624 }
4625
4626 /* Returns candidate by that USE is expressed in IVS. */
4627
4628 static struct cost_pair *
4629 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4630 {
4631 return ivs->cand_for_use[use->id];
4632 }
4633
4634 /* Reverse the list of changes DELTA, forming the inverse to it. */
4635
4636 static struct iv_ca_delta *
4637 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4638 {
4639 struct iv_ca_delta *act, *next, *prev = NULL;
4640 struct cost_pair *tmp;
4641
4642 for (act = delta; act; act = next)
4643 {
4644 next = act->next_change;
4645 act->next_change = prev;
4646 prev = act;
4647
4648 tmp = act->old_cp;
4649 act->old_cp = act->new_cp;
4650 act->new_cp = tmp;
4651 }
4652
4653 return prev;
4654 }
4655
4656 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4657 reverted instead. */
4658
4659 static void
4660 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4661 struct iv_ca_delta *delta, bool forward)
4662 {
4663 struct cost_pair *from, *to;
4664 struct iv_ca_delta *act;
4665
4666 if (!forward)
4667 delta = iv_ca_delta_reverse (delta);
4668
4669 for (act = delta; act; act = act->next_change)
4670 {
4671 from = act->old_cp;
4672 to = act->new_cp;
4673 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4674 iv_ca_set_cp (data, ivs, act->use, to);
4675 }
4676
4677 if (!forward)
4678 iv_ca_delta_reverse (delta);
4679 }
4680
4681 /* Returns true if CAND is used in IVS. */
4682
4683 static bool
4684 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4685 {
4686 return ivs->n_cand_uses[cand->id] > 0;
4687 }
4688
4689 /* Returns number of induction variable candidates in the set IVS. */
4690
4691 static unsigned
4692 iv_ca_n_cands (struct iv_ca *ivs)
4693 {
4694 return ivs->n_cands;
4695 }
4696
4697 /* Free the list of changes DELTA. */
4698
4699 static void
4700 iv_ca_delta_free (struct iv_ca_delta **delta)
4701 {
4702 struct iv_ca_delta *act, *next;
4703
4704 for (act = *delta; act; act = next)
4705 {
4706 next = act->next_change;
4707 free (act);
4708 }
4709
4710 *delta = NULL;
4711 }
4712
4713 /* Allocates new iv candidates assignment. */
4714
4715 static struct iv_ca *
4716 iv_ca_new (struct ivopts_data *data)
4717 {
4718 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4719
4720 nw->upto = 0;
4721 nw->bad_uses = 0;
4722 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4723 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4724 nw->cands = BITMAP_ALLOC (NULL);
4725 nw->n_cands = 0;
4726 nw->n_regs = 0;
4727 nw->cand_use_cost = 0;
4728 nw->cand_cost = 0;
4729 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4730 nw->cost = 0;
4731
4732 return nw;
4733 }
4734
4735 /* Free memory occupied by the set IVS. */
4736
4737 static void
4738 iv_ca_free (struct iv_ca **ivs)
4739 {
4740 free ((*ivs)->cand_for_use);
4741 free ((*ivs)->n_cand_uses);
4742 BITMAP_FREE ((*ivs)->cands);
4743 free ((*ivs)->n_invariant_uses);
4744 free (*ivs);
4745 *ivs = NULL;
4746 }
4747
4748 /* Dumps IVS to FILE. */
4749
4750 static void
4751 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4752 {
4753 const char *pref = " invariants ";
4754 unsigned i;
4755
4756 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4757 bitmap_print (file, ivs->cands, " candidates ","\n");
4758
4759 for (i = 1; i <= data->max_inv_id; i++)
4760 if (ivs->n_invariant_uses[i])
4761 {
4762 fprintf (file, "%s%d", pref, i);
4763 pref = ", ";
4764 }
4765 fprintf (file, "\n");
4766 }
4767
4768 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4769 new set, and store differences in DELTA. Number of induction variables
4770 in the new set is stored to N_IVS. */
4771
4772 static unsigned
4773 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4774 struct iv_cand *cand, struct iv_ca_delta **delta,
4775 unsigned *n_ivs)
4776 {
4777 unsigned i, cost;
4778 struct iv_use *use;
4779 struct cost_pair *old_cp, *new_cp;
4780
4781 *delta = NULL;
4782 for (i = 0; i < ivs->upto; i++)
4783 {
4784 use = iv_use (data, i);
4785 old_cp = iv_ca_cand_for_use (ivs, use);
4786
4787 if (old_cp
4788 && old_cp->cand == cand)
4789 continue;
4790
4791 new_cp = get_use_iv_cost (data, use, cand);
4792 if (!new_cp)
4793 continue;
4794
4795 if (!iv_ca_has_deps (ivs, new_cp))
4796 continue;
4797
4798 if (!cheaper_cost_pair (new_cp, old_cp))
4799 continue;
4800
4801 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4802 }
4803
4804 iv_ca_delta_commit (data, ivs, *delta, true);
4805 cost = iv_ca_cost (ivs);
4806 if (n_ivs)
4807 *n_ivs = iv_ca_n_cands (ivs);
4808 iv_ca_delta_commit (data, ivs, *delta, false);
4809
4810 return cost;
4811 }
4812
4813 /* Try narrowing set IVS by removing CAND. Return the cost of
4814 the new set and store the differences in DELTA. */
4815
4816 static unsigned
4817 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4818 struct iv_cand *cand, struct iv_ca_delta **delta)
4819 {
4820 unsigned i, ci;
4821 struct iv_use *use;
4822 struct cost_pair *old_cp, *new_cp, *cp;
4823 bitmap_iterator bi;
4824 struct iv_cand *cnd;
4825 unsigned cost;
4826
4827 *delta = NULL;
4828 for (i = 0; i < n_iv_uses (data); i++)
4829 {
4830 use = iv_use (data, i);
4831
4832 old_cp = iv_ca_cand_for_use (ivs, use);
4833 if (old_cp->cand != cand)
4834 continue;
4835
4836 new_cp = NULL;
4837
4838 if (data->consider_all_candidates)
4839 {
4840 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4841 {
4842 if (ci == cand->id)
4843 continue;
4844
4845 cnd = iv_cand (data, ci);
4846
4847 cp = get_use_iv_cost (data, use, cnd);
4848 if (!cp)
4849 continue;
4850 if (!iv_ca_has_deps (ivs, cp))
4851 continue;
4852
4853 if (!cheaper_cost_pair (cp, new_cp))
4854 continue;
4855
4856 new_cp = cp;
4857 }
4858 }
4859 else
4860 {
4861 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4862 {
4863 if (ci == cand->id)
4864 continue;
4865
4866 cnd = iv_cand (data, ci);
4867
4868 cp = get_use_iv_cost (data, use, cnd);
4869 if (!cp)
4870 continue;
4871 if (!iv_ca_has_deps (ivs, cp))
4872 continue;
4873
4874 if (!cheaper_cost_pair (cp, new_cp))
4875 continue;
4876
4877 new_cp = cp;
4878 }
4879 }
4880
4881 if (!new_cp)
4882 {
4883 iv_ca_delta_free (delta);
4884 return INFTY;
4885 }
4886
4887 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4888 }
4889
4890 iv_ca_delta_commit (data, ivs, *delta, true);
4891 cost = iv_ca_cost (ivs);
4892 iv_ca_delta_commit (data, ivs, *delta, false);
4893
4894 return cost;
4895 }
4896
4897 /* Try optimizing the set of candidates IVS by removing candidates different
4898 from to EXCEPT_CAND from it. Return cost of the new set, and store
4899 differences in DELTA. */
4900
4901 static unsigned
4902 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4903 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4904 {
4905 bitmap_iterator bi;
4906 struct iv_ca_delta *act_delta, *best_delta;
4907 unsigned i, best_cost, acost;
4908 struct iv_cand *cand;
4909
4910 best_delta = NULL;
4911 best_cost = iv_ca_cost (ivs);
4912
4913 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4914 {
4915 cand = iv_cand (data, i);
4916
4917 if (cand == except_cand)
4918 continue;
4919
4920 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4921
4922 if (acost < best_cost)
4923 {
4924 best_cost = acost;
4925 iv_ca_delta_free (&best_delta);
4926 best_delta = act_delta;
4927 }
4928 else
4929 iv_ca_delta_free (&act_delta);
4930 }
4931
4932 if (!best_delta)
4933 {
4934 *delta = NULL;
4935 return best_cost;
4936 }
4937
4938 /* Recurse to possibly remove other unnecessary ivs. */
4939 iv_ca_delta_commit (data, ivs, best_delta, true);
4940 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4941 iv_ca_delta_commit (data, ivs, best_delta, false);
4942 *delta = iv_ca_delta_join (best_delta, *delta);
4943 return best_cost;
4944 }
4945
4946 /* Tries to extend the sets IVS in the best possible way in order
4947 to express the USE. */
4948
4949 static bool
4950 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4951 struct iv_use *use)
4952 {
4953 unsigned best_cost, act_cost;
4954 unsigned i;
4955 bitmap_iterator bi;
4956 struct iv_cand *cand;
4957 struct iv_ca_delta *best_delta = NULL, *act_delta;
4958 struct cost_pair *cp;
4959
4960 iv_ca_add_use (data, ivs, use);
4961 best_cost = iv_ca_cost (ivs);
4962
4963 cp = iv_ca_cand_for_use (ivs, use);
4964 if (cp)
4965 {
4966 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4967 iv_ca_set_no_cp (data, ivs, use);
4968 }
4969
4970 /* First try important candidates. Only if it fails, try the specific ones.
4971 Rationale -- in loops with many variables the best choice often is to use
4972 just one generic biv. If we added here many ivs specific to the uses,
4973 the optimization algorithm later would be likely to get stuck in a local
4974 minimum, thus causing us to create too many ivs. The approach from
4975 few ivs to more seems more likely to be successful -- starting from few
4976 ivs, replacing an expensive use by a specific iv should always be a
4977 win. */
4978 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4979 {
4980 cand = iv_cand (data, i);
4981
4982 if (iv_ca_cand_used_p (ivs, cand))
4983 continue;
4984
4985 cp = get_use_iv_cost (data, use, cand);
4986 if (!cp)
4987 continue;
4988
4989 iv_ca_set_cp (data, ivs, use, cp);
4990 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4991 iv_ca_set_no_cp (data, ivs, use);
4992 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4993
4994 if (act_cost < best_cost)
4995 {
4996 best_cost = act_cost;
4997
4998 iv_ca_delta_free (&best_delta);
4999 best_delta = act_delta;
5000 }
5001 else
5002 iv_ca_delta_free (&act_delta);
5003 }
5004
5005 if (best_cost == INFTY)
5006 {
5007 for (i = 0; i < use->n_map_members; i++)
5008 {
5009 cp = use->cost_map + i;
5010 cand = cp->cand;
5011 if (!cand)
5012 continue;
5013
5014 /* Already tried this. */
5015 if (cand->important)
5016 continue;
5017
5018 if (iv_ca_cand_used_p (ivs, cand))
5019 continue;
5020
5021 act_delta = NULL;
5022 iv_ca_set_cp (data, ivs, use, cp);
5023 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5024 iv_ca_set_no_cp (data, ivs, use);
5025 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5026 cp, act_delta);
5027
5028 if (act_cost < best_cost)
5029 {
5030 best_cost = act_cost;
5031
5032 if (best_delta)
5033 iv_ca_delta_free (&best_delta);
5034 best_delta = act_delta;
5035 }
5036 else
5037 iv_ca_delta_free (&act_delta);
5038 }
5039 }
5040
5041 iv_ca_delta_commit (data, ivs, best_delta, true);
5042 iv_ca_delta_free (&best_delta);
5043
5044 return (best_cost != INFTY);
5045 }
5046
5047 /* Finds an initial assignment of candidates to uses. */
5048
5049 static struct iv_ca *
5050 get_initial_solution (struct ivopts_data *data)
5051 {
5052 struct iv_ca *ivs = iv_ca_new (data);
5053 unsigned i;
5054
5055 for (i = 0; i < n_iv_uses (data); i++)
5056 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5057 {
5058 iv_ca_free (&ivs);
5059 return NULL;
5060 }
5061
5062 return ivs;
5063 }
5064
5065 /* Tries to improve set of induction variables IVS. */
5066
5067 static bool
5068 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5069 {
5070 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5071 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5072 struct iv_cand *cand;
5073
5074 /* Try extending the set of induction variables by one. */
5075 for (i = 0; i < n_iv_cands (data); i++)
5076 {
5077 cand = iv_cand (data, i);
5078
5079 if (iv_ca_cand_used_p (ivs, cand))
5080 continue;
5081
5082 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5083 if (!act_delta)
5084 continue;
5085
5086 /* If we successfully added the candidate and the set is small enough,
5087 try optimizing it by removing other candidates. */
5088 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5089 {
5090 iv_ca_delta_commit (data, ivs, act_delta, true);
5091 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5092 iv_ca_delta_commit (data, ivs, act_delta, false);
5093 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5094 }
5095
5096 if (acost < best_cost)
5097 {
5098 best_cost = acost;
5099 iv_ca_delta_free (&best_delta);
5100 best_delta = act_delta;
5101 }
5102 else
5103 iv_ca_delta_free (&act_delta);
5104 }
5105
5106 if (!best_delta)
5107 {
5108 /* Try removing the candidates from the set instead. */
5109 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5110
5111 /* Nothing more we can do. */
5112 if (!best_delta)
5113 return false;
5114 }
5115
5116 iv_ca_delta_commit (data, ivs, best_delta, true);
5117 gcc_assert (best_cost == iv_ca_cost (ivs));
5118 iv_ca_delta_free (&best_delta);
5119 return true;
5120 }
5121
5122 /* Attempts to find the optimal set of induction variables. We do simple
5123 greedy heuristic -- we try to replace at most one candidate in the selected
5124 solution and remove the unused ivs while this improves the cost. */
5125
5126 static struct iv_ca *
5127 find_optimal_iv_set (struct ivopts_data *data)
5128 {
5129 unsigned i;
5130 struct iv_ca *set;
5131 struct iv_use *use;
5132
5133 /* Get the initial solution. */
5134 set = get_initial_solution (data);
5135 if (!set)
5136 {
5137 if (dump_file && (dump_flags & TDF_DETAILS))
5138 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5139 return NULL;
5140 }
5141
5142 if (dump_file && (dump_flags & TDF_DETAILS))
5143 {
5144 fprintf (dump_file, "Initial set of candidates:\n");
5145 iv_ca_dump (data, dump_file, set);
5146 }
5147
5148 while (try_improve_iv_set (data, set))
5149 {
5150 if (dump_file && (dump_flags & TDF_DETAILS))
5151 {
5152 fprintf (dump_file, "Improved to:\n");
5153 iv_ca_dump (data, dump_file, set);
5154 }
5155 }
5156
5157 if (dump_file && (dump_flags & TDF_DETAILS))
5158 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5159
5160 for (i = 0; i < n_iv_uses (data); i++)
5161 {
5162 use = iv_use (data, i);
5163 use->selected = iv_ca_cand_for_use (set, use)->cand;
5164 }
5165
5166 return set;
5167 }
5168
5169 /* Creates a new induction variable corresponding to CAND. */
5170
5171 static void
5172 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5173 {
5174 block_stmt_iterator incr_pos;
5175 tree base;
5176 bool after = false;
5177
5178 if (!cand->iv)
5179 return;
5180
5181 switch (cand->pos)
5182 {
5183 case IP_NORMAL:
5184 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5185 break;
5186
5187 case IP_END:
5188 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5189 after = true;
5190 break;
5191
5192 case IP_ORIGINAL:
5193 /* Mark that the iv is preserved. */
5194 name_info (data, cand->var_before)->preserve_biv = true;
5195 name_info (data, cand->var_after)->preserve_biv = true;
5196
5197 /* Rewrite the increment so that it uses var_before directly. */
5198 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5199
5200 return;
5201 }
5202
5203 gimple_add_tmp_var (cand->var_before);
5204 add_referenced_tmp_var (cand->var_before);
5205
5206 base = unshare_expr (cand->iv->base);
5207
5208 create_iv (base, unshare_expr (cand->iv->step),
5209 cand->var_before, data->current_loop,
5210 &incr_pos, after, &cand->var_before, &cand->var_after);
5211 }
5212
5213 /* Creates new induction variables described in SET. */
5214
5215 static void
5216 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5217 {
5218 unsigned i;
5219 struct iv_cand *cand;
5220 bitmap_iterator bi;
5221
5222 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5223 {
5224 cand = iv_cand (data, i);
5225 create_new_iv (data, cand);
5226 }
5227 }
5228
5229 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5230 is true, remove also the ssa name defined by the statement. */
5231
5232 static void
5233 remove_statement (tree stmt, bool including_defined_name)
5234 {
5235 if (TREE_CODE (stmt) == PHI_NODE)
5236 {
5237 if (!including_defined_name)
5238 {
5239 /* Prevent the ssa name defined by the statement from being removed. */
5240 SET_PHI_RESULT (stmt, NULL);
5241 }
5242 remove_phi_node (stmt, NULL_TREE);
5243 }
5244 else
5245 {
5246 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5247
5248 bsi_remove (&bsi);
5249 }
5250 }
5251
5252 /* Rewrites USE (definition of iv used in a nonlinear expression)
5253 using candidate CAND. */
5254
5255 static void
5256 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5257 struct iv_use *use, struct iv_cand *cand)
5258 {
5259 tree comp;
5260 tree op, stmts, tgt, ass;
5261 block_stmt_iterator bsi, pbsi;
5262
5263 /* An important special case -- if we are asked to express value of
5264 the original iv by itself, just exit; there is no need to
5265 introduce a new computation (that might also need casting the
5266 variable to unsigned and back). */
5267 if (cand->pos == IP_ORIGINAL
5268 && TREE_CODE (use->stmt) == MODIFY_EXPR
5269 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5270 {
5271 op = TREE_OPERAND (use->stmt, 1);
5272
5273 /* Be a bit careful. In case variable is expressed in some
5274 complicated way, rewrite it so that we may get rid of this
5275 complicated expression. */
5276 if ((TREE_CODE (op) == PLUS_EXPR
5277 || TREE_CODE (op) == MINUS_EXPR)
5278 && TREE_OPERAND (op, 0) == cand->var_before
5279 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5280 return;
5281 }
5282
5283 comp = unshare_expr (get_computation (data->current_loop,
5284 use, cand));
5285 switch (TREE_CODE (use->stmt))
5286 {
5287 case PHI_NODE:
5288 tgt = PHI_RESULT (use->stmt);
5289
5290 /* If we should keep the biv, do not replace it. */
5291 if (name_info (data, tgt)->preserve_biv)
5292 return;
5293
5294 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5295 while (!bsi_end_p (pbsi)
5296 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5297 {
5298 bsi = pbsi;
5299 bsi_next (&pbsi);
5300 }
5301 break;
5302
5303 case MODIFY_EXPR:
5304 tgt = TREE_OPERAND (use->stmt, 0);
5305 bsi = bsi_for_stmt (use->stmt);
5306 break;
5307
5308 default:
5309 gcc_unreachable ();
5310 }
5311
5312 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5313
5314 if (TREE_CODE (use->stmt) == PHI_NODE)
5315 {
5316 if (stmts)
5317 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5318 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5319 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5320 remove_statement (use->stmt, false);
5321 SSA_NAME_DEF_STMT (tgt) = ass;
5322 }
5323 else
5324 {
5325 if (stmts)
5326 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5327 TREE_OPERAND (use->stmt, 1) = op;
5328 }
5329 }
5330
5331 /* Replaces ssa name in index IDX by its basic variable. Callback for
5332 for_each_index. */
5333
5334 static bool
5335 idx_remove_ssa_names (tree base, tree *idx,
5336 void *data ATTRIBUTE_UNUSED)
5337 {
5338 tree *op;
5339
5340 if (TREE_CODE (*idx) == SSA_NAME)
5341 *idx = SSA_NAME_VAR (*idx);
5342
5343 if (TREE_CODE (base) == ARRAY_REF)
5344 {
5345 op = &TREE_OPERAND (base, 2);
5346 if (*op
5347 && TREE_CODE (*op) == SSA_NAME)
5348 *op = SSA_NAME_VAR (*op);
5349 op = &TREE_OPERAND (base, 3);
5350 if (*op
5351 && TREE_CODE (*op) == SSA_NAME)
5352 *op = SSA_NAME_VAR (*op);
5353 }
5354
5355 return true;
5356 }
5357
5358 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5359
5360 static tree
5361 unshare_and_remove_ssa_names (tree ref)
5362 {
5363 ref = unshare_expr (ref);
5364 for_each_index (&ref, idx_remove_ssa_names, NULL);
5365
5366 return ref;
5367 }
5368
5369 /* Rewrites base of memory access OP with expression WITH in statement
5370 pointed to by BSI. */
5371
5372 static void
5373 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5374 {
5375 tree bvar, var, new_name, copy, name;
5376 tree orig;
5377
5378 var = bvar = get_base_address (*op);
5379
5380 if (!var || TREE_CODE (with) != SSA_NAME)
5381 goto do_rewrite;
5382
5383 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5384 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5385 if (TREE_CODE (var) == INDIRECT_REF)
5386 var = TREE_OPERAND (var, 0);
5387 if (TREE_CODE (var) == SSA_NAME)
5388 {
5389 name = var;
5390 var = SSA_NAME_VAR (var);
5391 }
5392 else if (DECL_P (var))
5393 name = NULL_TREE;
5394 else
5395 goto do_rewrite;
5396
5397 /* We need to add a memory tag for the variable. But we do not want
5398 to add it to the temporary used for the computations, since this leads
5399 to problems in redundancy elimination when there are common parts
5400 in two computations referring to the different arrays. So we copy
5401 the variable to a new temporary. */
5402 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
5403
5404 if (name)
5405 new_name = duplicate_ssa_name (name, copy);
5406 else
5407 {
5408 tree tag = var_ann (var)->type_mem_tag;
5409 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5410 add_referenced_tmp_var (new_ptr);
5411 if (tag)
5412 var_ann (new_ptr)->type_mem_tag = tag;
5413 else
5414 add_type_alias (new_ptr, var);
5415 new_name = make_ssa_name (new_ptr, copy);
5416 }
5417
5418 TREE_OPERAND (copy, 0) = new_name;
5419 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5420 with = new_name;
5421
5422 do_rewrite:
5423
5424 orig = NULL_TREE;
5425 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5426 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5427
5428 if (TREE_CODE (*op) == INDIRECT_REF)
5429 orig = REF_ORIGINAL (*op);
5430 if (!orig)
5431 orig = unshare_and_remove_ssa_names (*op);
5432
5433 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5434
5435 /* Record the original reference, for purposes of alias analysis. */
5436 REF_ORIGINAL (*op) = orig;
5437
5438 /* Virtual operands in the original statement may have to be renamed
5439 because of the replacement. */
5440 mark_new_vars_to_rename (bsi_stmt (*bsi));
5441 }
5442
5443 /* Rewrites USE (address that is an iv) using candidate CAND. */
5444
5445 static void
5446 rewrite_use_address (struct ivopts_data *data,
5447 struct iv_use *use, struct iv_cand *cand)
5448 {
5449 tree comp = unshare_expr (get_computation (data->current_loop,
5450 use, cand));
5451 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5452 tree stmts;
5453 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5454
5455 if (stmts)
5456 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5457
5458 rewrite_address_base (&bsi, use->op_p, op);
5459 }
5460
5461 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5462 candidate CAND. */
5463
5464 static void
5465 rewrite_use_compare (struct ivopts_data *data,
5466 struct iv_use *use, struct iv_cand *cand)
5467 {
5468 tree comp;
5469 tree *op_p, cond, op, stmts, bound;
5470 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5471 enum tree_code compare;
5472 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5473
5474 bound = cp->value;
5475 if (bound)
5476 {
5477 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5478 tree var_type = TREE_TYPE (var);
5479
5480 compare = iv_elimination_compare (data, use);
5481 bound = fold_convert (var_type, bound);
5482 op = force_gimple_operand (unshare_expr (bound), &stmts,
5483 true, NULL_TREE);
5484
5485 if (stmts)
5486 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5487
5488 *use->op_p = build2 (compare, boolean_type_node, var, op);
5489 update_stmt (use->stmt);
5490 return;
5491 }
5492
5493 /* The induction variable elimination failed; just express the original
5494 giv. */
5495 comp = unshare_expr (get_computation (data->current_loop, use, cand));
5496
5497 cond = *use->op_p;
5498 op_p = &TREE_OPERAND (cond, 0);
5499 if (TREE_CODE (*op_p) != SSA_NAME
5500 || zero_p (get_iv (data, *op_p)->step))
5501 op_p = &TREE_OPERAND (cond, 1);
5502
5503 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5504 if (stmts)
5505 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5506
5507 *op_p = op;
5508 }
5509
5510 /* Ensure that operand *OP_P may be used at the end of EXIT without
5511 violating loop closed ssa form. */
5512
5513 static void
5514 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5515 {
5516 basic_block def_bb;
5517 struct loop *def_loop;
5518 tree phi, use;
5519
5520 use = USE_FROM_PTR (op_p);
5521 if (TREE_CODE (use) != SSA_NAME)
5522 return;
5523
5524 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5525 if (!def_bb)
5526 return;
5527
5528 def_loop = def_bb->loop_father;
5529 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5530 return;
5531
5532 /* Try finding a phi node that copies the value out of the loop. */
5533 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5534 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5535 break;
5536
5537 if (!phi)
5538 {
5539 /* Create such a phi node. */
5540 tree new_name = duplicate_ssa_name (use, NULL);
5541
5542 phi = create_phi_node (new_name, exit->dest);
5543 SSA_NAME_DEF_STMT (new_name) = phi;
5544 add_phi_arg (phi, use, exit);
5545 }
5546
5547 SET_USE (op_p, PHI_RESULT (phi));
5548 }
5549
5550 /* Ensure that operands of STMT may be used at the end of EXIT without
5551 violating loop closed ssa form. */
5552
5553 static void
5554 protect_loop_closed_ssa_form (edge exit, tree stmt)
5555 {
5556 ssa_op_iter iter;
5557 use_operand_p use_p;
5558
5559 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5560 protect_loop_closed_ssa_form_use (exit, use_p);
5561 }
5562
5563 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5564 so that they are emitted on the correct place, and so that the loop closed
5565 ssa form is preserved. */
5566
5567 static void
5568 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5569 {
5570 tree_stmt_iterator tsi;
5571 block_stmt_iterator bsi;
5572 tree phi, stmt, def, next;
5573
5574 if (!single_pred_p (exit->dest))
5575 split_loop_exit_edge (exit);
5576
5577 /* Ensure there is label in exit->dest, so that we can
5578 insert after it. */
5579 tree_block_label (exit->dest);
5580 bsi = bsi_after_labels (exit->dest);
5581
5582 if (TREE_CODE (stmts) == STATEMENT_LIST)
5583 {
5584 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5585 {
5586 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5587 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5588 }
5589 }
5590 else
5591 {
5592 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5593 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5594 }
5595
5596 if (!op)
5597 return;
5598
5599 for (phi = phi_nodes (exit->dest); phi; phi = next)
5600 {
5601 next = PHI_CHAIN (phi);
5602
5603 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5604 {
5605 def = PHI_RESULT (phi);
5606 remove_statement (phi, false);
5607 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5608 def, op);
5609 SSA_NAME_DEF_STMT (def) = stmt;
5610 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5611 }
5612 }
5613 }
5614
5615 /* Rewrites the final value of USE (that is only needed outside of the loop)
5616 using candidate CAND. */
5617
5618 static void
5619 rewrite_use_outer (struct ivopts_data *data,
5620 struct iv_use *use, struct iv_cand *cand)
5621 {
5622 edge exit;
5623 tree value, op, stmts, tgt;
5624 tree phi;
5625
5626 switch (TREE_CODE (use->stmt))
5627 {
5628 case PHI_NODE:
5629 tgt = PHI_RESULT (use->stmt);
5630 break;
5631 case MODIFY_EXPR:
5632 tgt = TREE_OPERAND (use->stmt, 0);
5633 break;
5634 default:
5635 gcc_unreachable ();
5636 }
5637
5638 exit = single_dom_exit (data->current_loop);
5639
5640 if (exit)
5641 {
5642 if (!cand->iv)
5643 {
5644 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5645 value = cp->value;
5646 }
5647 else
5648 value = get_computation_at (data->current_loop,
5649 use, cand, last_stmt (exit->src));
5650
5651 value = unshare_expr (value);
5652 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5653
5654 /* If we will preserve the iv anyway and we would need to perform
5655 some computation to replace the final value, do nothing. */
5656 if (stmts && name_info (data, tgt)->preserve_biv)
5657 return;
5658
5659 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5660 {
5661 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5662
5663 if (USE_FROM_PTR (use_p) == tgt)
5664 SET_USE (use_p, op);
5665 }
5666
5667 if (stmts)
5668 compute_phi_arg_on_exit (exit, stmts, op);
5669
5670 /* Enable removal of the statement. We cannot remove it directly,
5671 since we may still need the aliasing information attached to the
5672 ssa name defined by it. */
5673 name_info (data, tgt)->iv->have_use_for = false;
5674 return;
5675 }
5676
5677 /* If the variable is going to be preserved anyway, there is nothing to
5678 do. */
5679 if (name_info (data, tgt)->preserve_biv)
5680 return;
5681
5682 /* Otherwise we just need to compute the iv. */
5683 rewrite_use_nonlinear_expr (data, use, cand);
5684 }
5685
5686 /* Rewrites USE using candidate CAND. */
5687
5688 static void
5689 rewrite_use (struct ivopts_data *data,
5690 struct iv_use *use, struct iv_cand *cand)
5691 {
5692 switch (use->type)
5693 {
5694 case USE_NONLINEAR_EXPR:
5695 rewrite_use_nonlinear_expr (data, use, cand);
5696 break;
5697
5698 case USE_OUTER:
5699 rewrite_use_outer (data, use, cand);
5700 break;
5701
5702 case USE_ADDRESS:
5703 rewrite_use_address (data, use, cand);
5704 break;
5705
5706 case USE_COMPARE:
5707 rewrite_use_compare (data, use, cand);
5708 break;
5709
5710 default:
5711 gcc_unreachable ();
5712 }
5713 update_stmt (use->stmt);
5714 }
5715
5716 /* Rewrite the uses using the selected induction variables. */
5717
5718 static void
5719 rewrite_uses (struct ivopts_data *data)
5720 {
5721 unsigned i;
5722 struct iv_cand *cand;
5723 struct iv_use *use;
5724
5725 for (i = 0; i < n_iv_uses (data); i++)
5726 {
5727 use = iv_use (data, i);
5728 cand = use->selected;
5729 gcc_assert (cand);
5730
5731 rewrite_use (data, use, cand);
5732 }
5733 }
5734
5735 /* Removes the ivs that are not used after rewriting. */
5736
5737 static void
5738 remove_unused_ivs (struct ivopts_data *data)
5739 {
5740 unsigned j;
5741 bitmap_iterator bi;
5742
5743 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5744 {
5745 struct version_info *info;
5746
5747 info = ver_info (data, j);
5748 if (info->iv
5749 && !zero_p (info->iv->step)
5750 && !info->inv_id
5751 && !info->iv->have_use_for
5752 && !info->preserve_biv)
5753 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5754 }
5755 }
5756
5757 /* Frees data allocated by the optimization of a single loop. */
5758
5759 static void
5760 free_loop_data (struct ivopts_data *data)
5761 {
5762 unsigned i, j;
5763 bitmap_iterator bi;
5764 tree obj;
5765
5766 htab_empty (data->niters);
5767
5768 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5769 {
5770 struct version_info *info;
5771
5772 info = ver_info (data, i);
5773 if (info->iv)
5774 free (info->iv);
5775 info->iv = NULL;
5776 info->has_nonlin_use = false;
5777 info->preserve_biv = false;
5778 info->inv_id = 0;
5779 }
5780 bitmap_clear (data->relevant);
5781 bitmap_clear (data->important_candidates);
5782
5783 for (i = 0; i < n_iv_uses (data); i++)
5784 {
5785 struct iv_use *use = iv_use (data, i);
5786
5787 free (use->iv);
5788 BITMAP_FREE (use->related_cands);
5789 for (j = 0; j < use->n_map_members; j++)
5790 if (use->cost_map[j].depends_on)
5791 BITMAP_FREE (use->cost_map[j].depends_on);
5792 free (use->cost_map);
5793 free (use);
5794 }
5795 VEC_truncate (iv_use_p, data->iv_uses, 0);
5796
5797 for (i = 0; i < n_iv_cands (data); i++)
5798 {
5799 struct iv_cand *cand = iv_cand (data, i);
5800
5801 if (cand->iv)
5802 free (cand->iv);
5803 if (cand->depends_on)
5804 BITMAP_FREE (cand->depends_on);
5805 free (cand);
5806 }
5807 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5808
5809 if (data->version_info_size < num_ssa_names)
5810 {
5811 data->version_info_size = 2 * num_ssa_names;
5812 free (data->version_info);
5813 data->version_info = xcalloc (data->version_info_size,
5814 sizeof (struct version_info));
5815 }
5816
5817 data->max_inv_id = 0;
5818
5819 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5820 SET_DECL_RTL (obj, NULL_RTX);
5821
5822 VEC_truncate (tree, decl_rtl_to_reset, 0);
5823 }
5824
5825 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5826 loop tree. */
5827
5828 static void
5829 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5830 {
5831 unsigned i;
5832
5833 for (i = 1; i < loops->num; i++)
5834 if (loops->parray[i])
5835 {
5836 free (loops->parray[i]->aux);
5837 loops->parray[i]->aux = NULL;
5838 }
5839
5840 free_loop_data (data);
5841 free (data->version_info);
5842 BITMAP_FREE (data->relevant);
5843 BITMAP_FREE (data->important_candidates);
5844 htab_delete (data->niters);
5845
5846 VEC_free (tree, heap, decl_rtl_to_reset);
5847 VEC_free (iv_use_p, heap, data->iv_uses);
5848 VEC_free (iv_cand_p, heap, data->iv_candidates);
5849 }
5850
5851 /* Optimizes the LOOP. Returns true if anything changed. */
5852
5853 static bool
5854 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5855 {
5856 bool changed = false;
5857 struct iv_ca *iv_ca;
5858 edge exit;
5859
5860 data->current_loop = loop;
5861
5862 if (dump_file && (dump_flags & TDF_DETAILS))
5863 {
5864 fprintf (dump_file, "Processing loop %d\n", loop->num);
5865
5866 exit = single_dom_exit (loop);
5867 if (exit)
5868 {
5869 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5870 exit->src->index, exit->dest->index);
5871 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5872 fprintf (dump_file, "\n");
5873 }
5874
5875 fprintf (dump_file, "\n");
5876 }
5877
5878 /* For each ssa name determines whether it behaves as an induction variable
5879 in some loop. */
5880 if (!find_induction_variables (data))
5881 goto finish;
5882
5883 /* Finds interesting uses (item 1). */
5884 find_interesting_uses (data);
5885 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5886 goto finish;
5887
5888 /* Finds candidates for the induction variables (item 2). */
5889 find_iv_candidates (data);
5890
5891 /* Calculates the costs (item 3, part 1). */
5892 determine_use_iv_costs (data);
5893 determine_iv_costs (data);
5894 determine_set_costs (data);
5895
5896 /* Find the optimal set of induction variables (item 3, part 2). */
5897 iv_ca = find_optimal_iv_set (data);
5898 if (!iv_ca)
5899 goto finish;
5900 changed = true;
5901
5902 /* Create the new induction variables (item 4, part 1). */
5903 create_new_ivs (data, iv_ca);
5904 iv_ca_free (&iv_ca);
5905
5906 /* Rewrite the uses (item 4, part 2). */
5907 rewrite_uses (data);
5908
5909 /* Remove the ivs that are unused after rewriting. */
5910 remove_unused_ivs (data);
5911
5912 /* We have changed the structure of induction variables; it might happen
5913 that definitions in the scev database refer to some of them that were
5914 eliminated. */
5915 scev_reset ();
5916
5917 finish:
5918 free_loop_data (data);
5919
5920 return changed;
5921 }
5922
5923 /* Main entry point. Optimizes induction variables in LOOPS. */
5924
5925 void
5926 tree_ssa_iv_optimize (struct loops *loops)
5927 {
5928 struct loop *loop;
5929 struct ivopts_data data;
5930
5931 tree_ssa_iv_optimize_init (loops, &data);
5932
5933 /* Optimize the loops starting with the innermost ones. */
5934 loop = loops->tree_root;
5935 while (loop->inner)
5936 loop = loop->inner;
5937
5938 /* Scan the loops, inner ones first. */
5939 while (loop != loops->tree_root)
5940 {
5941 if (dump_file && (dump_flags & TDF_DETAILS))
5942 flow_loop_dump (loop, dump_file, NULL, 1);
5943
5944 tree_ssa_iv_optimize_loop (&data, loop);
5945
5946 if (loop->next)
5947 {
5948 loop = loop->next;
5949 while (loop->inner)
5950 loop = loop->inner;
5951 }
5952 else
5953 loop = loop->outer;
5954 }
5955
5956 /* FIXME. IV opts introduces new aliases and call-clobbered
5957 variables, which need to be renamed. However, when we call the
5958 renamer, not all statements will be scanned for operands. In
5959 particular, the newly introduced aliases may appear in statements
5960 that are considered "unmodified", so the renamer will not get a
5961 chance to rename those operands.
5962
5963 Work around this problem by forcing an operand re-scan on every
5964 statement. This will not be necessary once the new operand
5965 scanner is implemented. */
5966 if (need_ssa_update_p ())
5967 {
5968 basic_block bb;
5969 block_stmt_iterator si;
5970 FOR_EACH_BB (bb)
5971 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5972 update_stmt (bsi_stmt (si));
5973 }
5974
5975 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5976 tree_ssa_iv_optimize_finalize (loops, &data);
5977 }