[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (with respect to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and enable us to decide
62 costs more precisely, but getting all the interactions right would
63 be complicated. */
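/* As a purely illustrative sketch (not part of the implementation, using
   hypothetical variables a, i, n and p): for a loop such as

       for (i = 0; i < n; i++)
         a[i] = 0;

   the address use a[i] may be rewritten in terms of a single pointer
   induction variable, roughly

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   so that the per-iteration computation of a + i * sizeof (*a), and the
   counter i itself when it is not otherwise needed, can be eliminated. */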
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91
92 /* The infinite cost. */
93 #define INFTY 10000000
94
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
98
99
100 /* Representation of the induction variable. */
101 struct iv
102 {
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to which the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
110 };
111
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
114 {
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
121 };
122
123 /* Information attached to loop. */
124 struct loop_data
125 {
126 unsigned regs_used; /* Number of registers used. */
127 };
128
129 /* Types of uses. */
130 enum use_type
131 {
132 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
133 USE_OUTER, /* The induction variable is used outside the loop. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
136 };
137
138 /* The candidate - cost pair. */
139 struct cost_pair
140 {
141 struct iv_cand *cand; /* The candidate. */
142 unsigned cost; /* The cost. */
143 bitmap depends_on; /* The list of invariants that have to be
144 preserved. */
145 };
146
147 /* Use. */
148 struct iv_use
149 {
150 unsigned id; /* The id of the use. */
151 enum use_type type; /* Type of the use. */
152 struct iv *iv; /* The induction variable it is based on. */
153 tree stmt; /* Statement in which it occurs. */
154 tree *op_p; /* The place where it occurs. */
155 bitmap related_cands; /* The set of "related" iv candidates, plus the common
156 important ones. */
157
158 unsigned n_map_members; /* Number of candidates in the cost_map list. */
159 struct cost_pair *cost_map;
160 /* The costs with respect to the iv candidates. */
161
162 struct iv_cand *selected;
163 /* The selected candidate. */
164 };
165
166 /* The position where the iv is computed. */
167 enum iv_position
168 {
169 IP_NORMAL, /* At the end, just before the exit condition. */
170 IP_END, /* At the end of the latch block. */
171 IP_ORIGINAL /* The original biv. */
172 };
173
174 /* The induction variable candidate. */
175 struct iv_cand
176 {
177 unsigned id; /* The number of the candidate. */
178 bool important; /* Whether this is an "important" candidate, i.e. such
179 that it should be considered by all uses. */
180 enum iv_position pos; /* Where it is computed. */
181 tree incremented_at; /* For the original biv, the statement where it is
182 incremented. */
183 tree var_before; /* The variable used for it before increment. */
184 tree var_after; /* The variable used for it after increment. */
185 struct iv *iv; /* The value of the candidate. NULL for
186 "pseudocandidate" used to indicate the possibility
187 to replace the final value of an iv by direct
188 computation of the value. */
189 unsigned cost; /* Cost of the candidate. */
190 };
191
192 /* The data used by the induction variable optimizations. */
193
194 struct ivopts_data
195 {
196 /* The currently optimized loop. */
197 struct loop *current_loop;
198
199 /* Numbers of iterations for all exits of the current loop. */
200 htab_t niters;
201
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
204
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
207
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
210
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
213
214 /* The uses of induction variables. */
215 varray_type iv_uses;
216
217 /* The candidates. */
218 varray_type iv_candidates;
219
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
222
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
226 };
227
228 /* An assignment of iv candidates to uses. */
229
230 struct iv_ca
231 {
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
234
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
237
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
240
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
243
244 /* The candidates used. */
245 bitmap cands;
246
247 /* The number of candidates in the set. */
248 unsigned n_cands;
249
250 /* Total number of registers needed. */
251 unsigned n_regs;
252
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
255
256 /* Total cost of candidates. */
257 unsigned cand_cost;
258
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
261
262 /* Total cost of the assignment. */
263 unsigned cost;
264 };
265
266 /* Difference of two iv candidate assignments. */
267
268 struct iv_ca_delta
269 {
270 /* Changed use. */
271 struct iv_use *use;
272
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
275
276 /* A new assignment. */
277 struct cost_pair *new_cp;
278
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
281 };
282
283 /* Bound on the number of candidates below which all candidates are considered. */
284
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
287
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
290
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
293
294 /* If there are at most this number of ivs in the set, always try removing
295 unnecessary ivs from the set. */
296
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
299
300 /* The list of trees for which the decl_rtl field must be reset is stored
301 here. */
302
303 static varray_type decl_rtl_to_reset;
304
305 /* Number of uses recorded in DATA. */
306
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
309 {
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
311 }
312
313 /* Ith use recorded in DATA. */
314
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
317 {
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
319 }
320
321 /* Number of candidates recorded in DATA. */
322
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
325 {
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
327 }
328
329 /* Ith candidate recorded in DATA. */
330
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
333 {
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
335 }
336
337 /* The data for LOOP. */
338
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
341 {
342 return loop->aux;
343 }
344
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
346
347 static edge
348 single_dom_exit (struct loop *loop)
349 {
350 edge exit = loop->single_exit;
351
352 if (!exit)
353 return NULL;
354
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
357
358 return exit;
359 }
360
361 /* Dumps information about the induction variable IV to FILE. */
362
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
366 {
367 if (iv->ssa_name)
368 {
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
372 }
373
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
377
378 if (iv->step)
379 {
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
383
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
387 }
388 else
389 {
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
393 }
394
395 if (iv->base_object)
396 {
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
400 }
401
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
404 }
405
406 /* Dumps information about the USE to FILE. */
407
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
411 {
412 fprintf (file, "use %d\n", use->id);
413
414 switch (use->type)
415 {
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
419
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
423
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
427
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
431
432 default:
433 gcc_unreachable ();
434 }
435
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
439
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
444
445 dump_iv (file, use->iv);
446
447 if (use->related_cands)
448 {
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
451 }
452 }
453
454 /* Dumps information about the uses to FILE. */
455
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
459 {
460 unsigned i;
461 struct iv_use *use;
462
463 for (i = 0; i < n_iv_uses (data); i++)
464 {
465 use = iv_use (data, i);
466
467 dump_use (file, use);
468 fprintf (file, "\n");
469 }
470 }
471
472 /* Dumps information about induction variable candidate CAND to FILE. */
473
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
477 {
478 struct iv *iv = cand->iv;
479
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
482
483 if (!iv)
484 {
485 fprintf (file, " final value replacement\n");
486 return;
487 }
488
489 switch (cand->pos)
490 {
491 case IP_NORMAL:
492 fprintf (file, " incremented before exit test\n");
493 break;
494
495 case IP_END:
496 fprintf (file, " incremented at end\n");
497 break;
498
499 case IP_ORIGINAL:
500 fprintf (file, " original biv\n");
501 break;
502 }
503
504 dump_iv (file, iv);
505 }
506
507 /* Returns the info for ssa version VER. */
508
509 static inline struct version_info *
510 ver_info (struct ivopts_data *data, unsigned ver)
511 {
512 return data->version_info + ver;
513 }
514
515 /* Returns the info for ssa name NAME. */
516
517 static inline struct version_info *
518 name_info (struct ivopts_data *data, tree name)
519 {
520 return ver_info (data, SSA_NAME_VERSION (name));
521 }
522
523 /* Checks whether there exists a number X such that X * B = A, counting
524 modulo 2^BITS. */
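/* An illustrative example (not used by the pass itself): divide (8, 6, 2, &x)
   first halves the equation to x * 1 = 3 (mod 2^7) and stores 3 in *x, since
   3 * 2 == 6 (mod 2^8); divide (8, 3, 2, &x) returns false, because no
   multiple of an even number can be odd. */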
525
526 static bool
527 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
528 HOST_WIDE_INT *x)
529 {
530 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
531 unsigned HOST_WIDE_INT inv, ex, val;
532 unsigned i;
533
534 a &= mask;
535 b &= mask;
536
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a & 1) && !(b & 1))
539 {
540 a >>= 1;
541 b >>= 1;
542 bits--;
543 mask >>= 1;
544 }
545
546 if (!(b & 1))
547 {
548 /* If b is still even, a is odd and there is no such x. */
549 return false;
550 }
551
552 /* Find the inverse of b. Since b is odd, the group of units modulo 2^bits
553 has order 2^(bits - 1), so the inverse is b^(2^(bits - 1) - 1) (mod 2^bits). */
554 inv = 1;
555 ex = b;
556 for (i = 0; i < bits - 1; i++)
557 {
558 inv = (inv * ex) & mask;
559 ex = (ex * ex) & mask;
560 }
561
562 val = (a * inv) & mask;
563
564 gcc_assert (((val * b) & mask) == a);
565
566 if ((val >> (bits - 1)) & 1)
567 val |= ~mask;
568
569 *x = val;
570
571 return true;
572 }
573
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
575 emitted in LOOP. */
576
577 static bool
578 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
579 {
580 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
581
582 gcc_assert (bb);
583
584 if (sbb == loop->latch)
585 return true;
586
587 if (sbb != bb)
588 return false;
589
590 return stmt == last_stmt (bb);
591 }
592
593 /* Returns true if STMT is after the place where the original induction
594 variable CAND is incremented. */
595
596 static bool
597 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
598 {
599 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
600 basic_block stmt_bb = bb_for_stmt (stmt);
601 block_stmt_iterator bsi;
602
603 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
604 return false;
605
606 if (stmt_bb != cand_bb)
607 return true;
608
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
612 {
613 if (bsi_stmt (bsi) == cand->incremented_at)
614 return false;
615 if (bsi_stmt (bsi) == stmt)
616 return true;
617 }
618 }
619
620 /* Returns true if STMT is after the place where the induction variable
621 CAND is incremented in LOOP. */
622
623 static bool
624 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
625 {
626 switch (cand->pos)
627 {
628 case IP_END:
629 return false;
630
631 case IP_NORMAL:
632 return stmt_after_ip_normal_pos (loop, stmt);
633
634 case IP_ORIGINAL:
635 return stmt_after_ip_original_pos (cand, stmt);
636
637 default:
638 gcc_unreachable ();
639 }
640 }
641
642 /* Element of the table in which we cache the numbers of iterations obtained
643 from exits of the loop. */
644
645 struct nfe_cache_elt
646 {
647 /* The edge for which the number of iterations is cached. */
648 edge exit;
649
650 /* True if the # of iterations was successfully determined. */
651 bool valid_p;
652
653 /* Description of # of iterations. */
654 struct tree_niter_desc niter;
655 };
656
657 /* Hash function for nfe_cache_elt E. */
658
659 static hashval_t
660 nfe_hash (const void *e)
661 {
662 const struct nfe_cache_elt *elt = e;
663
664 return htab_hash_pointer (elt->exit);
665 }
666
667 /* Equality function for nfe_cache_elt E1 and edge E2. */
668
669 static int
670 nfe_eq (const void *e1, const void *e2)
671 {
672 const struct nfe_cache_elt *elt1 = e1;
673
674 return elt1->exit == e2;
675 }
676
677 /* Returns structure describing number of iterations determined from
678 EXIT of DATA->current_loop, or NULL if something goes wrong. */
679
680 static struct tree_niter_desc *
681 niter_for_exit (struct ivopts_data *data, edge exit)
682 {
683 struct nfe_cache_elt *nfe_desc;
684 PTR *slot;
685
686 slot = htab_find_slot_with_hash (data->niters, exit,
687 htab_hash_pointer (exit),
688 INSERT);
689
690 if (!*slot)
691 {
692 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
693 nfe_desc->exit = exit;
694 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
695 exit, &nfe_desc->niter);
696 *slot = nfe_desc;
697 }
698 else
699 nfe_desc = *slot;
700
701 if (!nfe_desc->valid_p)
702 return NULL;
703
704 return &nfe_desc->niter;
705 }
706
707 /* Returns structure describing number of iterations determined from
708 single dominating exit of DATA->current_loop, or NULL if something
709 goes wrong. */
710
711 static struct tree_niter_desc *
712 niter_for_single_dom_exit (struct ivopts_data *data)
713 {
714 edge exit = single_dom_exit (data->current_loop);
715
716 if (!exit)
717 return NULL;
718
719 return niter_for_exit (data, exit);
720 }
721
722 /* Initializes data structures used by the iv optimization pass, stored
723 in DATA. LOOPS is the loop tree. */
724
725 static void
726 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
727 {
728 unsigned i;
729
730 data->version_info_size = 2 * num_ssa_names;
731 data->version_info = xcalloc (data->version_info_size,
732 sizeof (struct version_info));
733 data->relevant = BITMAP_ALLOC (NULL);
734 data->important_candidates = BITMAP_ALLOC (NULL);
735 data->max_inv_id = 0;
736 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
737
738 for (i = 1; i < loops->num; i++)
739 if (loops->parray[i])
740 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
741
742 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
744 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
745 }
746
747 /* Returns a memory object to which EXPR points. In case we are able to
748 determine that it does not point to any such object, NULL is returned. */
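/* Roughly speaking (an illustration with hypothetical operands): for &a[i]
   this returns &a; for a pointer p plus an integer offset it returns the
   base object of p; and for any expression that is not of pointer type it
   returns NULL_TREE. */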
749
750 static tree
751 determine_base_object (tree expr)
752 {
753 enum tree_code code = TREE_CODE (expr);
754 tree base, obj, op0, op1;
755
756 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
757 return NULL_TREE;
758
759 switch (code)
760 {
761 case INTEGER_CST:
762 return NULL_TREE;
763
764 case ADDR_EXPR:
765 obj = TREE_OPERAND (expr, 0);
766 base = get_base_address (obj);
767
768 if (!base)
769 return expr;
770
771 if (TREE_CODE (base) == INDIRECT_REF)
772 return determine_base_object (TREE_OPERAND (base, 0));
773
774 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
775
776 case PLUS_EXPR:
777 case MINUS_EXPR:
778 op0 = determine_base_object (TREE_OPERAND (expr, 0));
779 op1 = determine_base_object (TREE_OPERAND (expr, 1));
780
781 if (!op1)
782 return op0;
783
784 if (!op0)
785 return (code == PLUS_EXPR
786 ? op1
787 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
788
789 return fold (build (code, ptr_type_node, op0, op1));
790
791 case NOP_EXPR:
792 case CONVERT_EXPR:
793 return determine_base_object (TREE_OPERAND (expr, 0));
794
795 default:
796 return fold_convert (ptr_type_node, expr);
797 }
798 }
799
800 /* Allocates an induction variable with the given initial value BASE and
801 step STEP. */
802
803 static struct iv *
804 alloc_iv (tree base, tree step)
805 {
806 struct iv *iv = xcalloc (1, sizeof (struct iv));
807
808 if (step && integer_zerop (step))
809 step = NULL_TREE;
810
811 iv->base = base;
812 iv->base_object = determine_base_object (base);
813 iv->step = step;
814 iv->biv_p = false;
815 iv->have_use_for = false;
816 iv->use_id = 0;
817 iv->ssa_name = NULL_TREE;
818
819 return iv;
820 }
821
822 /* Sets STEP and BASE for induction variable IV. */
823
824 static void
825 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
826 {
827 struct version_info *info = name_info (data, iv);
828
829 gcc_assert (!info->iv);
830
831 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
832 info->iv = alloc_iv (base, step);
833 info->iv->ssa_name = iv;
834 }
835
836 /* Finds induction variable declaration for VAR. */
837
838 static struct iv *
839 get_iv (struct ivopts_data *data, tree var)
840 {
841 basic_block bb;
842
843 if (!name_info (data, var)->iv)
844 {
845 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
846
847 if (!bb
848 || !flow_bb_inside_loop_p (data->current_loop, bb))
849 set_iv (data, var, var, NULL_TREE);
850 }
851
852 return name_info (data, var)->iv;
853 }
854
855 /* Determines the step of a biv defined in PHI. */
856
857 static tree
858 determine_biv_step (tree phi)
859 {
860 struct loop *loop = bb_for_stmt (phi)->loop_father;
861 tree name = PHI_RESULT (phi), base, step;
862 tree type = TREE_TYPE (name);
863
864 if (!is_gimple_reg (name))
865 return NULL_TREE;
866
867 if (!simple_iv (loop, phi, name, &base, &step))
868 return NULL_TREE;
869
870 if (!step)
871 return build_int_cst (type, 0);
872
873 return step;
874 }
875
876 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
877
878 static bool
879 abnormal_ssa_name_p (tree exp)
880 {
881 if (!exp)
882 return false;
883
884 if (TREE_CODE (exp) != SSA_NAME)
885 return false;
886
887 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
888 }
889
890 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
891 abnormal phi node. Callback for for_each_index. */
892
893 static bool
894 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
895 void *data ATTRIBUTE_UNUSED)
896 {
897 if (TREE_CODE (base) == ARRAY_REF)
898 {
899 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
900 return false;
901 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
902 return false;
903 }
904
905 return !abnormal_ssa_name_p (*index);
906 }
907
908 /* Returns true if EXPR contains a ssa name that occurs in an
909 abnormal phi node. */
910
911 static bool
912 contains_abnormal_ssa_name_p (tree expr)
913 {
914 enum tree_code code = TREE_CODE (expr);
915 enum tree_code_class class = TREE_CODE_CLASS (code);
916
917 if (code == SSA_NAME)
918 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
919
920 if (code == INTEGER_CST
921 || is_gimple_min_invariant (expr))
922 return false;
923
924 if (code == ADDR_EXPR)
925 return !for_each_index (&TREE_OPERAND (expr, 0),
926 idx_contains_abnormal_ssa_name_p,
927 NULL);
928
929 switch (class)
930 {
931 case tcc_binary:
932 case tcc_comparison:
933 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
934 return true;
935
936 /* Fallthru. */
937 case tcc_unary:
938 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
939 return true;
940
941 break;
942
943 default:
944 gcc_unreachable ();
945 }
946
947 return false;
948 }
949
950 /* Finds basic ivs. */
951
952 static bool
953 find_bivs (struct ivopts_data *data)
954 {
955 tree phi, step, type, base;
956 bool found = false;
957 struct loop *loop = data->current_loop;
958
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
960 {
961 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
962 continue;
963
964 step = determine_biv_step (phi);
965
966 if (!step)
967 continue;
968 if (cst_and_fits_in_hwi (step)
969 && int_cst_value (step) == 0)
970 continue;
971
972 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
973 if (contains_abnormal_ssa_name_p (base))
974 continue;
975
976 type = TREE_TYPE (PHI_RESULT (phi));
977 base = fold_convert (type, base);
978 step = fold_convert (type, step);
979
980 /* FIXME: We do not handle induction variables whose step does
981 not satisfy cst_and_fits_in_hwi. */
982 if (!cst_and_fits_in_hwi (step))
983 continue;
984
985 set_iv (data, PHI_RESULT (phi), base, step);
986 found = true;
987 }
988
989 return found;
990 }
991
992 /* Marks basic ivs. */
993
994 static void
995 mark_bivs (struct ivopts_data *data)
996 {
997 tree phi, var;
998 struct iv *iv, *incr_iv;
999 struct loop *loop = data->current_loop;
1000 basic_block incr_bb;
1001
1002 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1003 {
1004 iv = get_iv (data, PHI_RESULT (phi));
1005 if (!iv)
1006 continue;
1007
1008 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1009 incr_iv = get_iv (data, var);
1010 if (!incr_iv)
1011 continue;
1012
1013 /* If the increment is in the subloop, ignore it. */
1014 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1015 if (incr_bb->loop_father != data->current_loop
1016 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1017 continue;
1018
1019 iv->biv_p = true;
1020 incr_iv->biv_p = true;
1021 }
1022 }
1023
1024 /* Checks whether STMT defines a linear induction variable and stores its
1025 parameters to BASE and STEP. */
1026
1027 static bool
1028 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1029 tree *base, tree *step)
1030 {
1031 tree lhs;
1032 struct loop *loop = data->current_loop;
1033
1034 *base = NULL_TREE;
1035 *step = NULL_TREE;
1036
1037 if (TREE_CODE (stmt) != MODIFY_EXPR)
1038 return false;
1039
1040 lhs = TREE_OPERAND (stmt, 0);
1041 if (TREE_CODE (lhs) != SSA_NAME)
1042 return false;
1043
1044 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1045 return false;
1046
1047 /* FIXME: We do not handle induction variables whose step does
1048 not satisfy cst_and_fits_in_hwi. */
1049 if (!zero_p (*step)
1050 && !cst_and_fits_in_hwi (*step))
1051 return false;
1052
1053 if (contains_abnormal_ssa_name_p (*base))
1054 return false;
1055
1056 return true;
1057 }
1058
1059 /* Finds general ivs in statement STMT. */
1060
1061 static void
1062 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1063 {
1064 tree base, step;
1065
1066 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1067 return;
1068
1069 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1070 }
1071
1072 /* Finds general ivs in basic block BB. */
1073
1074 static void
1075 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1076 {
1077 block_stmt_iterator bsi;
1078
1079 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1080 find_givs_in_stmt (data, bsi_stmt (bsi));
1081 }
1082
1083 /* Finds general ivs. */
1084
1085 static void
1086 find_givs (struct ivopts_data *data)
1087 {
1088 struct loop *loop = data->current_loop;
1089 basic_block *body = get_loop_body_in_dom_order (loop);
1090 unsigned i;
1091
1092 for (i = 0; i < loop->num_nodes; i++)
1093 find_givs_in_bb (data, body[i]);
1094 free (body);
1095 }
1096
1097 /* For each ssa name defined in LOOP determines whether it is an induction
1098 variable and if so, its initial value and step. */
1099
1100 static bool
1101 find_induction_variables (struct ivopts_data *data)
1102 {
1103 unsigned i;
1104 bitmap_iterator bi;
1105
1106 if (!find_bivs (data))
1107 return false;
1108
1109 find_givs (data);
1110 mark_bivs (data);
1111
1112 if (dump_file && (dump_flags & TDF_DETAILS))
1113 {
1114 struct tree_niter_desc *niter;
1115
1116 niter = niter_for_single_dom_exit (data);
1117
1118 if (niter)
1119 {
1120 fprintf (dump_file, " number of iterations ");
1121 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1122 fprintf (dump_file, "\n");
1123
1124 fprintf (dump_file, " may be zero if ");
1125 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1126 fprintf (dump_file, "\n");
1127 fprintf (dump_file, "\n");
1128 }
1129
1130 fprintf (dump_file, "Induction variables:\n\n");
1131
1132 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1133 {
1134 if (ver_info (data, i)->iv)
1135 dump_iv (dump_file, ver_info (data, i)->iv);
1136 }
1137 }
1138
1139 return true;
1140 }
1141
1142 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1143
1144 static struct iv_use *
1145 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1146 tree stmt, enum use_type use_type)
1147 {
1148 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1149
1150 use->id = n_iv_uses (data);
1151 use->type = use_type;
1152 use->iv = iv;
1153 use->stmt = stmt;
1154 use->op_p = use_p;
1155 use->related_cands = BITMAP_ALLOC (NULL);
1156
1157 /* Clear the ssa name so that it does not show up in the dumps, if the
1158 caller has not reset it already. */
1159 iv->ssa_name = NULL_TREE;
1160
1161 if (dump_file && (dump_flags & TDF_DETAILS))
1162 dump_use (dump_file, use);
1163
1164 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1165
1166 return use;
1167 }
1168
1169 /* Checks whether OP is a loop-level invariant and if so, records it.
1170 NONLINEAR_USE is true if the invariant is used in a way we do not
1171 handle specially. */
1172
1173 static void
1174 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1175 {
1176 basic_block bb;
1177 struct version_info *info;
1178
1179 if (TREE_CODE (op) != SSA_NAME
1180 || !is_gimple_reg (op))
1181 return;
1182
1183 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1184 if (bb
1185 && flow_bb_inside_loop_p (data->current_loop, bb))
1186 return;
1187
1188 info = name_info (data, op);
1189 info->name = op;
1190 info->has_nonlin_use |= nonlinear_use;
1191 if (!info->inv_id)
1192 info->inv_id = ++data->max_inv_id;
1193 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1194 }
1195
1196 /* Checks whether the use OP is interesting and if so, records it
1197 as TYPE. */
1198
1199 static struct iv_use *
1200 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1201 enum use_type type)
1202 {
1203 struct iv *iv;
1204 struct iv *civ;
1205 tree stmt;
1206 struct iv_use *use;
1207
1208 if (TREE_CODE (op) != SSA_NAME)
1209 return NULL;
1210
1211 iv = get_iv (data, op);
1212 if (!iv)
1213 return NULL;
1214
1215 if (iv->have_use_for)
1216 {
1217 use = iv_use (data, iv->use_id);
1218
1219 gcc_assert (use->type == USE_NONLINEAR_EXPR
1220 || use->type == USE_OUTER);
1221
1222 if (type == USE_NONLINEAR_EXPR)
1223 use->type = USE_NONLINEAR_EXPR;
1224 return use;
1225 }
1226
1227 if (zero_p (iv->step))
1228 {
1229 record_invariant (data, op, true);
1230 return NULL;
1231 }
1232 iv->have_use_for = true;
1233
1234 civ = xmalloc (sizeof (struct iv));
1235 *civ = *iv;
1236
1237 stmt = SSA_NAME_DEF_STMT (op);
1238 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1239 || TREE_CODE (stmt) == MODIFY_EXPR);
1240
1241 use = record_use (data, NULL, civ, stmt, type);
1242 iv->use_id = use->id;
1243
1244 return use;
1245 }
1246
1247 /* Checks whether the use OP is interesting and if so, records it. */
1248
1249 static struct iv_use *
1250 find_interesting_uses_op (struct ivopts_data *data, tree op)
1251 {
1252 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1253 }
1254
1255 /* Records a definition of induction variable OP that is used outside of the
1256 loop. */
1257
1258 static struct iv_use *
1259 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1260 {
1261 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1262 }
1263
1264 /* Checks whether the condition *COND_P in STMT is interesting
1265 and if so, records it. */
1266
1267 static void
1268 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1269 {
1270 tree *op0_p;
1271 tree *op1_p;
1272 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1273 struct iv const_iv;
1274 tree zero = integer_zero_node;
1275
1276 const_iv.step = NULL_TREE;
1277
1278 if (integer_zerop (*cond_p)
1279 || integer_nonzerop (*cond_p))
1280 return;
1281
1282 if (TREE_CODE (*cond_p) == SSA_NAME)
1283 {
1284 op0_p = cond_p;
1285 op1_p = &zero;
1286 }
1287 else
1288 {
1289 op0_p = &TREE_OPERAND (*cond_p, 0);
1290 op1_p = &TREE_OPERAND (*cond_p, 1);
1291 }
1292
1293 if (TREE_CODE (*op0_p) == SSA_NAME)
1294 iv0 = get_iv (data, *op0_p);
1295 else
1296 iv0 = &const_iv;
1297
1298 if (TREE_CODE (*op1_p) == SSA_NAME)
1299 iv1 = get_iv (data, *op1_p);
1300 else
1301 iv1 = &const_iv;
1302
1303 if (/* When comparing with a non-invariant value, we cannot do any sensible
1304 induction variable elimination. */
1305 (!iv0 || !iv1)
1306 /* Eliminating condition based on two ivs would be nontrivial.
1307 ??? TODO -- it is not really important to handle this case. */
1308 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1309 {
1310 find_interesting_uses_op (data, *op0_p);
1311 find_interesting_uses_op (data, *op1_p);
1312 return;
1313 }
1314
1315 if (zero_p (iv0->step) && zero_p (iv1->step))
1316 {
1317 /* If both are invariants, this is a job for unswitching. */
1318 return;
1319 }
1320
1321 civ = xmalloc (sizeof (struct iv));
1322 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1323 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1324 }
1325
1326 /* Returns true if expression EXPR is obviously invariant in LOOP,
1327 i.e. if all its operands are defined outside of the LOOP. */
1328
1329 bool
1330 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1331 {
1332 basic_block def_bb;
1333 unsigned i, len;
1334
1335 if (is_gimple_min_invariant (expr))
1336 return true;
1337
1338 if (TREE_CODE (expr) == SSA_NAME)
1339 {
1340 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1341 if (def_bb
1342 && flow_bb_inside_loop_p (loop, def_bb))
1343 return false;
1344
1345 return true;
1346 }
1347
1348 if (!EXPR_P (expr))
1349 return false;
1350
1351 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1352 for (i = 0; i < len; i++)
1353 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1354 return false;
1355
1356 return true;
1357 }
1358
1359 /* Accumulates the steps of indices into DATA and replaces their values with
1360 the initial ones. Returns false when the value of the index cannot be
1361 determined. Callback for for_each_index. */
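/* A sketch of what the callback below accumulates: for a reference a[i]
   where the index i is an induction variable with step 1 and the array
   elements are 4 bytes wide, the contribution accumulated into the step is
   4 * 1 = 4, i.e. the address advances by four bytes per iteration. */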
1362
1363 struct ifs_ivopts_data
1364 {
1365 struct ivopts_data *ivopts_data;
1366 tree stmt;
1367 tree *step_p;
1368 };
1369
1370 static bool
1371 idx_find_step (tree base, tree *idx, void *data)
1372 {
1373 struct ifs_ivopts_data *dta = data;
1374 struct iv *iv;
1375 tree step, type, iv_type, iv_step, lbound, off;
1376 struct loop *loop = dta->ivopts_data->current_loop;
1377
1378 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1379 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1380 return false;
1381
1382 /* If base is a component ref, require that the offset of the reference
1383 be invariant. */
1384 if (TREE_CODE (base) == COMPONENT_REF)
1385 {
1386 off = component_ref_field_offset (base);
1387 return expr_invariant_in_loop_p (loop, off);
1388 }
1389
1390 /* If base is an array, first check whether we will be able to move the
1391 reference out of the loop (in order to take its address in strength
1392 reduction). In order for this to work we need both the lower bound
1393 and the step to be loop invariants. */
1394 if (TREE_CODE (base) == ARRAY_REF)
1395 {
1396 step = array_ref_element_size (base);
1397 lbound = array_ref_low_bound (base);
1398
1399 if (!expr_invariant_in_loop_p (loop, step)
1400 || !expr_invariant_in_loop_p (loop, lbound))
1401 return false;
1402 }
1403
1404 if (TREE_CODE (*idx) != SSA_NAME)
1405 return true;
1406
1407 iv = get_iv (dta->ivopts_data, *idx);
1408 if (!iv)
1409 return false;
1410
1411 *idx = iv->base;
1412
1413 if (!iv->step)
1414 return true;
1415
1416 iv_type = TREE_TYPE (iv->base);
1417 type = build_pointer_type (TREE_TYPE (base));
1418 if (TREE_CODE (base) == ARRAY_REF)
1419 {
1420 step = array_ref_element_size (base);
1421
1422 /* We only handle addresses whose step is an integer constant. */
1423 if (TREE_CODE (step) != INTEGER_CST)
1424 return false;
1425 }
1426 else
1427 /* The step for pointer arithmetic is already 1 byte. */
1428 step = build_int_cst (type, 1);
1429
1430 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1431 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1432 type, iv->base, iv->step, dta->stmt);
1433 else
1434 iv_step = fold_convert (iv_type, iv->step);
1435
1436 if (!iv_step)
1437 {
1438 /* The index might wrap. */
1439 return false;
1440 }
1441
1442 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1443
1444 if (!*dta->step_p)
1445 *dta->step_p = step;
1446 else
1447 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1448 *dta->step_p, step);
1449
1450 return true;
1451 }
1452
1453 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1454 object is passed to it in DATA. */
1455
1456 static bool
1457 idx_record_use (tree base, tree *idx,
1458 void *data)
1459 {
1460 find_interesting_uses_op (data, *idx);
1461 if (TREE_CODE (base) == ARRAY_REF)
1462 {
1463 find_interesting_uses_op (data, array_ref_element_size (base));
1464 find_interesting_uses_op (data, array_ref_low_bound (base));
1465 }
1466 return true;
1467 }
1468
1469 /* Returns true if memory reference REF may be unaligned. */
1470
1471 static bool
1472 may_be_unaligned_p (tree ref)
1473 {
1474 tree base;
1475 tree base_type;
1476 HOST_WIDE_INT bitsize;
1477 HOST_WIDE_INT bitpos;
1478 tree toffset;
1479 enum machine_mode mode;
1480 int unsignedp, volatilep;
1481 unsigned base_align;
1482
1483 /* The test below is basically a copy of what expr.c:normal_inner_ref
1484 does to check whether the object must be loaded by parts when
1485 STRICT_ALIGNMENT is true. */
1486 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1487 &unsignedp, &volatilep, true);
1488 base_type = TREE_TYPE (base);
1489 base_align = TYPE_ALIGN (base_type);
1490
1491 if (mode != BLKmode
1492 && (base_align < GET_MODE_ALIGNMENT (mode)
1493 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1494 || bitpos % BITS_PER_UNIT != 0))
1495 return true;
1496
1497 return false;
1498 }
1499
1500 /* Finds addresses in *OP_P inside STMT. */
1501
1502 static void
1503 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1504 {
1505 tree base = unshare_expr (*op_p), step = NULL;
1506 struct iv *civ;
1507 struct ifs_ivopts_data ifs_ivopts_data;
1508
1509 /* Ignore bitfields for now. Not really something terribly complicated
1510 to handle. TODO. */
1511 if (TREE_CODE (base) == COMPONENT_REF
1512 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1513 goto fail;
1514
1515 if (STRICT_ALIGNMENT
1516 && may_be_unaligned_p (base))
1517 goto fail;
1518
1519 ifs_ivopts_data.ivopts_data = data;
1520 ifs_ivopts_data.stmt = stmt;
1521 ifs_ivopts_data.step_p = &step;
1522 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1523 || zero_p (step))
1524 goto fail;
1525
1526 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1527 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1528
1529 if (TREE_CODE (base) == INDIRECT_REF)
1530 base = TREE_OPERAND (base, 0);
1531 else
1532 base = build_addr (base);
1533
1534 civ = alloc_iv (base, step);
1535 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1536 return;
1537
1538 fail:
1539 for_each_index (op_p, idx_record_use, data);
1540 }
1541
1542 /* Finds and records invariants used in STMT. */
1543
1544 static void
1545 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1546 {
1547 use_optype uses = NULL;
1548 unsigned i, n;
1549 tree op;
1550
1551 if (TREE_CODE (stmt) == PHI_NODE)
1552 n = PHI_NUM_ARGS (stmt);
1553 else
1554 {
1555 get_stmt_operands (stmt);
1556 uses = STMT_USE_OPS (stmt);
1557 n = NUM_USES (uses);
1558 }
1559
1560 for (i = 0; i < n; i++)
1561 {
1562 if (TREE_CODE (stmt) == PHI_NODE)
1563 op = PHI_ARG_DEF (stmt, i);
1564 else
1565 op = USE_OP (uses, i);
1566
1567 record_invariant (data, op, false);
1568 }
1569 }
1570
1571 /* Finds interesting uses of induction variables in the statement STMT. */
1572
1573 static void
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1575 {
1576 struct iv *iv;
1577 tree op, lhs, rhs;
1578 use_optype uses = NULL;
1579 unsigned i, n;
1580
1581 find_invariants_stmt (data, stmt);
1582
1583 if (TREE_CODE (stmt) == COND_EXPR)
1584 {
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586 return;
1587 }
1588
1589 if (TREE_CODE (stmt) == MODIFY_EXPR)
1590 {
1591 lhs = TREE_OPERAND (stmt, 0);
1592 rhs = TREE_OPERAND (stmt, 1);
1593
1594 if (TREE_CODE (lhs) == SSA_NAME)
1595 {
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1598
1599 iv = get_iv (data, lhs);
1600
1601 if (iv && !zero_p (iv->step))
1602 return;
1603 }
1604
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1606 {
1607 case tcc_comparison:
1608 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1609 return;
1610
1611 case tcc_reference:
1612 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1613 if (REFERENCE_CLASS_P (lhs))
1614 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1615 return;
1616
1617 default: ;
1618 }
1619
1620 if (REFERENCE_CLASS_P (lhs)
1621 && is_gimple_val (rhs))
1622 {
1623 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1624 find_interesting_uses_op (data, rhs);
1625 return;
1626 }
1627
1628 /* TODO -- we should also handle address uses of type
1629
1630 memory = call (whatever);
1631
1632 and
1633
1634 call (memory). */
1635 }
1636
1637 if (TREE_CODE (stmt) == PHI_NODE
1638 && bb_for_stmt (stmt) == data->current_loop->header)
1639 {
1640 lhs = PHI_RESULT (stmt);
1641 iv = get_iv (data, lhs);
1642
1643 if (iv && !zero_p (iv->step))
1644 return;
1645 }
1646
1647 if (TREE_CODE (stmt) == PHI_NODE)
1648 n = PHI_NUM_ARGS (stmt);
1649 else
1650 {
1651 uses = STMT_USE_OPS (stmt);
1652 n = NUM_USES (uses);
1653 }
1654
1655 for (i = 0; i < n; i++)
1656 {
1657 if (TREE_CODE (stmt) == PHI_NODE)
1658 op = PHI_ARG_DEF (stmt, i);
1659 else
1660 op = USE_OP (uses, i);
1661
1662 if (TREE_CODE (op) != SSA_NAME)
1663 continue;
1664
1665 iv = get_iv (data, op);
1666 if (!iv)
1667 continue;
1668
1669 find_interesting_uses_op (data, op);
1670 }
1671 }
1672
1673 /* Finds interesting uses of induction variables outside of loops
1674 on loop exit edge EXIT. */
1675
1676 static void
1677 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1678 {
1679 tree phi, def;
1680
1681 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1682 {
1683 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1684 find_interesting_uses_outer (data, def);
1685 }
1686 }
1687
1688 /* Finds uses of the induction variables that are interesting. */
1689
1690 static void
1691 find_interesting_uses (struct ivopts_data *data)
1692 {
1693 basic_block bb;
1694 block_stmt_iterator bsi;
1695 tree phi;
1696 basic_block *body = get_loop_body (data->current_loop);
1697 unsigned i;
1698 struct version_info *info;
1699 edge e;
1700
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "Uses:\n\n");
1703
1704 for (i = 0; i < data->current_loop->num_nodes; i++)
1705 {
1706 edge_iterator ei;
1707 bb = body[i];
1708
1709 FOR_EACH_EDGE (e, ei, bb->succs)
1710 if (e->dest != EXIT_BLOCK_PTR
1711 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1712 find_interesting_uses_outside (data, e);
1713
1714 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1715 find_interesting_uses_stmt (data, phi);
1716 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1717 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1718 }
1719
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1721 {
1722 bitmap_iterator bi;
1723
1724 fprintf (dump_file, "\n");
1725
1726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1727 {
1728 info = ver_info (data, i);
1729 if (info->inv_id)
1730 {
1731 fprintf (dump_file, " ");
1732 print_generic_expr (dump_file, info->name, TDF_SLIM);
1733 fprintf (dump_file, " is invariant (%d)%s\n",
1734 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1735 }
1736 }
1737
1738 fprintf (dump_file, "\n");
1739 }
1740
1741 free (body);
1742 }
1743
1744 /* Strips constant offsets from EXPR and stores them in OFFSET. If INSIDE_ADDR
1745 is true, assume we are inside an address. */
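/* For example (illustrative only): stripping &a[i + 4], where the array
   elements are 4 bytes wide, yields roughly &a[i] with *OFFSET set to 16,
   and stripping p + 8 yields p with *OFFSET set to 8. */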
1746
1747 static tree
1748 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1749 {
1750 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1751 enum tree_code code;
1752 tree type, orig_type = TREE_TYPE (expr);
1753 unsigned HOST_WIDE_INT off0, off1, st;
1754 tree orig_expr = expr;
1755
1756 STRIP_NOPS (expr);
1757 type = TREE_TYPE (expr);
1758 code = TREE_CODE (expr);
1759 *offset = 0;
1760
1761 switch (code)
1762 {
1763 case INTEGER_CST:
1764 if (!cst_and_fits_in_hwi (expr)
1765 || zero_p (expr))
1766 return orig_expr;
1767
1768 *offset = int_cst_value (expr);
1769 return build_int_cst_type (orig_type, 0);
1770
1771 case PLUS_EXPR:
1772 case MINUS_EXPR:
1773 op0 = TREE_OPERAND (expr, 0);
1774 op1 = TREE_OPERAND (expr, 1);
1775
1776 op0 = strip_offset (op0, false, &off0);
1777 op1 = strip_offset (op1, false, &off1);
1778
1779 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1780 if (op0 == TREE_OPERAND (expr, 0)
1781 && op1 == TREE_OPERAND (expr, 1))
1782 return orig_expr;
1783
1784 if (zero_p (op1))
1785 expr = op0;
1786 else if (zero_p (op0))
1787 {
1788 if (code == PLUS_EXPR)
1789 expr = op1;
1790 else
1791 expr = build1 (NEGATE_EXPR, type, op1);
1792 }
1793 else
1794 expr = build2 (code, type, op0, op1);
1795
1796 return fold_convert (orig_type, expr);
1797
1798 case ARRAY_REF:
1799 if (!inside_addr)
1800 return orig_expr;
1801
1802 step = array_ref_element_size (expr);
1803 if (!cst_and_fits_in_hwi (step))
1804 break;
1805
1806 st = int_cst_value (step);
1807 op1 = TREE_OPERAND (expr, 1);
1808 op1 = strip_offset (op1, false, &off1);
1809 *offset = off1 * st;
1810 break;
1811
1812 case COMPONENT_REF:
1813 if (!inside_addr)
1814 return orig_expr;
1815 break;
1816
1817 case ADDR_EXPR:
1818 inside_addr = true;
1819 break;
1820
1821 default:
1822 return orig_expr;
1823 }
1824
1825 /* Default handling of expressions for which we want to recurse into
1826 the first operand. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset (op0, inside_addr, &off0);
1829 *offset += off0;
1830
1831 if (op0 == TREE_OPERAND (expr, 0)
1832 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1833 return orig_expr;
1834
1835 expr = copy_node (expr);
1836 TREE_OPERAND (expr, 0) = op0;
1837 if (op1)
1838 TREE_OPERAND (expr, 1) = op1;
1839
1840 return fold_convert (orig_type, expr);
1841 }
1842
1843 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
1844 position to POS. If USE is not NULL, the candidate is set as related to
1845 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1846 replacement of the final value of the iv by a direct computation. */
1847
1848 static struct iv_cand *
1849 add_candidate_1 (struct ivopts_data *data,
1850 tree base, tree step, bool important, enum iv_position pos,
1851 struct iv_use *use, tree incremented_at)
1852 {
1853 unsigned i;
1854 struct iv_cand *cand = NULL;
1855 tree type;
1856
1857 if (base)
1858 {
1859 type = TREE_TYPE (base);
1860 if (!TYPE_UNSIGNED (type))
1861 {
1862 type = unsigned_type_for (type);
1863 base = fold_convert (type, base);
1864 if (step)
1865 step = fold_convert (type, step);
1866 }
1867 }
1868
1869 for (i = 0; i < n_iv_cands (data); i++)
1870 {
1871 cand = iv_cand (data, i);
1872
1873 if (cand->pos != pos)
1874 continue;
1875
1876 if (cand->incremented_at != incremented_at)
1877 continue;
1878
1879 if (!cand->iv)
1880 {
1881 if (!base && !step)
1882 break;
1883
1884 continue;
1885 }
1886
1887 if (!base && !step)
1888 continue;
1889
1890 if (!operand_equal_p (base, cand->iv->base, 0))
1891 continue;
1892
1893 if (zero_p (cand->iv->step))
1894 {
1895 if (zero_p (step))
1896 break;
1897 }
1898 else
1899 {
1900 if (step && operand_equal_p (step, cand->iv->step, 0))
1901 break;
1902 }
1903 }
1904
1905 if (i == n_iv_cands (data))
1906 {
1907 cand = xcalloc (1, sizeof (struct iv_cand));
1908 cand->id = i;
1909
1910 if (!base && !step)
1911 cand->iv = NULL;
1912 else
1913 cand->iv = alloc_iv (base, step);
1914
1915 cand->pos = pos;
1916 if (pos != IP_ORIGINAL && cand->iv)
1917 {
1918 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1919 cand->var_after = cand->var_before;
1920 }
1921 cand->important = important;
1922 cand->incremented_at = incremented_at;
1923 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1924
1925 if (dump_file && (dump_flags & TDF_DETAILS))
1926 dump_cand (dump_file, cand);
1927 }
1928
1929 if (important && !cand->important)
1930 {
1931 cand->important = true;
1932 if (dump_file && (dump_flags & TDF_DETAILS))
1933 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1934 }
1935
1936 if (use)
1937 {
1938 bitmap_set_bit (use->related_cands, i);
1939 if (dump_file && (dump_flags & TDF_DETAILS))
1940 fprintf (dump_file, "Candidate %d is related to use %d\n",
1941 cand->id, use->id);
1942 }
1943
1944 return cand;
1945 }
1946
1947 /* Returns true if incrementing the induction variable at the end of the LOOP
1948 is allowed.
1949
1950 The purpose is to avoid splitting the latch edge with a biv increment, thus
1951 creating a jump, possibly confusing other optimization passes and leaving
1952 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1953 is not available (so we do not have a better alternative), or if the latch
1954 edge is already nonempty. */
1955
1956 static bool
1957 allow_ip_end_pos_p (struct loop *loop)
1958 {
1959 if (!ip_normal_pos (loop))
1960 return true;
1961
1962 if (!empty_block_p (ip_end_pos (loop)))
1963 return true;
1964
1965 return false;
1966 }
1967
1968 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT and
1969 position to POS. If USE is not NULL, the candidate is set as related to
1970 it. The candidate computation is scheduled on all available positions. */
1971
1972 static void
1973 add_candidate (struct ivopts_data *data,
1974 tree base, tree step, bool important, struct iv_use *use)
1975 {
1976 if (ip_normal_pos (data->current_loop))
1977 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1978 if (ip_end_pos (data->current_loop)
1979 && allow_ip_end_pos_p (data->current_loop))
1980 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1981 }
1982
1983 /* Adds standard iv candidates. */
1984
1985 static void
1986 add_standard_iv_candidates (struct ivopts_data *data)
1987 {
1988 /* Add 0 + 1 * iteration candidate. */
1989 add_candidate (data,
1990 build_int_cst (unsigned_intSI_type_node, 0),
1991 build_int_cst (unsigned_intSI_type_node, 1),
1992 true, NULL);
1993
1994 /* The same for a long type if it is still fast enough. */
1995 if (BITS_PER_WORD > 32)
1996 add_candidate (data,
1997 build_int_cst (unsigned_intDI_type_node, 0),
1998 build_int_cst (unsigned_intDI_type_node, 1),
1999 true, NULL);
2000 }
2001
2002
2003 /* Adds candidates based on the old induction variable IV. */
2004
2005 static void
2006 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2007 {
2008 tree phi, def;
2009 struct iv_cand *cand;
2010
2011 add_candidate (data, iv->base, iv->step, true, NULL);
2012
2013 /* The same, but with initial value zero. */
2014 add_candidate (data,
2015 build_int_cst (TREE_TYPE (iv->base), 0),
2016 iv->step, true, NULL);
2017
2018 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2019 if (TREE_CODE (phi) == PHI_NODE)
2020 {
2021 /* Additionally record the possibility of leaving the original iv
2022 untouched. */
2023 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2024 cand = add_candidate_1 (data,
2025 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2026 SSA_NAME_DEF_STMT (def));
2027 cand->var_before = iv->ssa_name;
2028 cand->var_after = def;
2029 }
2030 }
2031
2032 /* Adds candidates based on the old induction variables. */
2033
2034 static void
2035 add_old_ivs_candidates (struct ivopts_data *data)
2036 {
2037 unsigned i;
2038 struct iv *iv;
2039 bitmap_iterator bi;
2040
2041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2042 {
2043 iv = ver_info (data, i)->iv;
2044 if (iv && iv->biv_p && !zero_p (iv->step))
2045 add_old_iv_candidates (data, iv);
2046 }
2047 }
2048
2049 /* Adds candidates based on the value of the induction variable IV and USE. */
2050
2051 static void
2052 add_iv_value_candidates (struct ivopts_data *data,
2053 struct iv *iv, struct iv_use *use)
2054 {
2055 add_candidate (data, iv->base, iv->step, false, use);
2056
2057 /* The same, but with initial value zero. */
2058 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2059 iv->step, false, use);
2060 }
2061
2062 /* Adds candidates based on the address IV and USE. */
2063
2064 static void
2065 add_address_candidates (struct ivopts_data *data,
2066 struct iv *iv, struct iv_use *use)
2067 {
2068 tree base, abase;
2069 unsigned HOST_WIDE_INT offset;
2070
2071 /* First, the trivial choices. */
2072 add_iv_value_candidates (data, iv, use);
2073
2074 /* Second, try removing the COMPONENT_REFs. */
2075 if (TREE_CODE (iv->base) == ADDR_EXPR)
2076 {
2077 base = TREE_OPERAND (iv->base, 0);
2078 while (TREE_CODE (base) == COMPONENT_REF
2079 || (TREE_CODE (base) == ARRAY_REF
2080 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2081 base = TREE_OPERAND (base, 0);
2082
2083 if (base != TREE_OPERAND (iv->base, 0))
2084 {
2085 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2086 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2087
2088 if (TREE_CODE (base) == INDIRECT_REF)
2089 base = TREE_OPERAND (base, 0);
2090 else
2091 base = build_addr (base);
2092 add_candidate (data, base, iv->step, false, use);
2093 }
2094 }
2095
2096 /* Third, try removing the constant offset. */
2097 abase = iv->base;
2098 base = strip_offset (abase, false, &offset);
2099 if (offset)
2100 add_candidate (data, base, iv->step, false, use);
2101 }
2102
2103 /* Possibly adds pseudocandidate for replacing the final value of USE by
2104 a direct computation. */
2105
2106 static void
2107 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2108 {
2109 struct tree_niter_desc *niter;
2110
2111 /* We must know where we exit the loop and how many times it rolls. */
2112 niter = niter_for_single_dom_exit (data);
2113 if (!niter
2114 || !zero_p (niter->may_be_zero))
2115 return;
2116
2117 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2118 }
2119
2120 /* Adds candidates based on the uses. */
2121
2122 static void
2123 add_derived_ivs_candidates (struct ivopts_data *data)
2124 {
2125 unsigned i;
2126
2127 for (i = 0; i < n_iv_uses (data); i++)
2128 {
2129 struct iv_use *use = iv_use (data, i);
2130
2131 if (!use)
2132 continue;
2133
2134 switch (use->type)
2135 {
2136 case USE_NONLINEAR_EXPR:
2137 case USE_COMPARE:
2138 /* Just add the ivs based on the value of the iv used here. */
2139 add_iv_value_candidates (data, use->iv, use);
2140 break;
2141
2142 case USE_OUTER:
2143 add_iv_value_candidates (data, use->iv, use);
2144
2145 /* Additionally, add the pseudocandidate for the possibility of
2146 replacing the final value by a direct computation. */
2147 add_iv_outer_candidates (data, use);
2148 break;
2149
2150 case USE_ADDRESS:
2151 add_address_candidates (data, use->iv, use);
2152 break;
2153
2154 default:
2155 gcc_unreachable ();
2156 }
2157 }
2158 }
2159
2160 /* Record important candidates and add them to related_cands bitmaps
2161 if needed. */
2162
2163 static void
2164 record_important_candidates (struct ivopts_data *data)
2165 {
2166 unsigned i;
2167 struct iv_use *use;
2168
2169 for (i = 0; i < n_iv_cands (data); i++)
2170 {
2171 struct iv_cand *cand = iv_cand (data, i);
2172
2173 if (cand->important)
2174 bitmap_set_bit (data->important_candidates, i);
2175 }
2176
2177 data->consider_all_candidates = (n_iv_cands (data)
2178 <= CONSIDER_ALL_CANDIDATES_BOUND);
2179
2180 if (data->consider_all_candidates)
2181 {
2182 /* We will not need "related_cands" bitmaps in this case,
2183 so release them to decrease peak memory consumption. */
2184 for (i = 0; i < n_iv_uses (data); i++)
2185 {
2186 use = iv_use (data, i);
2187 BITMAP_FREE (use->related_cands);
2188 }
2189 }
2190 else
2191 {
2192 /* Add important candidates to the related_cands bitmaps. */
2193 for (i = 0; i < n_iv_uses (data); i++)
2194 bitmap_ior_into (iv_use (data, i)->related_cands,
2195 data->important_candidates);
2196 }
2197 }
2198
2199 /* Finds the candidates for the induction variables. */
2200
2201 static void
2202 find_iv_candidates (struct ivopts_data *data)
2203 {
2204 /* Add commonly used ivs. */
2205 add_standard_iv_candidates (data);
2206
2207 /* Add old induction variables. */
2208 add_old_ivs_candidates (data);
2209
2210 /* Add induction variables derived from uses. */
2211 add_derived_ivs_candidates (data);
2212
2213 /* Record the important candidates. */
2214 record_important_candidates (data);
2215 }
2216
2217 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2218 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2219 we allocate a simple list for every use. */
2220
2221 static void
2222 alloc_use_cost_map (struct ivopts_data *data)
2223 {
2224 unsigned i, size, s, j;
2225
2226 for (i = 0; i < n_iv_uses (data); i++)
2227 {
2228 struct iv_use *use = iv_use (data, i);
2229 bitmap_iterator bi;
2230
2231 if (data->consider_all_candidates)
2232 size = n_iv_cands (data);
2233 else
2234 {
2235 s = 0;
2236 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2237 {
2238 s++;
2239 }
2240
2241 /* Round up to a power of two, so that taking the modulus by it is fast. */
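/* For example, a use related to five candidates gets a table of size 8. */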
2242 for (size = 1; size < s; size <<= 1)
2243 continue;
2244 }
2245
2246 use->n_map_members = size;
2247 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2248 }
2249 }
2250
2251 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2252 depends on the invariants in DEPENDS_ON. */
2253
2254 static void
2255 set_use_iv_cost (struct ivopts_data *data,
2256 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2257 bitmap depends_on)
2258 {
2259 unsigned i, s;
2260
2261 if (cost == INFTY)
2262 {
2263 BITMAP_FREE (depends_on);
2264 return;
2265 }
2266
2267 if (data->consider_all_candidates)
2268 {
2269 use->cost_map[cand->id].cand = cand;
2270 use->cost_map[cand->id].cost = cost;
2271 use->cost_map[cand->id].depends_on = depends_on;
2272 return;
2273 }
2274
2275 /* n_map_members is a power of two, so this computes modulo. */
2276 s = cand->id & (use->n_map_members - 1);
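/* E.g. with n_map_members == 8, a candidate with id 13 starts probing
   at slot 13 & 7 == 5. */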
2277 for (i = s; i < use->n_map_members; i++)
2278 if (!use->cost_map[i].cand)
2279 goto found;
2280 for (i = 0; i < s; i++)
2281 if (!use->cost_map[i].cand)
2282 goto found;
2283
2284 gcc_unreachable ();
2285
2286 found:
2287 use->cost_map[i].cand = cand;
2288 use->cost_map[i].cost = cost;
2289 use->cost_map[i].depends_on = depends_on;
2290 }
2291
2292 /* Gets cost of (USE, CANDIDATE) pair. */
2293
2294 static struct cost_pair *
2295 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2296 struct iv_cand *cand)
2297 {
2298 unsigned i, s;
2299 struct cost_pair *ret;
2300
2301 if (!cand)
2302 return NULL;
2303
2304 if (data->consider_all_candidates)
2305 {
2306 ret = use->cost_map + cand->id;
2307 if (!ret->cand)
2308 return NULL;
2309
2310 return ret;
2311 }
2312
2313 /* n_map_members is a power of two, so this computes modulo. */
2314 s = cand->id & (use->n_map_members - 1);
2315 for (i = s; i < use->n_map_members; i++)
2316 if (use->cost_map[i].cand == cand)
2317 return use->cost_map + i;
2318
2319 for (i = 0; i < s; i++)
2320 if (use->cost_map[i].cand == cand)
2321 return use->cost_map + i;
2322
2323 return NULL;
2324 }
2325
2326 /* Returns an estimate of the cost of computing SEQ. */
2327
2328 static unsigned
2329 seq_cost (rtx seq)
2330 {
2331 unsigned cost = 0;
2332 rtx set;
2333
2334 for (; seq; seq = NEXT_INSN (seq))
2335 {
2336 set = single_set (seq);
2337 if (set)
2338 cost += rtx_cost (set, SET);
2339 else
2340 cost++;
2341 }
2342
2343 return cost;
2344 }
2345
2346 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2347 static rtx
2348 produce_memory_decl_rtl (tree obj, int *regno)
2349 {
2350 rtx x;
2351 if (!obj)
2352 abort ();
2353 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2354 {
2355 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2356 x = gen_rtx_SYMBOL_REF (Pmode, name);
2357 }
2358 else
2359 x = gen_raw_REG (Pmode, (*regno)++);
2360
2361 return gen_rtx_MEM (DECL_MODE (obj), x);
2362 }
2363
2364 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2365 walk_tree. DATA contains the current fake register number. */
2366
2367 static tree
2368 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2369 {
2370 tree obj = NULL_TREE;
2371 rtx x = NULL_RTX;
2372 int *regno = data;
2373
2374 switch (TREE_CODE (*expr_p))
2375 {
2376 case ADDR_EXPR:
2377 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2378 handled_component_p (*expr_p);
2379 expr_p = &TREE_OPERAND (*expr_p, 0))
2380 continue;
2381 obj = *expr_p;
2382 if (DECL_P (obj))
2383 x = produce_memory_decl_rtl (obj, regno);
2384 break;
2385
2386 case SSA_NAME:
2387 *ws = 0;
2388 obj = SSA_NAME_VAR (*expr_p);
2389 if (!DECL_RTL_SET_P (obj))
2390 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2391 break;
2392
2393 case VAR_DECL:
2394 case PARM_DECL:
2395 case RESULT_DECL:
2396 *ws = 0;
2397 obj = *expr_p;
2398
2399 if (DECL_RTL_SET_P (obj))
2400 break;
2401
2402 if (DECL_MODE (obj) == BLKmode)
2403 x = produce_memory_decl_rtl (obj, regno);
2404 else
2405 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2406
2407 break;
2408
2409 default:
2410 break;
2411 }
2412
2413 if (x)
2414 {
2415 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2416 SET_DECL_RTL (obj, x);
2417 }
2418
2419 return NULL_TREE;
2420 }
2421
2422 /* Determines cost of the computation of EXPR. */
2423
2424 static unsigned
2425 computation_cost (tree expr)
2426 {
2427 rtx seq, rslt;
2428 tree type = TREE_TYPE (expr);
2429 unsigned cost;
2430 /* Avoid using hard regs in ways which may be unsupported. */
2431 int regno = LAST_VIRTUAL_REGISTER + 1;
2432
2433 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2434 start_sequence ();
2435 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2436 seq = get_insns ();
2437 end_sequence ();
2438
2439 cost = seq_cost (seq);
2440 if (GET_CODE (rslt) == MEM)
2441 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2442
2443 return cost;
2444 }
2445
2446 /* Returns the variable containing the value of candidate CAND at STMT. */
2447
2448 static tree
2449 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2450 {
2451 if (stmt_after_increment (loop, cand, stmt))
2452 return cand->var_after;
2453 else
2454 return cand->var_before;
2455 }
2456
2457 /* Determines the expression by which USE is expressed from induction variable
2458 CAND at statement AT in LOOP. */
2459
2460 static tree
2461 get_computation_at (struct loop *loop,
2462 struct iv_use *use, struct iv_cand *cand, tree at)
2463 {
2464 tree ubase = use->iv->base;
2465 tree ustep = use->iv->step;
2466 tree cbase = cand->iv->base;
2467 tree cstep = cand->iv->step;
2468 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2469 tree uutype;
2470 tree expr, delta;
2471 tree ratio;
2472 unsigned HOST_WIDE_INT ustepi, cstepi;
2473 HOST_WIDE_INT ratioi;
2474
2475 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2476 {
2477 /* We do not have enough precision to express the values of the use. */
2478 return NULL_TREE;
2479 }
2480
2481 expr = var_at_stmt (loop, cand, at);
2482
2483 if (TREE_TYPE (expr) != ctype)
2484 {
2485 /* This may happen with the original ivs. */
2486 expr = fold_convert (ctype, expr);
2487 }
2488
2489 if (TYPE_UNSIGNED (utype))
2490 uutype = utype;
2491 else
2492 {
2493 uutype = unsigned_type_for (utype);
2494 ubase = fold_convert (uutype, ubase);
2495 ustep = fold_convert (uutype, ustep);
2496 }
2497
2498 if (uutype != ctype)
2499 {
2500 expr = fold_convert (uutype, expr);
2501 cbase = fold_convert (uutype, cbase);
2502 cstep = fold_convert (uutype, cstep);
2503 }
2504
2505 if (!cst_and_fits_in_hwi (cstep)
2506 || !cst_and_fits_in_hwi (ustep))
2507 return NULL_TREE;
2508
2509 ustepi = int_cst_value (ustep);
2510 cstepi = int_cst_value (cstep);
2511
2512 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2513 {
2514 /* TODO maybe consider case when ustep divides cstep and the ratio is
2515 a power of 2 (so that the division is fast to execute)? We would
2516 need to be much more careful with overflows etc. then. */
2517 return NULL_TREE;
2518 }
2519
2520 /* We may need to shift the value if we are after the increment. */
2521 if (stmt_after_increment (loop, cand, at))
2522 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2523
2524 /* use = ubase - ratio * cbase + ratio * var.
2525
2526 In the general case ubase + ratio * (var - cbase) could be better (one less
2527 multiplication), but often it is possible to eliminate redundant parts
2528 of computations from (ubase - ratio * cbase) term, and if it does not
2529 happen, fold is able to apply the distributive law to obtain this form
2530 anyway. */
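/* As an illustration (values assumed for the example): a use with base 16
   and step 8 based on a candidate with base 0 and step 4 gives ratioi == 2,
   so the use becomes 16 - 2 * 0 + 2 * var, i.e. 16 + 2 * var. */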
2531
2532 if (ratioi == 1)
2533 {
2534 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2535 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2536 }
2537 else if (ratioi == -1)
2538 {
2539 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2540 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2541 }
2542 else
2543 {
2544 ratio = build_int_cst_type (uutype, ratioi);
2545 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2546 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2547 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2548 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2549 }
2550
2551 return fold_convert (utype, expr);
2552 }
2553
2554 /* Determines the expression by which USE is expressed from induction variable
2555 CAND in LOOP. */
2556
2557 static tree
2558 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2559 {
2560 return get_computation_at (loop, use, cand, use->stmt);
2561 }
2562
2563 /* Returns cost of addition in MODE. */
2564
2565 static unsigned
2566 add_cost (enum machine_mode mode)
2567 {
2568 static unsigned costs[NUM_MACHINE_MODES];
2569 rtx seq;
2570 unsigned cost;
2571
2572 if (costs[mode])
2573 return costs[mode];
2574
2575 start_sequence ();
2576 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2577 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2578 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2579 NULL_RTX);
2580 seq = get_insns ();
2581 end_sequence ();
2582
2583 cost = seq_cost (seq);
2584 if (!cost)
2585 cost = 1;
2586
2587 costs[mode] = cost;
2588
2589 if (dump_file && (dump_flags & TDF_DETAILS))
2590 fprintf (dump_file, "Addition in %s costs %d\n",
2591 GET_MODE_NAME (mode), cost);
2592 return cost;
2593 }
2594
2595 /* Entry in a hashtable of already known costs for multiplication. */
2596 struct mbc_entry
2597 {
2598 HOST_WIDE_INT cst; /* The constant to multiply by. */
2599 enum machine_mode mode; /* In mode. */
2600 unsigned cost; /* The cost. */
2601 };
2602
2603 /* Computes the hash value for ENTRY. */
2604
2605 static hashval_t
2606 mbc_entry_hash (const void *entry)
2607 {
2608 const struct mbc_entry *e = entry;
2609
2610 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2611 }
2612
2613 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2614
2615 static int
2616 mbc_entry_eq (const void *entry1, const void *entry2)
2617 {
2618 const struct mbc_entry *e1 = entry1;
2619 const struct mbc_entry *e2 = entry2;
2620
2621 return (e1->mode == e2->mode
2622 && e1->cst == e2->cst);
2623 }
2624
2625 /* Returns cost of multiplication by constant CST in MODE. */
2626
2627 static unsigned
2628 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2629 {
2630 static htab_t costs;
2631 struct mbc_entry **cached, act;
2632 rtx seq;
2633 unsigned cost;
2634
2635 if (!costs)
2636 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2637
2638 act.mode = mode;
2639 act.cst = cst;
2640 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2641 if (*cached)
2642 return (*cached)->cost;
2643
2644 *cached = xmalloc (sizeof (struct mbc_entry));
2645 (*cached)->mode = mode;
2646 (*cached)->cst = cst;
2647
2648 start_sequence ();
2649 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2650 NULL_RTX, 0);
2651 seq = get_insns ();
2652 end_sequence ();
2653
2654 cost = seq_cost (seq);
2655
2656 if (dump_file && (dump_flags & TDF_DETAILS))
2657 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2658 (int) cst, GET_MODE_NAME (mode), cost);
2659
2660 (*cached)->cost = cost;
2661
2662 return cost;
2663 }
2664
2665 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2666 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2667 variable is omitted. The created memory accesses MODE.
2668
2669 TODO -- there must be some better way. This all is quite crude. */
2670
2671 static unsigned
2672 get_address_cost (bool symbol_present, bool var_present,
2673 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2674 {
2675 #define MAX_RATIO 128
2676 static sbitmap valid_mult;
2677 static HOST_WIDE_INT rat, off;
2678 static HOST_WIDE_INT min_offset, max_offset;
2679 static unsigned costs[2][2][2][2];
2680 unsigned cost, acost;
2681 rtx seq, addr, base;
2682 bool offset_p, ratio_p;
2683 rtx reg1;
2684 HOST_WIDE_INT s_offset;
2685 unsigned HOST_WIDE_INT mask;
2686 unsigned bits;
2687
2688 if (!valid_mult)
2689 {
2690 HOST_WIDE_INT i;
2691
2692 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2693
2694 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2695 for (i = 1; i <= 1 << 20; i <<= 1)
2696 {
2697 XEXP (addr, 1) = GEN_INT (i);
2698 if (!memory_address_p (Pmode, addr))
2699 break;
2700 }
2701 max_offset = i >> 1;
2702 off = max_offset;
2703
2704 for (i = 1; i <= 1 << 20; i <<= 1)
2705 {
2706 XEXP (addr, 1) = GEN_INT (-i);
2707 if (!memory_address_p (Pmode, addr))
2708 break;
2709 }
2710 min_offset = -(i >> 1);
2711
2712 if (dump_file && (dump_flags & TDF_DETAILS))
2713 {
2714 fprintf (dump_file, "get_address_cost:\n");
2715 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2716 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2717 }
2718
2719 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2720 sbitmap_zero (valid_mult);
2721 rat = 1;
2722 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2723 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2724 {
2725 XEXP (addr, 1) = GEN_INT (i);
2726 if (memory_address_p (Pmode, addr))
2727 {
2728 SET_BIT (valid_mult, i + MAX_RATIO);
2729 rat = i;
2730 }
2731 }
2732
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 {
2735 fprintf (dump_file, " allowed multipliers:");
2736 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2737 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2738 fprintf (dump_file, " %d", (int) i);
2739 fprintf (dump_file, "\n");
2740 fprintf (dump_file, "\n");
2741 }
2742 }
2743
2744 bits = GET_MODE_BITSIZE (Pmode);
2745 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2746 offset &= mask;
2747 if ((offset >> (bits - 1) & 1))
2748 offset |= ~mask;
2749 s_offset = offset;
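/* The code above sign-extends OFFSET into s_offset; e.g. with a 32-bit
   Pmode, an offset of 0xfffffff0 becomes -16. */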
2750
2751 cost = 0;
2752 offset_p = (s_offset != 0
2753 && min_offset <= s_offset && s_offset <= max_offset);
2754 ratio_p = (ratio != 1
2755 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2756 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2757
2758 if (ratio != 1 && !ratio_p)
2759 cost += multiply_by_cost (ratio, Pmode);
2760
2761 if (s_offset && !offset_p && !symbol_present)
2762 {
2763 cost += add_cost (Pmode);
2764 var_present = true;
2765 }
2766
2767 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2768 if (!acost)
2769 {
2770 acost = 0;
2771
2772 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2773 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2774 if (ratio_p)
2775 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2776
2777 if (var_present)
2778 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2779
2780 if (symbol_present)
2781 {
2782 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2783 if (offset_p)
2784 base = gen_rtx_fmt_e (CONST, Pmode,
2785 gen_rtx_fmt_ee (PLUS, Pmode,
2786 base,
2787 GEN_INT (off)));
2788 }
2789 else if (offset_p)
2790 base = GEN_INT (off);
2791 else
2792 base = NULL_RTX;
2793
2794 if (base)
2795 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2796
2797 start_sequence ();
2798 addr = memory_address (Pmode, addr);
2799 seq = get_insns ();
2800 end_sequence ();
2801
2802 acost = seq_cost (seq);
2803 acost += address_cost (addr, Pmode);
2804
2805 if (!acost)
2806 acost = 1;
2807 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2808 }
2809
2810 return cost + acost;
2811 }
2812
2813 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2814 the bitmap in which we should store them. */
2815
2816 static struct ivopts_data *fd_ivopts_data;
2817 static tree
2818 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2819 {
2820 bitmap *depends_on = data;
2821 struct version_info *info;
2822
2823 if (TREE_CODE (*expr_p) != SSA_NAME)
2824 return NULL_TREE;
2825 info = name_info (fd_ivopts_data, *expr_p);
2826
2827 if (!info->inv_id || info->has_nonlin_use)
2828 return NULL_TREE;
2829
2830 if (!*depends_on)
2831 *depends_on = BITMAP_ALLOC (NULL);
2832 bitmap_set_bit (*depends_on, info->inv_id);
2833
2834 return NULL_TREE;
2835 }
2836
2837 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2838 invariants the computation depends on. */
2839
2840 static unsigned
2841 force_var_cost (struct ivopts_data *data,
2842 tree expr, bitmap *depends_on)
2843 {
2844 static bool costs_initialized = false;
2845 static unsigned integer_cost;
2846 static unsigned symbol_cost;
2847 static unsigned address_cost;
2848 tree op0, op1;
2849 unsigned cost0, cost1, cost;
2850 enum machine_mode mode;
2851
2852 if (!costs_initialized)
2853 {
2854 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2855 rtx x = gen_rtx_MEM (DECL_MODE (var),
2856 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2857 tree addr;
2858 tree type = build_pointer_type (integer_type_node);
2859
2860 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2861 2000));
2862
2863 SET_DECL_RTL (var, x);
2864 TREE_STATIC (var) = 1;
2865 addr = build1 (ADDR_EXPR, type, var);
2866 symbol_cost = computation_cost (addr) + 1;
2867
2868 address_cost
2869 = computation_cost (build2 (PLUS_EXPR, type,
2870 addr,
2871 build_int_cst_type (type, 2000))) + 1;
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2873 {
2874 fprintf (dump_file, "force_var_cost:\n");
2875 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2876 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2877 fprintf (dump_file, " address %d\n", (int) address_cost);
2878 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2879 fprintf (dump_file, "\n");
2880 }
2881
2882 costs_initialized = true;
2883 }
2884
2885 STRIP_NOPS (expr);
2886
2887 if (depends_on)
2888 {
2889 fd_ivopts_data = data;
2890 walk_tree (&expr, find_depends, depends_on, NULL);
2891 }
2892
2893 if (SSA_VAR_P (expr))
2894 return 0;
2895
2896 if (TREE_INVARIANT (expr))
2897 {
2898 if (TREE_CODE (expr) == INTEGER_CST)
2899 return integer_cost;
2900
2901 if (TREE_CODE (expr) == ADDR_EXPR)
2902 {
2903 tree obj = TREE_OPERAND (expr, 0);
2904
2905 if (TREE_CODE (obj) == VAR_DECL
2906 || TREE_CODE (obj) == PARM_DECL
2907 || TREE_CODE (obj) == RESULT_DECL)
2908 return symbol_cost;
2909 }
2910
2911 return address_cost;
2912 }
2913
2914 switch (TREE_CODE (expr))
2915 {
2916 case PLUS_EXPR:
2917 case MINUS_EXPR:
2918 case MULT_EXPR:
2919 op0 = TREE_OPERAND (expr, 0);
2920 op1 = TREE_OPERAND (expr, 1);
2921 STRIP_NOPS (op0);
2922 STRIP_NOPS (op1);
2923
2924 if (is_gimple_val (op0))
2925 cost0 = 0;
2926 else
2927 cost0 = force_var_cost (data, op0, NULL);
2928
2929 if (is_gimple_val (op1))
2930 cost1 = 0;
2931 else
2932 cost1 = force_var_cost (data, op1, NULL);
2933
2934 break;
2935
2936 default:
2937 /* Just an arbitrary value, FIXME. */
2938 return target_spill_cost;
2939 }
2940
2941 mode = TYPE_MODE (TREE_TYPE (expr));
2942 switch (TREE_CODE (expr))
2943 {
2944 case PLUS_EXPR:
2945 case MINUS_EXPR:
2946 cost = add_cost (mode);
2947 break;
2948
2949 case MULT_EXPR:
2950 if (cst_and_fits_in_hwi (op0))
2951 cost = multiply_by_cost (int_cst_value (op0), mode);
2952 else if (cst_and_fits_in_hwi (op1))
2953 cost = multiply_by_cost (int_cst_value (op1), mode);
2954 else
2955 return target_spill_cost;
2956 break;
2957
2958 default:
2959 gcc_unreachable ();
2960 }
2961
2962 cost += cost0;
2963 cost += cost1;
2964
2965 /* Bound the cost by target_spill_cost. The parts of complicated
2966 computations often are either loop invariant or at least can
2967 be shared between several iv uses, so letting this grow without
2968 limits would not give reasonable results. */
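/* For illustration (a hypothetical expression): for (a + b) * 4 with a and b
   SSA names, the recursion above charges add_cost for the inner addition and
   multiply_by_cost (4, mode) for the outer multiplication, and the sum is
   then capped here. */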
2969 return cost < target_spill_cost ? cost : target_spill_cost;
2970 }
2971
2972 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2973 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2974 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2975 invariants the computation depends on. */
2976
2977 static unsigned
2978 split_address_cost (struct ivopts_data *data,
2979 tree addr, bool *symbol_present, bool *var_present,
2980 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2981 {
2982 tree core;
2983 HOST_WIDE_INT bitsize;
2984 HOST_WIDE_INT bitpos;
2985 tree toffset;
2986 enum machine_mode mode;
2987 int unsignedp, volatilep;
2988
2989 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2990 &unsignedp, &volatilep, false);
2991
2992 if (toffset != 0
2993 || bitpos % BITS_PER_UNIT != 0
2994 || TREE_CODE (core) != VAR_DECL)
2995 {
2996 *symbol_present = false;
2997 *var_present = true;
2998 fd_ivopts_data = data;
2999 walk_tree (&addr, find_depends, depends_on, NULL);
3000 return target_spill_cost;
3001 }
3002
3003 *offset += bitpos / BITS_PER_UNIT;
3004 if (TREE_STATIC (core)
3005 || DECL_EXTERNAL (core))
3006 {
3007 *symbol_present = true;
3008 *var_present = false;
3009 return 0;
3010 }
3011
3012 *symbol_present = false;
3013 *var_present = true;
3014 return 0;
3015 }
3016
3017 /* Estimates cost of expressing difference of addresses E1 - E2 as
3018 var + symbol + offset. The value of offset is added to OFFSET,
3019 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3020 part is missing. DEPENDS_ON is a set of the invariants the computation
3021 depends on. */
3022
3023 static unsigned
3024 ptr_difference_cost (struct ivopts_data *data,
3025 tree e1, tree e2, bool *symbol_present, bool *var_present,
3026 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3027 {
3028 HOST_WIDE_INT diff = 0;
3029 unsigned cost;
3030
3031 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3032
3033 if (ptr_difference_const (e1, e2, &diff))
3034 {
3035 *offset += diff;
3036 *symbol_present = false;
3037 *var_present = false;
3038 return 0;
3039 }
3040
3041 if (e2 == integer_zero_node)
3042 return split_address_cost (data, TREE_OPERAND (e1, 0),
3043 symbol_present, var_present, offset, depends_on);
3044
3045 *symbol_present = false;
3046 *var_present = true;
3047
3048 cost = force_var_cost (data, e1, depends_on);
3049 cost += force_var_cost (data, e2, depends_on);
3050 cost += add_cost (Pmode);
3051
3052 return cost;
3053 }
3054
3055 /* Estimates cost of expressing difference E1 - E2 as
3056 var + symbol + offset. The value of offset is added to OFFSET,
3057 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3058 part is missing. DEPENDS_ON is a set of the invariants the computation
3059 depends on. */
3060
3061 static unsigned
3062 difference_cost (struct ivopts_data *data,
3063 tree e1, tree e2, bool *symbol_present, bool *var_present,
3064 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3065 {
3066 unsigned cost;
3067 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3068 unsigned HOST_WIDE_INT off1, off2;
3069
3070 e1 = strip_offset (e1, false, &off1);
3071 e2 = strip_offset (e2, false, &off2);
3072 *offset += off1 - off2;
3073
3074 STRIP_NOPS (e1);
3075 STRIP_NOPS (e2);
3076
3077 if (TREE_CODE (e1) == ADDR_EXPR)
3078 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3079 depends_on);
3080 *symbol_present = false;
3081
3082 if (operand_equal_p (e1, e2, 0))
3083 {
3084 *var_present = false;
3085 return 0;
3086 }
3087 *var_present = true;
3088 if (zero_p (e2))
3089 return force_var_cost (data, e1, depends_on);
3090
3091 if (zero_p (e1))
3092 {
3093 cost = force_var_cost (data, e2, depends_on);
3094 cost += multiply_by_cost (-1, mode);
3095
3096 return cost;
3097 }
3098
3099 cost = force_var_cost (data, e1, depends_on);
3100 cost += force_var_cost (data, e2, depends_on);
3101 cost += add_cost (mode);
3102
3103 return cost;
3104 }
3105
3106 /* Determines the cost of the computation by which USE is expressed
3107 from induction variable CAND. If ADDRESS_P is true, we just need
3108 to create an address from it, otherwise we want to get it into a
3109 register. A set of invariants we depend on is stored in
3110 DEPENDS_ON. AT is the statement at which the value is computed. */
3111
3112 static unsigned
3113 get_computation_cost_at (struct ivopts_data *data,
3114 struct iv_use *use, struct iv_cand *cand,
3115 bool address_p, bitmap *depends_on, tree at)
3116 {
3117 tree ubase = use->iv->base, ustep = use->iv->step;
3118 tree cbase, cstep;
3119 tree utype = TREE_TYPE (ubase), ctype;
3120 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3121 HOST_WIDE_INT ratio, aratio;
3122 bool var_present, symbol_present;
3123 unsigned cost = 0, n_sums;
3124
3125 *depends_on = NULL;
3126
3127 /* Only consider real candidates. */
3128 if (!cand->iv)
3129 return INFTY;
3130
3131 cbase = cand->iv->base;
3132 cstep = cand->iv->step;
3133 ctype = TREE_TYPE (cbase);
3134
3135 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3136 {
3137 /* We do not have enough precision to express the values of the use. */
3138 return INFTY;
3139 }
3140
3141 if (address_p)
3142 {
3143 /* Do not try to express the address of an object with a computation based
3144 on the address of a different object. This may cause problems in rtl
3145 level alias analysis (which does not expect this to happen, as it is
3146 invalid in C), and it would be unlikely to be useful
3147 anyway. */
3148 if (use->iv->base_object
3149 && cand->iv->base_object
3150 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3151 return INFTY;
3152 }
3153
3154 if (!cst_and_fits_in_hwi (ustep)
3155 || !cst_and_fits_in_hwi (cstep))
3156 return INFTY;
3157
3158 if (TREE_CODE (ubase) == INTEGER_CST
3159 && !cst_and_fits_in_hwi (ubase))
3160 goto fallback;
3161
3162 if (TREE_CODE (cbase) == INTEGER_CST
3163 && !cst_and_fits_in_hwi (cbase))
3164 goto fallback;
3165
3166 ustepi = int_cst_value (ustep);
3167 cstepi = int_cst_value (cstep);
3168
3169 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3170 {
3171 /* TODO -- add direct handling of this case. */
3172 goto fallback;
3173 }
3174
3175 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3176 return INFTY;
3177
3178 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3179 or ratio == 1, it is better to handle this like
3180
3181 ubase - ratio * cbase + ratio * var
3182
3183 (this also holds in the case ratio == -1, TODO). */
3184
3185 if (TREE_CODE (cbase) == INTEGER_CST)
3186 {
3187 offset = - ratio * int_cst_value (cbase);
3188 cost += difference_cost (data,
3189 ubase, integer_zero_node,
3190 &symbol_present, &var_present, &offset,
3191 depends_on);
3192 }
3193 else if (ratio == 1)
3194 {
3195 cost += difference_cost (data,
3196 ubase, cbase,
3197 &symbol_present, &var_present, &offset,
3198 depends_on);
3199 }
3200 else
3201 {
3202 cost += force_var_cost (data, cbase, depends_on);
3203 cost += add_cost (TYPE_MODE (ctype));
3204 cost += difference_cost (data,
3205 ubase, integer_zero_node,
3206 &symbol_present, &var_present, &offset,
3207 depends_on);
3208 }
3209
3210 /* If we are after the increment, the value of the candidate is already
3211 one step ahead. */
3212 if (stmt_after_increment (data->current_loop, cand, at))
3213 offset -= ratio * cstepi;
3214
3215 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
3216 (symbol/var/const parts may be omitted). If we are looking for an address,
3217 find the cost of addressing this. */
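/* For illustration (a hypothetical use): for the address of a[i], with a
   a global int array and the candidate an iv with base 0 and step 1,
   ratio == 4 and ubase == &a[0] contributes only a symbol, so the shape
   is &a + 4 * var and get_address_cost (true, false, 0, 4) is used. */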
3218 if (address_p)
3219 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3220
3221 /* Otherwise estimate the costs for computing the expression. */
3222 aratio = ratio > 0 ? ratio : -ratio;
3223 if (!symbol_present && !var_present && !offset)
3224 {
3225 if (ratio != 1)
3226 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3227
3228 return cost;
3229 }
3230
3231 if (aratio != 1)
3232 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3233
3234 n_sums = 1;
3235 if (var_present
3236 /* Symbol + offset should be compile-time computable. */
3237 && (symbol_present || offset))
3238 n_sums++;
3239
3240 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3241
3242 fallback:
3243 {
3244 /* Just get the expression, expand it and measure the cost. */
3245 tree comp = get_computation_at (data->current_loop, use, cand, at);
3246
3247 if (!comp)
3248 return INFTY;
3249
3250 if (address_p)
3251 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3252
3253 return computation_cost (comp);
3254 }
3255 }
3256
3257 /* Determines the cost of the computation by which USE is expressed
3258 from induction variable CAND. If ADDRESS_P is true, we just need
3259 to create an address from it, otherwise we want to get it into a
3260 register. A set of invariants we depend on is stored in
3261 DEPENDS_ON. */
3262
3263 static unsigned
3264 get_computation_cost (struct ivopts_data *data,
3265 struct iv_use *use, struct iv_cand *cand,
3266 bool address_p, bitmap *depends_on)
3267 {
3268 return get_computation_cost_at (data,
3269 use, cand, address_p, depends_on, use->stmt);
3270 }
3271
3272 /* Determines cost of basing replacement of USE on CAND in a generic
3273 expression. */
3274
3275 static bool
3276 determine_use_iv_cost_generic (struct ivopts_data *data,
3277 struct iv_use *use, struct iv_cand *cand)
3278 {
3279 bitmap depends_on;
3280 unsigned cost;
3281
3282 /* The simple case first -- if we need to express value of the preserved
3283 original biv, the cost is 0. This also prevents us from counting the
3284 cost of increment twice -- once at this use and once in the cost of
3285 the candidate. */
3286 if (cand->pos == IP_ORIGINAL
3287 && cand->incremented_at == use->stmt)
3288 {
3289 set_use_iv_cost (data, use, cand, 0, NULL);
3290 return true;
3291 }
3292
3293 cost = get_computation_cost (data, use, cand, false, &depends_on);
3294 set_use_iv_cost (data, use, cand, cost, depends_on);
3295
3296 return cost != INFTY;
3297 }
3298
3299 /* Determines cost of basing replacement of USE on CAND in an address. */
3300
3301 static bool
3302 determine_use_iv_cost_address (struct ivopts_data *data,
3303 struct iv_use *use, struct iv_cand *cand)
3304 {
3305 bitmap depends_on;
3306 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3307
3308 set_use_iv_cost (data, use, cand, cost, depends_on);
3309
3310 return cost != INFTY;
3311 }
3312
3313 /* Computes value of induction variable IV in iteration NITER. */
3314
3315 static tree
3316 iv_value (struct iv *iv, tree niter)
3317 {
3318 tree val;
3319 tree type = TREE_TYPE (iv->base);
3320
3321 niter = fold_convert (type, niter);
3322 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3323
3324 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3325 }
3326
3327 /* Computes value of candidate CAND at position AT in iteration NITER. */
3328
3329 static tree
3330 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3331 {
3332 tree val = iv_value (cand->iv, niter);
3333 tree type = TREE_TYPE (cand->iv->base);
3334
3335 if (stmt_after_increment (loop, cand, at))
3336 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3337
3338 return val;
3339 }
3340
3341 /* Returns the period of induction variable IV. */
3342
3343 static tree
3344 iv_period (struct iv *iv)
3345 {
3346 tree step = iv->step, period, type;
3347 tree pow2div;
3348
3349 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3350
3351 /* The period of the iv is gcd (step, type range). Since the type range is
3352 a power of two, it suffices to determine the maximum power of two that
3353 divides step. */
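/* E.g. for a 32-bit iv with step 4, pow2div is 2 and the period is the
   mask of the low 30 bits, 0x3fffffff. */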
3354 pow2div = num_ending_zeros (step);
3355 type = unsigned_type_for (TREE_TYPE (step));
3356
3357 period = build_low_bits_mask (type,
3358 (TYPE_PRECISION (type)
3359 - tree_low_cst (pow2div, 1)));
3360
3361 return period;
3362 }
3363
3364 /* Check whether it is possible to express the condition in USE by comparison
3365 of candidate CAND. If so, store the comparison code to COMPARE and the
3366 value to compare against to BOUND. */
3367
3368 static bool
3369 may_eliminate_iv (struct ivopts_data *data,
3370 struct iv_use *use, struct iv_cand *cand,
3371 enum tree_code *compare, tree *bound)
3372 {
3373 basic_block ex_bb;
3374 edge exit;
3375 struct tree_niter_desc *niter;
3376 tree nit, nit_type;
3377 tree wider_type, period, per_type;
3378 struct loop *loop = data->current_loop;
3379
3380 /* For now this works only for exits that dominate the loop latch. TODO --
3381 extend it to other conditions inside the loop body. */
3382 ex_bb = bb_for_stmt (use->stmt);
3383 if (use->stmt != last_stmt (ex_bb)
3384 || TREE_CODE (use->stmt) != COND_EXPR)
3385 return false;
3386 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3387 return false;
3388
3389 exit = EDGE_SUCC (ex_bb, 0);
3390 if (flow_bb_inside_loop_p (loop, exit->dest))
3391 exit = EDGE_SUCC (ex_bb, 1);
3392 if (flow_bb_inside_loop_p (loop, exit->dest))
3393 return false;
3394
3395 niter = niter_for_exit (data, exit);
3396 if (!niter
3397 || !zero_p (niter->may_be_zero))
3398 return false;
3399
3400 nit = niter->niter;
3401 nit_type = TREE_TYPE (nit);
3402
3403 /* Determine whether we may use the variable to test whether niter iterations
3404 elapsed. This is the case iff the period of the induction variable is
3405 greater than the number of iterations. */
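/* For example (values assumed): with nit == 100 and a candidate with base 0
   and step 1 in a 32-bit type, the period test below succeeds and the exit
   condition is rewritten as a comparison of the candidate against
   cand_value_at (loop, cand, use->stmt, nit). */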
3406 period = iv_period (cand->iv);
3407 if (!period)
3408 return false;
3409 per_type = TREE_TYPE (period);
3410
3411 wider_type = TREE_TYPE (period);
3412 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3413 wider_type = per_type;
3414 else
3415 wider_type = nit_type;
3416
3417 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3418 fold_convert (wider_type, period),
3419 fold_convert (wider_type, nit)))))
3420 return false;
3421
3422 if (exit->flags & EDGE_TRUE_VALUE)
3423 *compare = EQ_EXPR;
3424 else
3425 *compare = NE_EXPR;
3426
3427 *bound = cand_value_at (loop, cand, use->stmt, nit);
3428 return true;
3429 }
3430
3431 /* Determines cost of basing replacement of USE on CAND in a condition. */
3432
3433 static bool
3434 determine_use_iv_cost_condition (struct ivopts_data *data,
3435 struct iv_use *use, struct iv_cand *cand)
3436 {
3437 tree bound;
3438 enum tree_code compare;
3439
3440 /* Only consider real candidates. */
3441 if (!cand->iv)
3442 {
3443 set_use_iv_cost (data, use, cand, INFTY, NULL);
3444 return false;
3445 }
3446
3447 if (may_eliminate_iv (data, use, cand, &compare, &bound))
3448 {
3449 bitmap depends_on = NULL;
3450 unsigned cost = force_var_cost (data, bound, &depends_on);
3451
3452 set_use_iv_cost (data, use, cand, cost, depends_on);
3453 return cost != INFTY;
3454 }
3455
3456 /* The induction variable elimination failed; just express the original
3457 giv. If it is compared with an invariant, note that we cannot get
3458 rid of it. */
3459 if (TREE_CODE (*use->op_p) == SSA_NAME)
3460 record_invariant (data, *use->op_p, true);
3461 else
3462 {
3463 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3464 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3465 }
3466
3467 return determine_use_iv_cost_generic (data, use, cand);
3468 }
3469
3470 /* Checks whether it is possible to replace the final value of USE by
3471 a direct computation. If so, the formula is stored to *VALUE. */
3472
3473 static bool
3474 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3475 tree *value)
3476 {
3477 struct loop *loop = data->current_loop;
3478 edge exit;
3479 struct tree_niter_desc *niter;
3480
3481 exit = single_dom_exit (loop);
3482 if (!exit)
3483 return false;
3484
3485 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3486 bb_for_stmt (use->stmt)));
3487
3488 niter = niter_for_single_dom_exit (data);
3489 if (!niter
3490 || !zero_p (niter->may_be_zero))
3491 return false;
3492
3493 *value = iv_value (use->iv, niter->niter);
3494
3495 return true;
3496 }
3497
3498 /* Determines cost of replacing final value of USE using CAND. */
3499
3500 static bool
3501 determine_use_iv_cost_outer (struct ivopts_data *data,
3502 struct iv_use *use, struct iv_cand *cand)
3503 {
3504 bitmap depends_on;
3505 unsigned cost;
3506 edge exit;
3507 tree value;
3508 struct loop *loop = data->current_loop;
3509
3510 /* The simple case first -- if we need to express value of the preserved
3511 original biv, the cost is 0. This also prevents us from counting the
3512 cost of increment twice -- once at this use and once in the cost of
3513 the candidate. */
3514 if (cand->pos == IP_ORIGINAL
3515 && cand->incremented_at == use->stmt)
3516 {
3517 set_use_iv_cost (data, use, cand, 0, NULL);
3518 return true;
3519 }
3520
3521 if (!cand->iv)
3522 {
3523 if (!may_replace_final_value (data, use, &value))
3524 {
3525 set_use_iv_cost (data, use, cand, INFTY, NULL);
3526 return false;
3527 }
3528
3529 depends_on = NULL;
3530 cost = force_var_cost (data, value, &depends_on);
3531
3532 cost /= AVG_LOOP_NITER (loop);
3533
3534 set_use_iv_cost (data, use, cand, cost, depends_on);
3535 return cost != INFTY;
3536 }
3537
3538 exit = single_dom_exit (loop);
3539 if (exit)
3540 {
3541 /* If there is just a single exit, we may use the value of the candidate
3542 after the exit is taken to determine the value of the use. */
3543 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3544 last_stmt (exit->src));
3545 if (cost != INFTY)
3546 cost /= AVG_LOOP_NITER (loop);
3547 }
3548 else
3549 {
3550 /* Otherwise we just need to compute the iv. */
3551 cost = get_computation_cost (data, use, cand, false, &depends_on);
3552 }
3553
3554 set_use_iv_cost (data, use, cand, cost, depends_on);
3555
3556 return cost != INFTY;
3557 }
3558
3559 /* Determines cost of basing replacement of USE on CAND. Returns false
3560 if USE cannot be based on CAND. */
3561
3562 static bool
3563 determine_use_iv_cost (struct ivopts_data *data,
3564 struct iv_use *use, struct iv_cand *cand)
3565 {
3566 switch (use->type)
3567 {
3568 case USE_NONLINEAR_EXPR:
3569 return determine_use_iv_cost_generic (data, use, cand);
3570
3571 case USE_OUTER:
3572 return determine_use_iv_cost_outer (data, use, cand);
3573
3574 case USE_ADDRESS:
3575 return determine_use_iv_cost_address (data, use, cand);
3576
3577 case USE_COMPARE:
3578 return determine_use_iv_cost_condition (data, use, cand);
3579
3580 default:
3581 gcc_unreachable ();
3582 }
3583 }
3584
3585 /* Determines costs of basing the use of the iv on an iv candidate. */
3586
3587 static void
3588 determine_use_iv_costs (struct ivopts_data *data)
3589 {
3590 unsigned i, j;
3591 struct iv_use *use;
3592 struct iv_cand *cand;
3593 bitmap to_clear = BITMAP_ALLOC (NULL);
3594
3595 alloc_use_cost_map (data);
3596
3597 for (i = 0; i < n_iv_uses (data); i++)
3598 {
3599 use = iv_use (data, i);
3600
3601 if (data->consider_all_candidates)
3602 {
3603 for (j = 0; j < n_iv_cands (data); j++)
3604 {
3605 cand = iv_cand (data, j);
3606 determine_use_iv_cost (data, use, cand);
3607 }
3608 }
3609 else
3610 {
3611 bitmap_iterator bi;
3612
3613 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3614 {
3615 cand = iv_cand (data, j);
3616 if (!determine_use_iv_cost (data, use, cand))
3617 bitmap_set_bit (to_clear, j);
3618 }
3619
3620 /* Remove the candidates for which the cost is infinite from
3621 the list of related candidates. */
3622 bitmap_and_compl_into (use->related_cands, to_clear);
3623 bitmap_clear (to_clear);
3624 }
3625 }
3626
3627 BITMAP_FREE (to_clear);
3628
3629 if (dump_file && (dump_flags & TDF_DETAILS))
3630 {
3631 fprintf (dump_file, "Use-candidate costs:\n");
3632
3633 for (i = 0; i < n_iv_uses (data); i++)
3634 {
3635 use = iv_use (data, i);
3636
3637 fprintf (dump_file, "Use %d:\n", i);
3638 fprintf (dump_file, " cand\tcost\tdepends on\n");
3639 for (j = 0; j < use->n_map_members; j++)
3640 {
3641 if (!use->cost_map[j].cand
3642 || use->cost_map[j].cost == INFTY)
3643 continue;
3644
3645 fprintf (dump_file, " %d\t%d\t",
3646 use->cost_map[j].cand->id,
3647 use->cost_map[j].cost);
3648 if (use->cost_map[j].depends_on)
3649 bitmap_print (dump_file,
3650 use->cost_map[j].depends_on, "","");
3651 fprintf (dump_file, "\n");
3652 }
3653
3654 fprintf (dump_file, "\n");
3655 }
3656 fprintf (dump_file, "\n");
3657 }
3658 }
3659
3660 /* Determines cost of the candidate CAND. */
3661
3662 static void
3663 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3664 {
3665 unsigned cost_base, cost_step;
3666 tree base;
3667
3668 if (!cand->iv)
3669 {
3670 cand->cost = 0;
3671 return;
3672 }
3673
3674 /* There are two costs associated with the candidate -- its increment
3675 and its initialization. The second is almost negligible for any loop
3676 that rolls enough, so we give it only a small weight. */
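/* For illustration, assuming AVG_LOOP_NITER yields 10: with cost_step == 1
   and cost_base == 4, the resulting candidate cost is 1 + 4 / 10 == 1. */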
3677
3678 base = cand->iv->base;
3679 cost_base = force_var_cost (data, base, NULL);
3680 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3681
3682 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3683
3684 /* Prefer the original iv unless we may gain something by replacing it. */
3685 if (cand->pos == IP_ORIGINAL)
3686 cand->cost--;
3687
3688 /* Prefer not to insert statements into latch unless there are some
3689 already (so that we do not create unnecessary jumps). */
3690 if (cand->pos == IP_END
3691 && empty_block_p (ip_end_pos (data->current_loop)))
3692 cand->cost++;
3693 }
3694
3695 /* Determines costs of computation of the candidates. */
3696
3697 static void
3698 determine_iv_costs (struct ivopts_data *data)
3699 {
3700 unsigned i;
3701
3702 if (dump_file && (dump_flags & TDF_DETAILS))
3703 {
3704 fprintf (dump_file, "Candidate costs:\n");
3705 fprintf (dump_file, " cand\tcost\n");
3706 }
3707
3708 for (i = 0; i < n_iv_cands (data); i++)
3709 {
3710 struct iv_cand *cand = iv_cand (data, i);
3711
3712 determine_iv_cost (data, cand);
3713
3714 if (dump_file && (dump_flags & TDF_DETAILS))
3715 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3716 }
3717
3718 if (dump_file && (dump_flags & TDF_DETAILS))
3719 fprintf (dump_file, "\n");
3720 }
3721
3722 /* Calculates cost for having SIZE induction variables. */
3723
3724 static unsigned
3725 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3726 {
3727 return global_cost_for_size (size,
3728 loop_data (data->current_loop)->regs_used,
3729 n_iv_uses (data));
3730 }
3731
3732 /* For each size of the induction variable set determine the penalty. */
3733
3734 static void
3735 determine_set_costs (struct ivopts_data *data)
3736 {
3737 unsigned j, n;
3738 tree phi, op;
3739 struct loop *loop = data->current_loop;
3740 bitmap_iterator bi;
3741
3742 /* We use the following model (definitely improvable, especially the
3743 cost function -- TODO):
3744
3745 We estimate the number of registers available (using MD data), name it A.
3746
3747 We estimate the number of registers used by the loop, name it U. This
3748 number is obtained as the number of loop phi nodes (not counting virtual
3749 registers and bivs) + the number of variables from outside of the loop.
3750
3751 We set a reserve R (free regs that are used for temporary computations,
3752 etc.). For now the reserve is a constant 3.
3753
3754 Let I be the number of induction variables.
3755
3756 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3757 making a lot of ivs without a reason).
3758 -- if A - R < U + I <= A, the cost is I * PRES_COST
3759 -- if U + I > A, the cost is I * PRES_COST and
3760 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
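/* For illustration (numbers assumed): with A == 6, R == 3 and U == 2, one iv
   gives U + I + R == 6 <= A and costs 1 * SMALL_COST; three ivs give
   U + I == 5, which lies in (A - R, A], and cost 3 * PRES_COST; five ivs
   give U + I == 7 > A and cost 5 * PRES_COST plus the spill term. */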
3761
3762 if (dump_file && (dump_flags & TDF_DETAILS))
3763 {
3764 fprintf (dump_file, "Global costs:\n");
3765 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3766 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3767 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3768 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3769 }
3770
3771 n = 0;
3772 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3773 {
3774 op = PHI_RESULT (phi);
3775
3776 if (!is_gimple_reg (op))
3777 continue;
3778
3779 if (get_iv (data, op))
3780 continue;
3781
3782 n++;
3783 }
3784
3785 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3786 {
3787 struct version_info *info = ver_info (data, j);
3788
3789 if (info->inv_id && info->has_nonlin_use)
3790 n++;
3791 }
3792
3793 loop_data (loop)->regs_used = n;
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3795 fprintf (dump_file, " regs_used %d\n", n);
3796
3797 if (dump_file && (dump_flags & TDF_DETAILS))
3798 {
3799 fprintf (dump_file, " cost for size:\n");
3800 fprintf (dump_file, " ivs\tcost\n");
3801 for (j = 0; j <= 2 * target_avail_regs; j++)
3802 fprintf (dump_file, " %d\t%d\n", j,
3803 ivopts_global_cost_for_size (data, j));
3804 fprintf (dump_file, "\n");
3805 }
3806 }
3807
3808 /* Returns true if A is a cheaper cost pair than B. */
3809
3810 static bool
3811 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3812 {
3813 if (!a)
3814 return false;
3815
3816 if (!b)
3817 return true;
3818
3819 if (a->cost < b->cost)
3820 return true;
3821
3822 if (a->cost > b->cost)
3823 return false;
3824
3825 /* In case the costs are the same, prefer the cheaper candidate. */
3826 if (a->cand->cost < b->cand->cost)
3827 return true;
3828
3829 return false;
3830 }
3831
3832 /* Computes the cost field of IVS structure. */
3833
3834 static void
3835 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3836 {
3837 unsigned cost = 0;
3838
3839 cost += ivs->cand_use_cost;
3840 cost += ivs->cand_cost;
3841 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3842
3843 ivs->cost = cost;
3844 }
3845
3846 /* Set USE not to be expressed by any candidate in IVS. */
3847
3848 static void
3849 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3850 struct iv_use *use)
3851 {
3852 unsigned uid = use->id, cid, iid;
3853 bitmap deps;
3854 struct cost_pair *cp;
3855 bitmap_iterator bi;
3856
3857 cp = ivs->cand_for_use[uid];
3858 if (!cp)
3859 return;
3860 cid = cp->cand->id;
3861
3862 ivs->bad_uses++;
3863 ivs->cand_for_use[uid] = NULL;
3864 ivs->n_cand_uses[cid]--;
3865
3866 if (ivs->n_cand_uses[cid] == 0)
3867 {
3868 bitmap_clear_bit (ivs->cands, cid);
3869 /* Do not count the pseudocandidates. */
3870 if (cp->cand->iv)
3871 ivs->n_regs--;
3872 ivs->n_cands--;
3873 ivs->cand_cost -= cp->cand->cost;
3874 }
3875
3876 ivs->cand_use_cost -= cp->cost;
3877
3878 deps = cp->depends_on;
3879
3880 if (deps)
3881 {
3882 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3883 {
3884 ivs->n_invariant_uses[iid]--;
3885 if (ivs->n_invariant_uses[iid] == 0)
3886 ivs->n_regs--;
3887 }
3888 }
3889
3890 iv_ca_recount_cost (data, ivs);
3891 }
3892
3893 /* Set cost pair for USE in set IVS to CP. */
3894
3895 static void
3896 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3897 struct iv_use *use, struct cost_pair *cp)
3898 {
3899 unsigned uid = use->id, cid, iid;
3900 bitmap deps;
3901 bitmap_iterator bi;
3902
3903 if (ivs->cand_for_use[uid] == cp)
3904 return;
3905
3906 if (ivs->cand_for_use[uid])
3907 iv_ca_set_no_cp (data, ivs, use);
3908
3909 if (cp)
3910 {
3911 cid = cp->cand->id;
3912
3913 ivs->bad_uses--;
3914 ivs->cand_for_use[uid] = cp;
3915 ivs->n_cand_uses[cid]++;
3916 if (ivs->n_cand_uses[cid] == 1)
3917 {
3918 bitmap_set_bit (ivs->cands, cid);
3919 /* Do not count the pseudocandidates. */
3920 if (cp->cand->iv)
3921 ivs->n_regs++;
3922 ivs->n_cands++;
3923 ivs->cand_cost += cp->cand->cost;
3924 }
3925
3926 ivs->cand_use_cost += cp->cost;
3927
3928 deps = cp->depends_on;
3929
3930 if (deps)
3931 {
3932 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3933 {
3934 ivs->n_invariant_uses[iid]++;
3935 if (ivs->n_invariant_uses[iid] == 1)
3936 ivs->n_regs++;
3937 }
3938 }
3939
3940 iv_ca_recount_cost (data, ivs);
3941 }
3942 }
3943
3944 /* Extend set IVS by expressing USE by some of the candidates in it
3945 if possible. */
3946
3947 static void
3948 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3949 struct iv_use *use)
3950 {
3951 struct cost_pair *best_cp = NULL, *cp;
3952 bitmap_iterator bi;
3953 unsigned i;
3954
3955 gcc_assert (ivs->upto >= use->id);
3956
3957 if (ivs->upto == use->id)
3958 {
3959 ivs->upto++;
3960 ivs->bad_uses++;
3961 }
3962
3963 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3964 {
3965 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3966
3967 if (cheaper_cost_pair (cp, best_cp))
3968 best_cp = cp;
3969 }
3970
3971 iv_ca_set_cp (data, ivs, use, best_cp);
3972 }
3973
3974 /* Get cost for assignment IVS. */
3975
3976 static unsigned
3977 iv_ca_cost (struct iv_ca *ivs)
3978 {
3979 return (ivs->bad_uses ? INFTY : ivs->cost);
3980 }
3981
3982 /* Returns true if all dependences of CP are among invariants in IVS. */
3983
3984 static bool
3985 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3986 {
3987 unsigned i;
3988 bitmap_iterator bi;
3989
3990 if (!cp->depends_on)
3991 return true;
3992
3993 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3994 {
3995 if (ivs->n_invariant_uses[i] == 0)
3996 return false;
3997 }
3998
3999 return true;
4000 }
4001
4002 /* Creates a change record for expressing USE by NEW_CP instead of OLD_CP and
4003 chains it before NEXT_CHANGE. */
4004
4005 static struct iv_ca_delta *
4006 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4007 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4008 {
4009 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4010
4011 change->use = use;
4012 change->old_cp = old_cp;
4013 change->new_cp = new_cp;
4014 change->next_change = next_change;
4015
4016 return change;
4017 }
4018
4019 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4020 are rewritten. */
4021
4022 static struct iv_ca_delta *
4023 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4024 {
4025 struct iv_ca_delta *last;
4026
4027 if (!l2)
4028 return l1;
4029
4030 if (!l1)
4031 return l2;
4032
4033 for (last = l1; last->next_change; last = last->next_change)
4034 continue;
4035 last->next_change = l2;
4036
4037 return l1;
4038 }
4039
4040 /* Returns the cost pair by which USE is expressed in IVS. */
4041
4042 static struct cost_pair *
4043 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4044 {
4045 return ivs->cand_for_use[use->id];
4046 }
4047
4048 /* Reverse the list of changes DELTA, forming the inverse to it. */
4049
4050 static struct iv_ca_delta *
4051 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4052 {
4053 struct iv_ca_delta *act, *next, *prev = NULL;
4054 struct cost_pair *tmp;
4055
4056 for (act = delta; act; act = next)
4057 {
4058 next = act->next_change;
4059 act->next_change = prev;
4060 prev = act;
4061
4062 tmp = act->old_cp;
4063 act->old_cp = act->new_cp;
4064 act->new_cp = tmp;
4065 }
4066
4067 return prev;
4068 }
4069
4070 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4071 reverted instead. */
4072
4073 static void
4074 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4075 struct iv_ca_delta *delta, bool forward)
4076 {
4077 struct cost_pair *from, *to;
4078 struct iv_ca_delta *act;
4079
4080 if (!forward)
4081 delta = iv_ca_delta_reverse (delta);
4082
4083 for (act = delta; act; act = act->next_change)
4084 {
4085 from = act->old_cp;
4086 to = act->new_cp;
4087 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4088 iv_ca_set_cp (data, ivs, act->use, to);
4089 }
4090
4091 if (!forward)
4092 iv_ca_delta_reverse (delta);
4093 }
4094
4095 /* Returns true if CAND is used in IVS. */
4096
4097 static bool
4098 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4099 {
4100 return ivs->n_cand_uses[cand->id] > 0;
4101 }
4102
4103 /* Returns number of induction variable candidates in the set IVS. */
4104
4105 static unsigned
4106 iv_ca_n_cands (struct iv_ca *ivs)
4107 {
4108 return ivs->n_cands;
4109 }
4110
4111 /* Free the list of changes DELTA. */
4112
4113 static void
4114 iv_ca_delta_free (struct iv_ca_delta **delta)
4115 {
4116 struct iv_ca_delta *act, *next;
4117
4118 for (act = *delta; act; act = next)
4119 {
4120 next = act->next_change;
4121 free (act);
4122 }
4123
4124 *delta = NULL;
4125 }
4126
4127 /* Allocates a new iv candidate assignment. */
4128
4129 static struct iv_ca *
4130 iv_ca_new (struct ivopts_data *data)
4131 {
4132 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4133
4134 nw->upto = 0;
4135 nw->bad_uses = 0;
4136 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4137 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4138 nw->cands = BITMAP_ALLOC (NULL);
4139 nw->n_cands = 0;
4140 nw->n_regs = 0;
4141 nw->cand_use_cost = 0;
4142 nw->cand_cost = 0;
4143 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4144 nw->cost = 0;
4145
4146 return nw;
4147 }
4148
4149 /* Free memory occupied by the set IVS. */
4150
4151 static void
4152 iv_ca_free (struct iv_ca **ivs)
4153 {
4154 free ((*ivs)->cand_for_use);
4155 free ((*ivs)->n_cand_uses);
4156 BITMAP_FREE ((*ivs)->cands);
4157 free ((*ivs)->n_invariant_uses);
4158 free (*ivs);
4159 *ivs = NULL;
4160 }
4161
4162 /* Dumps IVS to FILE. */
4163
4164 static void
4165 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4166 {
4167 const char *pref = " invariants ";
4168 unsigned i;
4169
4170 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4171 bitmap_print (file, ivs->cands, " candidates ","\n");
4172
4173 for (i = 1; i <= data->max_inv_id; i++)
4174 if (ivs->n_invariant_uses[i])
4175 {
4176 fprintf (file, "%s%d", pref, i);
4177 pref = ", ";
4178 }
4179 fprintf (file, "\n");
4180 }
4181
4182 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4183 new set, and store differences in DELTA. Number of induction variables
4184 in the new set is stored to N_IVS. */
4185
4186 static unsigned
4187 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4188 struct iv_cand *cand, struct iv_ca_delta **delta,
4189 unsigned *n_ivs)
4190 {
4191 unsigned i, cost;
4192 struct iv_use *use;
4193 struct cost_pair *old_cp, *new_cp;
4194
4195 *delta = NULL;
4196 for (i = 0; i < ivs->upto; i++)
4197 {
4198 use = iv_use (data, i);
4199 old_cp = iv_ca_cand_for_use (ivs, use);
4200
4201 if (old_cp
4202 && old_cp->cand == cand)
4203 continue;
4204
4205 new_cp = get_use_iv_cost (data, use, cand);
4206 if (!new_cp)
4207 continue;
4208
4209 if (!iv_ca_has_deps (ivs, new_cp))
4210 continue;
4211
4212 if (!cheaper_cost_pair (new_cp, old_cp))
4213 continue;
4214
4215 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4216 }
4217
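/* Commit the changes temporarily so that the cost (and the number of
   candidates) of the new assignment can be measured, then revert them so
   that IVS is left unchanged. */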
4218 iv_ca_delta_commit (data, ivs, *delta, true);
4219 cost = iv_ca_cost (ivs);
4220 if (n_ivs)
4221 *n_ivs = iv_ca_n_cands (ivs);
4222 iv_ca_delta_commit (data, ivs, *delta, false);
4223
4224 return cost;
4225 }
4226
4227 /* Try narrowing set IVS by removing CAND. Return the cost of
4228 the new set and store the differences in DELTA. */
4229
4230 static unsigned
4231 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4232 struct iv_cand *cand, struct iv_ca_delta **delta)
4233 {
4234 unsigned i, ci;
4235 struct iv_use *use;
4236 struct cost_pair *old_cp, *new_cp, *cp;
4237 bitmap_iterator bi;
4238 struct iv_cand *cnd;
4239 unsigned cost;
4240
4241 *delta = NULL;
4242 for (i = 0; i < n_iv_uses (data); i++)
4243 {
4244 use = iv_use (data, i);
4245
4246 old_cp = iv_ca_cand_for_use (ivs, use);
4247 if (old_cp->cand != cand)
4248 continue;
4249
4250 new_cp = NULL;
4251
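/* Find the cheapest candidate other than CAND that is already in IVS and
   is able to express this use. */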
4252 if (data->consider_all_candidates)
4253 {
4254 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4255 {
4256 if (ci == cand->id)
4257 continue;
4258
4259 cnd = iv_cand (data, ci);
4260
4261 cp = get_use_iv_cost (data, use, cnd);
4262 if (!cp)
4263 continue;
4264 if (!iv_ca_has_deps (ivs, cp))
4265 continue;
4266
4267 if (!cheaper_cost_pair (cp, new_cp))
4268 continue;
4269
4270 new_cp = cp;
4271 }
4272 }
4273 else
4274 {
4275 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4276 {
4277 if (ci == cand->id)
4278 continue;
4279
4280 cnd = iv_cand (data, ci);
4281
4282 cp = get_use_iv_cost (data, use, cnd);
4283 if (!cp)
4284 continue;
4285 if (!iv_ca_has_deps (ivs, cp))
4286 continue;
4287
4288 if (!cheaper_cost_pair (cp, new_cp))
4289 continue;
4290
4291 new_cp = cp;
4292 }
4293 }
4294
4295 if (!new_cp)
4296 {
4297 iv_ca_delta_free (delta);
4298 return INFTY;
4299 }
4300
4301 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4302 }
4303
4304 iv_ca_delta_commit (data, ivs, *delta, true);
4305 cost = iv_ca_cost (ivs);
4306 iv_ca_delta_commit (data, ivs, *delta, false);
4307
4308 return cost;
4309 }
4310
4311 /* Try optimizing the set of candidates IVS by removing candidates other
4312    than EXCEPT_CAND from it. Return the cost of the new set, and store the
4313    differences in DELTA. */
4314
4315 static unsigned
4316 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4317 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4318 {
4319 bitmap_iterator bi;
4320 struct iv_ca_delta *act_delta, *best_delta;
4321 unsigned i, best_cost, acost;
4322 struct iv_cand *cand;
4323
4324 best_delta = NULL;
4325 best_cost = iv_ca_cost (ivs);
4326
4327 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4328 {
4329 cand = iv_cand (data, i);
4330
4331 if (cand == except_cand)
4332 continue;
4333
4334 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4335
4336 if (acost < best_cost)
4337 {
4338 best_cost = acost;
4339 iv_ca_delta_free (&best_delta);
4340 best_delta = act_delta;
4341 }
4342 else
4343 iv_ca_delta_free (&act_delta);
4344 }
4345
4346 if (!best_delta)
4347 {
4348 *delta = NULL;
4349 return best_cost;
4350 }
4351
4352 /* Recurse to possibly remove other unnecessary ivs. */
4353 iv_ca_delta_commit (data, ivs, best_delta, true);
4354 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4355 iv_ca_delta_commit (data, ivs, best_delta, false);
4356 *delta = iv_ca_delta_join (best_delta, *delta);
4357 return best_cost;
4358 }
4359
4360 /* Tries to extend the set IVS in the best possible way in order
4361 to express the USE. */
4362
4363 static bool
4364 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4365 struct iv_use *use)
4366 {
4367 unsigned best_cost, act_cost;
4368 unsigned i;
4369 bitmap_iterator bi;
4370 struct iv_cand *cand;
4371 struct iv_ca_delta *best_delta = NULL, *act_delta;
4372 struct cost_pair *cp;
4373
4374 iv_ca_add_use (data, ivs, use);
4375 best_cost = iv_ca_cost (ivs);
4376
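/* If a candidate for USE was chosen when the use was added, remember that
   assignment as the fallback delta and detach it again, so that the
   candidates tried below are evaluated from a clean state. */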
4377 cp = iv_ca_cand_for_use (ivs, use);
4378 if (cp)
4379 {
4380 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4381 iv_ca_set_no_cp (data, ivs, use);
4382 }
4383
4384 /* First try the important candidates. Only if that fails, try the
4385    use-specific ones. Rationale -- in loops with many variables the best
4386    choice is often to use just one generic biv. If we added many ivs specific
4387    to the uses here, the optimization algorithm would later be likely to get
4388    stuck in a local minimum, causing us to create too many ivs. Going from
4389    few ivs to more seems more likely to succeed -- starting from few ivs,
4390    replacing an expensive use by a specific iv should always be a
4391    win. */
4392 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4393 {
4394 cand = iv_cand (data, i);
4395
4396 if (iv_ca_cand_used_p (ivs, cand))
4397 continue;
4398
4399 cp = get_use_iv_cost (data, use, cand);
4400 if (!cp)
4401 continue;
4402
4403 iv_ca_set_cp (data, ivs, use, cp);
4404 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4405 iv_ca_set_no_cp (data, ivs, use);
4406 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4407
4408 if (act_cost < best_cost)
4409 {
4410 best_cost = act_cost;
4411
4412 iv_ca_delta_free (&best_delta);
4413 best_delta = act_delta;
4414 }
4415 else
4416 iv_ca_delta_free (&act_delta);
4417 }
4418
4419 if (best_cost == INFTY)
4420 {
4421 for (i = 0; i < use->n_map_members; i++)
4422 {
4423 cp = use->cost_map + i;
4424 cand = cp->cand;
4425 if (!cand)
4426 continue;
4427
4428 /* Already tried this. */
4429 if (cand->important)
4430 continue;
4431
4432 if (iv_ca_cand_used_p (ivs, cand))
4433 continue;
4434
4435 act_delta = NULL;
4436 iv_ca_set_cp (data, ivs, use, cp);
4437 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4438 iv_ca_set_no_cp (data, ivs, use);
4439 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4440 cp, act_delta);
4441
4442 if (act_cost < best_cost)
4443 {
4444 best_cost = act_cost;
4445
4446 if (best_delta)
4447 iv_ca_delta_free (&best_delta);
4448 best_delta = act_delta;
4449 }
4450 else
4451 iv_ca_delta_free (&act_delta);
4452 }
4453 }
4454
4455 iv_ca_delta_commit (data, ivs, best_delta, true);
4456 iv_ca_delta_free (&best_delta);
4457
4458 return (best_cost != INFTY);
4459 }
4460
4461 /* Finds an initial assignment of candidates to uses. */
4462
4463 static struct iv_ca *
4464 get_initial_solution (struct ivopts_data *data)
4465 {
4466 struct iv_ca *ivs = iv_ca_new (data);
4467 unsigned i;
4468
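/* Express each use by some candidate; if this is not possible for some use,
   there is no valid initial solution. */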
4469 for (i = 0; i < n_iv_uses (data); i++)
4470 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4471 {
4472 iv_ca_free (&ivs);
4473 return NULL;
4474 }
4475
4476 return ivs;
4477 }
4478
4479 /* Tries to improve set of induction variables IVS. */
4480
4481 static bool
4482 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4483 {
4484 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4485 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4486 struct iv_cand *cand;
4487
4488 /* Try extending the set of induction variables by one. */
4489 for (i = 0; i < n_iv_cands (data); i++)
4490 {
4491 cand = iv_cand (data, i);
4492
4493 if (iv_ca_cand_used_p (ivs, cand))
4494 continue;
4495
4496 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4497 if (!act_delta)
4498 continue;
4499
4500 /* If we successfully added the candidate and the set is small enough,
4501 try optimizing it by removing other candidates. */
4502 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4503 {
4504 iv_ca_delta_commit (data, ivs, act_delta, true);
4505 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4506 iv_ca_delta_commit (data, ivs, act_delta, false);
4507 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4508 }
4509
4510 if (acost < best_cost)
4511 {
4512 best_cost = acost;
4513 iv_ca_delta_free (&best_delta);
4514 best_delta = act_delta;
4515 }
4516 else
4517 iv_ca_delta_free (&act_delta);
4518 }
4519
4520 if (!best_delta)
4521 {
4522 /* Try removing the candidates from the set instead. */
4523 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4524
4525 /* Nothing more we can do. */
4526 if (!best_delta)
4527 return false;
4528 }
4529
4530 iv_ca_delta_commit (data, ivs, best_delta, true);
4531 gcc_assert (best_cost == iv_ca_cost (ivs));
4532 iv_ca_delta_free (&best_delta);
4533 return true;
4534 }
4535
4536 /* Attempts to find the optimal set of induction variables. We use a simple
4537    greedy heuristic -- we try to replace at most one candidate in the selected
4538    solution and remove the unused ivs, as long as this improves the cost. */
4539
4540 static struct iv_ca *
4541 find_optimal_iv_set (struct ivopts_data *data)
4542 {
4543 unsigned i;
4544 struct iv_ca *set;
4545 struct iv_use *use;
4546
4547 /* Get the initial solution. */
4548 set = get_initial_solution (data);
4549 if (!set)
4550 {
4551 if (dump_file && (dump_flags & TDF_DETAILS))
4552 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4553 return NULL;
4554 }
4555
4556 if (dump_file && (dump_flags & TDF_DETAILS))
4557 {
4558 fprintf (dump_file, "Initial set of candidates:\n");
4559 iv_ca_dump (data, dump_file, set);
4560 }
4561
4562 while (try_improve_iv_set (data, set))
4563 {
4564 if (dump_file && (dump_flags & TDF_DETAILS))
4565 {
4566 fprintf (dump_file, "Improved to:\n");
4567 iv_ca_dump (data, dump_file, set);
4568 }
4569 }
4570
4571 if (dump_file && (dump_flags & TDF_DETAILS))
4572 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4573
4574 for (i = 0; i < n_iv_uses (data); i++)
4575 {
4576 use = iv_use (data, i);
4577 use->selected = iv_ca_cand_for_use (set, use)->cand;
4578 }
4579
4580 return set;
4581 }
4582
4583 /* Creates a new induction variable corresponding to CAND. */
4584
4585 static void
4586 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4587 {
4588 block_stmt_iterator incr_pos;
4589 tree base;
4590 bool after = false;
4591
4592 if (!cand->iv)
4593 return;
4594
4595 switch (cand->pos)
4596 {
4597 case IP_NORMAL:
4598 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4599 break;
4600
4601 case IP_END:
4602 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4603 after = true;
4604 break;
4605
4606 case IP_ORIGINAL:
4607 /* Mark that the iv is preserved. */
4608 name_info (data, cand->var_before)->preserve_biv = true;
4609 name_info (data, cand->var_after)->preserve_biv = true;
4610
4611 /* Rewrite the increment so that it uses var_before directly. */
4612 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4613
4614 return;
4615 }
4616
4617 gimple_add_tmp_var (cand->var_before);
4618 add_referenced_tmp_var (cand->var_before);
4619
4620 base = unshare_expr (cand->iv->base);
4621
4622 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4623 &incr_pos, after, &cand->var_before, &cand->var_after);
4624 }
4625
4626 /* Creates new induction variables described in SET. */
4627
4628 static void
4629 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4630 {
4631 unsigned i;
4632 struct iv_cand *cand;
4633 bitmap_iterator bi;
4634
4635 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4636 {
4637 cand = iv_cand (data, i);
4638 create_new_iv (data, cand);
4639 }
4640 }
4641
4642 /* Removes the statement STMT (a real statement or a phi node). If
4643    INCLUDING_DEFINED_NAME is true, also remove the ssa name defined by it. */
4644
4645 static void
4646 remove_statement (tree stmt, bool including_defined_name)
4647 {
4648 if (TREE_CODE (stmt) == PHI_NODE)
4649 {
4650 if (!including_defined_name)
4651 {
4652 /* Prevent the ssa name defined by the statement from being removed. */
4653 SET_PHI_RESULT (stmt, NULL);
4654 }
4655 remove_phi_node (stmt, NULL_TREE);
4656 }
4657 else
4658 {
4659 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4660
4661 bsi_remove (&bsi);
4662 }
4663 }
4664
4665 /* Rewrites USE (definition of iv used in a nonlinear expression)
4666 using candidate CAND. */
4667
4668 static void
4669 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4670 struct iv_use *use, struct iv_cand *cand)
4671 {
4672 tree comp;
4673 tree op, stmts, tgt, ass;
4674 block_stmt_iterator bsi, pbsi;
4675
4676 /* An important special case -- if we are asked to express the value of
4677 the original iv by itself, just exit; there is no need to
4678 introduce a new computation (that might also need casting the
4679 variable to unsigned and back). */
4680 if (cand->pos == IP_ORIGINAL
4681 && TREE_CODE (use->stmt) == MODIFY_EXPR
4682 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4683 {
4684 op = TREE_OPERAND (use->stmt, 1);
4685
4686    /* Be a bit careful. In case the variable is expressed in some
4687 complicated way, rewrite it so that we may get rid of this
4688 complicated expression. */
4689 if ((TREE_CODE (op) == PLUS_EXPR
4690 || TREE_CODE (op) == MINUS_EXPR)
4691 && TREE_OPERAND (op, 0) == cand->var_before
4692 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4693 return;
4694 }
4695
4696 comp = unshare_expr (get_computation (data->current_loop,
4697 use, cand));
4698 switch (TREE_CODE (use->stmt))
4699 {
4700 case PHI_NODE:
4701 tgt = PHI_RESULT (use->stmt);
4702
4703 /* If we should keep the biv, do not replace it. */
4704 if (name_info (data, tgt)->preserve_biv)
4705 return;
4706
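/* Position BSI at the last label of the block (if any), so that the
   statements replacing the phi node are inserted after the labels. */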
4707 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4708 while (!bsi_end_p (pbsi)
4709 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4710 {
4711 bsi = pbsi;
4712 bsi_next (&pbsi);
4713 }
4714 break;
4715
4716 case MODIFY_EXPR:
4717 tgt = TREE_OPERAND (use->stmt, 0);
4718 bsi = bsi_for_stmt (use->stmt);
4719 break;
4720
4721 default:
4722 gcc_unreachable ();
4723 }
4724
4725 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4726
4727 if (TREE_CODE (use->stmt) == PHI_NODE)
4728 {
4729 if (stmts)
4730 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4731 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4732 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4733 remove_statement (use->stmt, false);
4734 SSA_NAME_DEF_STMT (tgt) = ass;
4735 }
4736 else
4737 {
4738 if (stmts)
4739 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4740 TREE_OPERAND (use->stmt, 1) = op;
4741 }
4742 }
4743
4744 /* Replaces ssa name in index IDX by its basic variable. Callback for
4745 for_each_index. */
4746
4747 static bool
4748 idx_remove_ssa_names (tree base, tree *idx,
4749 void *data ATTRIBUTE_UNUSED)
4750 {
4751 tree *op;
4752
4753 if (TREE_CODE (*idx) == SSA_NAME)
4754 *idx = SSA_NAME_VAR (*idx);
4755
4756 if (TREE_CODE (base) == ARRAY_REF)
4757 {
4758 op = &TREE_OPERAND (base, 2);
4759 if (*op
4760 && TREE_CODE (*op) == SSA_NAME)
4761 *op = SSA_NAME_VAR (*op);
4762 op = &TREE_OPERAND (base, 3);
4763 if (*op
4764 && TREE_CODE (*op) == SSA_NAME)
4765 *op = SSA_NAME_VAR (*op);
4766 }
4767
4768 return true;
4769 }
4770
4771 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4772
4773 static tree
4774 unshare_and_remove_ssa_names (tree ref)
4775 {
4776 ref = unshare_expr (ref);
4777 for_each_index (&ref, idx_remove_ssa_names, NULL);
4778
4779 return ref;
4780 }
4781
4782 /* Rewrites base of memory access OP with expression WITH in statement
4783 pointed to by BSI. */
4784
4785 static void
4786 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4787 {
4788 tree bvar, var, new_var, new_name, copy, name;
4789 tree orig;
4790
4791 var = bvar = get_base_address (*op);
4792
4793 if (!var || TREE_CODE (with) != SSA_NAME)
4794 goto do_rewrite;
4795
4796 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4797 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4798 if (TREE_CODE (var) == INDIRECT_REF)
4799 var = TREE_OPERAND (var, 0);
4800 if (TREE_CODE (var) == SSA_NAME)
4801 {
4802 name = var;
4803 var = SSA_NAME_VAR (var);
4804 }
4805 else if (DECL_P (var))
4806 name = NULL_TREE;
4807 else
4808 goto do_rewrite;
4809
4810 if (var_ann (var)->type_mem_tag)
4811 var = var_ann (var)->type_mem_tag;
4812
4813 /* We need to add a memory tag for the variable. But we do not want
4814 to add it to the temporary used for the computations, since this leads
4815 to problems in redundancy elimination when there are common parts
4816    in two computations referring to different arrays. So we copy
4817 the variable to a new temporary. */
4818 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4819 if (name)
4820 new_name = duplicate_ssa_name (name, copy);
4821 else
4822 {
4823 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4824 add_referenced_tmp_var (new_var);
4825 var_ann (new_var)->type_mem_tag = var;
4826 new_name = make_ssa_name (new_var, copy);
4827 }
4828 TREE_OPERAND (copy, 0) = new_name;
4829 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4830 with = new_name;
4831
4832 do_rewrite:
4833
4834 orig = NULL_TREE;
4835 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4836 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4837
4838 if (TREE_CODE (*op) == INDIRECT_REF)
4839 orig = REF_ORIGINAL (*op);
4840 if (!orig)
4841 orig = unshare_and_remove_ssa_names (*op);
4842
4843 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4844
4845 /* Record the original reference, for purposes of alias analysis. */
4846 REF_ORIGINAL (*op) = orig;
4847 }
4848
4849 /* Rewrites USE (address that is an iv) using candidate CAND. */
4850
4851 static void
4852 rewrite_use_address (struct ivopts_data *data,
4853 struct iv_use *use, struct iv_cand *cand)
4854 {
4855 tree comp = unshare_expr (get_computation (data->current_loop,
4856 use, cand));
4857 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4858 tree stmts;
4859 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4860
4861 if (stmts)
4862 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4863
4864 rewrite_address_base (&bsi, use->op_p, op);
4865 }
4866
4867 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4868 candidate CAND. */
4869
4870 static void
4871 rewrite_use_compare (struct ivopts_data *data,
4872 struct iv_use *use, struct iv_cand *cand)
4873 {
4874 tree comp;
4875 tree *op_p, cond, op, stmts, bound;
4876 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4877 enum tree_code compare;
4878
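/* If the exit test can be expressed as a comparison of the candidate with a
   bound, rewrite the condition to that comparison; the original iv value
   then need not be computed at all. */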
4879 if (may_eliminate_iv (data, use, cand, &compare, &bound))
4880 {
4881 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4882 tree var_type = TREE_TYPE (var);
4883
4884 bound = fold_convert (var_type, bound);
4885 op = force_gimple_operand (unshare_expr (bound), &stmts,
4886 true, NULL_TREE);
4887
4888 if (stmts)
4889 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4890
4891 *use->op_p = build2 (compare, boolean_type_node, var, op);
4892 modify_stmt (use->stmt);
4893 return;
4894 }
4895
4896 /* The induction variable elimination failed; just express the original
4897 giv. */
4898 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4899
4900 cond = *use->op_p;
4901 op_p = &TREE_OPERAND (cond, 0);
4902 if (TREE_CODE (*op_p) != SSA_NAME
4903 || zero_p (get_iv (data, *op_p)->step))
4904 op_p = &TREE_OPERAND (cond, 1);
4905
4906 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4907 if (stmts)
4908 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4909
4910 *op_p = op;
4911 }
4912
4913 /* Ensure that operand *OP_P may be used at the end of EXIT without
4914 violating loop closed ssa form. */
4915
4916 static void
4917 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4918 {
4919 basic_block def_bb;
4920 struct loop *def_loop;
4921 tree phi, use;
4922
4923 use = USE_FROM_PTR (op_p);
4924 if (TREE_CODE (use) != SSA_NAME)
4925 return;
4926
4927 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4928 if (!def_bb)
4929 return;
4930
4931 def_loop = def_bb->loop_father;
4932 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4933 return;
4934
4935 /* Try finding a phi node that copies the value out of the loop. */
4936 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4937 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4938 break;
4939
4940 if (!phi)
4941 {
4942 /* Create such a phi node. */
4943 tree new_name = duplicate_ssa_name (use, NULL);
4944
4945 phi = create_phi_node (new_name, exit->dest);
4946 SSA_NAME_DEF_STMT (new_name) = phi;
4947 add_phi_arg (phi, use, exit);
4948 }
4949
4950 SET_USE (op_p, PHI_RESULT (phi));
4951 }
4952
4953 /* Ensure that operands of STMT may be used at the end of EXIT without
4954 violating loop closed ssa form. */
4955
4956 static void
4957 protect_loop_closed_ssa_form (edge exit, tree stmt)
4958 {
4959 use_optype uses;
4960 vuse_optype vuses;
4961 v_may_def_optype v_may_defs;
4962 unsigned i;
4963
4964 get_stmt_operands (stmt);
4965
4966 uses = STMT_USE_OPS (stmt);
4967 for (i = 0; i < NUM_USES (uses); i++)
4968 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4969
4970 vuses = STMT_VUSE_OPS (stmt);
4971 for (i = 0; i < NUM_VUSES (vuses); i++)
4972 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4973
4974 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4975 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4976 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4977 }
4978
4979 /* STMTS compute the value of the phi argument OP on the loop exit EXIT.
4980    Arrange things so that they are emitted in the correct place, and so
4981    that the loop closed ssa form is preserved. */
4982
4983 static void
4984 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4985 {
4986 tree_stmt_iterator tsi;
4987 block_stmt_iterator bsi;
4988 tree phi, stmt, def, next;
4989
4990 if (EDGE_COUNT (exit->dest->preds) > 1)
4991 split_loop_exit_edge (exit);
4992
4993 if (TREE_CODE (stmts) == STATEMENT_LIST)
4994 {
4995 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4996 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4997 }
4998 else
4999 protect_loop_closed_ssa_form (exit, stmts);
5000
5001 /* Ensure there is a label in exit->dest, so that we can
5002 insert after it. */
5003 tree_block_label (exit->dest);
5004 bsi = bsi_after_labels (exit->dest);
5005 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5006
5007 if (!op)
5008 return;
5009
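/* Replace each phi node whose argument on EXIT is OP by an explicit
   assignment of OP to the phi result, emitted after the statements inserted
   above. */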
5010 for (phi = phi_nodes (exit->dest); phi; phi = next)
5011 {
5012 next = PHI_CHAIN (phi);
5013
5014 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5015 {
5016 def = PHI_RESULT (phi);
5017 remove_statement (phi, false);
5018 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5019 def, op);
5020 SSA_NAME_DEF_STMT (def) = stmt;
5021 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5022 }
5023 }
5024 }
5025
5026 /* Rewrites the final value of USE (that is only needed outside of the loop)
5027 using candidate CAND. */
5028
5029 static void
5030 rewrite_use_outer (struct ivopts_data *data,
5031 struct iv_use *use, struct iv_cand *cand)
5032 {
5033 edge exit;
5034 tree value, op, stmts, tgt;
5035 tree phi;
5036
5037 switch (TREE_CODE (use->stmt))
5038 {
5039 case PHI_NODE:
5040 tgt = PHI_RESULT (use->stmt);
5041 break;
5042 case MODIFY_EXPR:
5043 tgt = TREE_OPERAND (use->stmt, 0);
5044 break;
5045 default:
5046 gcc_unreachable ();
5047 }
5048
5049 exit = single_dom_exit (data->current_loop);
5050
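/* If the loop has a single dominating exit, express the value of TGT at that
   exit and use it in the phi nodes behind the exit, inserting the necessary
   computations on the exit edge. */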
5051 if (exit)
5052 {
5053 if (!cand->iv)
5054 {
5055 bool ok = may_replace_final_value (data, use, &value);
5056 gcc_assert (ok);
5057 }
5058 else
5059 value = get_computation_at (data->current_loop,
5060 use, cand, last_stmt (exit->src));
5061
5062 value = unshare_expr (value);
5063 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5064
5065 /* If we will preserve the iv anyway and we would need to perform
5066 some computation to replace the final value, do nothing. */
5067 if (stmts && name_info (data, tgt)->preserve_biv)
5068 return;
5069
5070 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5071 {
5072 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5073
5074 if (USE_FROM_PTR (use_p) == tgt)
5075 SET_USE (use_p, op);
5076 }
5077
5078 if (stmts)
5079 compute_phi_arg_on_exit (exit, stmts, op);
5080
5081 /* Enable removal of the statement. We cannot remove it directly,
5082 since we may still need the aliasing information attached to the
5083 ssa name defined by it. */
5084 name_info (data, tgt)->iv->have_use_for = false;
5085 return;
5086 }
5087
5088 /* If the variable is going to be preserved anyway, there is nothing to
5089 do. */
5090 if (name_info (data, tgt)->preserve_biv)
5091 return;
5092
5093 /* Otherwise we just need to compute the iv. */
5094 rewrite_use_nonlinear_expr (data, use, cand);
5095 }
5096
5097 /* Rewrites USE using candidate CAND. */
5098
5099 static void
5100 rewrite_use (struct ivopts_data *data,
5101 struct iv_use *use, struct iv_cand *cand)
5102 {
5103 switch (use->type)
5104 {
5105 case USE_NONLINEAR_EXPR:
5106 rewrite_use_nonlinear_expr (data, use, cand);
5107 break;
5108
5109 case USE_OUTER:
5110 rewrite_use_outer (data, use, cand);
5111 break;
5112
5113 case USE_ADDRESS:
5114 rewrite_use_address (data, use, cand);
5115 break;
5116
5117 case USE_COMPARE:
5118 rewrite_use_compare (data, use, cand);
5119 break;
5120
5121 default:
5122 gcc_unreachable ();
5123 }
5124 modify_stmt (use->stmt);
5125 }
5126
5127 /* Rewrite the uses using the selected induction variables. */
5128
5129 static void
5130 rewrite_uses (struct ivopts_data *data)
5131 {
5132 unsigned i;
5133 struct iv_cand *cand;
5134 struct iv_use *use;
5135
5136 for (i = 0; i < n_iv_uses (data); i++)
5137 {
5138 use = iv_use (data, i);
5139 cand = use->selected;
5140 gcc_assert (cand);
5141
5142 rewrite_use (data, use, cand);
5143 }
5144 }
5145
5146 /* Removes the ivs that are not used after rewriting. */
5147
5148 static void
5149 remove_unused_ivs (struct ivopts_data *data)
5150 {
5151 unsigned j;
5152 bitmap_iterator bi;
5153
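/* The definition of an iv can be removed if the iv has a nonzero step, is
   not used as an invariant, has no recorded use and does not have to be
   preserved. */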
5154 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5155 {
5156 struct version_info *info;
5157
5158 info = ver_info (data, j);
5159 if (info->iv
5160 && !zero_p (info->iv->step)
5161 && !info->inv_id
5162 && !info->iv->have_use_for
5163 && !info->preserve_biv)
5164 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5165 }
5166 }
5167
5168 /* Frees data allocated by the optimization of a single loop. */
5169
5170 static void
5171 free_loop_data (struct ivopts_data *data)
5172 {
5173 unsigned i, j;
5174 bitmap_iterator bi;
5175
5176 htab_empty (data->niters);
5177
5178 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5179 {
5180 struct version_info *info;
5181
5182 info = ver_info (data, i);
5183 if (info->iv)
5184 free (info->iv);
5185 info->iv = NULL;
5186 info->has_nonlin_use = false;
5187 info->preserve_biv = false;
5188 info->inv_id = 0;
5189 }
5190 bitmap_clear (data->relevant);
5191 bitmap_clear (data->important_candidates);
5192
5193 for (i = 0; i < n_iv_uses (data); i++)
5194 {
5195 struct iv_use *use = iv_use (data, i);
5196
5197 free (use->iv);
5198 BITMAP_FREE (use->related_cands);
5199 for (j = 0; j < use->n_map_members; j++)
5200 if (use->cost_map[j].depends_on)
5201 BITMAP_FREE (use->cost_map[j].depends_on);
5202 free (use->cost_map);
5203 free (use);
5204 }
5205 VARRAY_POP_ALL (data->iv_uses);
5206
5207 for (i = 0; i < n_iv_cands (data); i++)
5208 {
5209 struct iv_cand *cand = iv_cand (data, i);
5210
5211 if (cand->iv)
5212 free (cand->iv);
5213 free (cand);
5214 }
5215 VARRAY_POP_ALL (data->iv_candidates);
5216
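/* If new ssa names appeared while processing this loop, grow version_info
   (with some slack) so that it can be indexed by any ssa name version. */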
5217 if (data->version_info_size < num_ssa_names)
5218 {
5219 data->version_info_size = 2 * num_ssa_names;
5220 free (data->version_info);
5221 data->version_info = xcalloc (data->version_info_size,
5222 sizeof (struct version_info));
5223 }
5224
5225 data->max_inv_id = 0;
5226
5227 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5228 {
5229 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5230
5231 SET_DECL_RTL (obj, NULL_RTX);
5232 }
5233 VARRAY_POP_ALL (decl_rtl_to_reset);
5234 }
5235
5236 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5237 loop tree. */
5238
5239 static void
5240 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5241 {
5242 unsigned i;
5243
5244 for (i = 1; i < loops->num; i++)
5245 if (loops->parray[i])
5246 {
5247 free (loops->parray[i]->aux);
5248 loops->parray[i]->aux = NULL;
5249 }
5250
5251 free_loop_data (data);
5252 free (data->version_info);
5253 BITMAP_FREE (data->relevant);
5254 BITMAP_FREE (data->important_candidates);
5255 htab_delete (data->niters);
5256
5257 VARRAY_FREE (decl_rtl_to_reset);
5258 VARRAY_FREE (data->iv_uses);
5259 VARRAY_FREE (data->iv_candidates);
5260 }
5261
5262 /* Optimizes the LOOP. Returns true if anything changed. */
5263
5264 static bool
5265 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5266 {
5267 bool changed = false;
5268 struct iv_ca *iv_ca;
5269 edge exit;
5270
5271 data->current_loop = loop;
5272
5273 if (dump_file && (dump_flags & TDF_DETAILS))
5274 {
5275 fprintf (dump_file, "Processing loop %d\n", loop->num);
5276
5277 exit = single_dom_exit (loop);
5278 if (exit)
5279 {
5280 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5281 exit->src->index, exit->dest->index);
5282 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5283 fprintf (dump_file, "\n");
5284 }
5285
5286 fprintf (dump_file, "\n");
5287 }
5288
5289 /* For each ssa name determines whether it behaves as an induction variable
5290 in some loop. */
5291 if (!find_induction_variables (data))
5292 goto finish;
5293
5294 /* Finds interesting uses (item 1). */
5295 find_interesting_uses (data);
5296 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5297 goto finish;
5298
5299 /* Finds candidates for the induction variables (item 2). */
5300 find_iv_candidates (data);
5301
5302 /* Calculates the costs (item 3, part 1). */
5303 determine_use_iv_costs (data);
5304 determine_iv_costs (data);
5305 determine_set_costs (data);
5306
5307 /* Find the optimal set of induction variables (item 3, part 2). */
5308 iv_ca = find_optimal_iv_set (data);
5309 if (!iv_ca)
5310 goto finish;
5311 changed = true;
5312
5313 /* Create the new induction variables (item 4, part 1). */
5314 create_new_ivs (data, iv_ca);
5315 iv_ca_free (&iv_ca);
5316
5317 /* Rewrite the uses (item 4, part 2). */
5318 rewrite_uses (data);
5319
5320 /* Remove the ivs that are unused after rewriting. */
5321 remove_unused_ivs (data);
5322
5323 /* We have changed the structure of induction variables; it might happen
5324 that definitions in the scev database refer to some of them that were
5325 eliminated. */
5326 scev_reset ();
5327
5328 finish:
5329 free_loop_data (data);
5330
5331 return changed;
5332 }
5333
5334 /* Main entry point. Optimizes induction variables in LOOPS. */
5335
5336 void
5337 tree_ssa_iv_optimize (struct loops *loops)
5338 {
5339 struct loop *loop;
5340 struct ivopts_data data;
5341
5342 tree_ssa_iv_optimize_init (loops, &data);
5343
5344 /* Optimize the loops starting with the innermost ones. */
5345 loop = loops->tree_root;
5346 while (loop->inner)
5347 loop = loop->inner;
5348
5349 #ifdef ENABLE_CHECKING
5350 verify_loop_closed_ssa ();
5351 verify_stmts ();
5352 #endif
5353
5354 /* Scan the loops, inner ones first. */
5355 while (loop != loops->tree_root)
5356 {
5357 if (dump_file && (dump_flags & TDF_DETAILS))
5358 flow_loop_dump (loop, dump_file, NULL, 1);
5359
5360 tree_ssa_iv_optimize_loop (&data, loop);
5361
5362 if (loop->next)
5363 {
5364 loop = loop->next;
5365 while (loop->inner)
5366 loop = loop->inner;
5367 }
5368 else
5369 loop = loop->outer;
5370 }
5371
5372 #ifdef ENABLE_CHECKING
5373 verify_loop_closed_ssa ();
5374 verify_stmts ();
5375 #endif
5376
5377 tree_ssa_iv_optimize_finalize (loops, &data);
5378 }