* lib/wrapper.exp (${tool}_maybe_build_wrapper): Don't unset gluefile.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92
/* The infinite cost: effectively forbids a candidate/use combination.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this constant.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  NULL_TREE means the
			   value is a loop invariant (zero step).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant; 0 until one is assigned.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Information attached to loop (in loop->aux).  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
129
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost of expressing the use by the candidate.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
};

/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
};
192
/* The data used by the induction variable optimizations.  */

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop (cache keyed
     by the exit edge; see niter_for_exit).  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  varray_type iv_uses;

  /* The candidates.  */
  varray_type iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments (a linked list of changes,
   kept so an attempted modification can be rolled back).  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
283
/* Bound on number of candidates below that all candidates are considered
   for each use.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static varray_type decl_rtl_to_reset;
305
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VARRAY_ACTIVE_SIZE (data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VARRAY_ACTIVE_SIZE (data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
}

/* The data for LOOP, stashed in loop->aux by
   tree_ssa_iv_optimize_init.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}
345
346 /* The single loop exit if it dominates the latch, NULL otherwise. */
347
348 static edge
349 single_dom_exit (struct loop *loop)
350 {
351 edge exit = loop->single_exit;
352
353 if (!exit)
354 return NULL;
355
356 if (!just_once_each_iteration_p (loop, exit->src))
357 return NULL;
358
359 return exit;
360 }
361
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  /* A NULL step means the value is a loop invariant.  */
  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
406
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  /* op_p may be NULL for uses recorded without a specific operand
     (see record_use callers).  */
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
454
455 /* Dumps information about the uses to FILE. */
456
457 extern void dump_uses (FILE *, struct ivopts_data *);
458 void
459 dump_uses (FILE *file, struct ivopts_data *data)
460 {
461 unsigned i;
462 struct iv_use *use;
463
464 for (i = 0; i < n_iv_uses (data); i++)
465 {
466 use = iv_use (data, i);
467
468 dump_use (file, use);
469 fprintf (file, "\n");
470 }
471 }
472
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  /* A NULL iv marks the "pseudocandidate" standing for replacement of the
     final value by direct computation.  */
  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
507
508 /* Returns the info for ssa version VER. */
509
510 static inline struct version_info *
511 ver_info (struct ivopts_data *data, unsigned ver)
512 {
513 return data->version_info + ver;
514 }
515
516 /* Returns the info for ssa name NAME. */
517
518 static inline struct version_info *
519 name_info (struct ivopts_data *data, tree name)
520 {
521 return ver_info (data, SSA_NAME_VERSION (name));
522 }
523
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  If so, stores the sign-extended X to *X and returns true,
   otherwise returns false.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits.  Built with two shifts so that BITS equal
     to the full width of the type does not shift by the type width.  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits), by repeated squaring:
     after the loop inv = b * b^2 * b^4 * ... * b^(2^(bits-2)).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend VAL from BITS bits into the full HOST_WIDE_INT.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
574
575 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
576 emitted in LOOP. */
577
578 static bool
579 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580 {
581 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582
583 gcc_assert (bb);
584
585 if (sbb == loop->latch)
586 return true;
587
588 if (sbb != bb)
589 return false;
590
591 return stmt == last_stmt (bb);
592 }
593
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* STMT and the increment are in the same block; whichever is seen first
     scanning backwards is the later one.  Scan the block from the end,
     since the original ivs are usually incremented at the end of the loop
     body.  (One of the two statements must be found, so the loop
     terminates.)  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
620
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      /* The increment is at the very end of the latch; nothing in the loop
	 body is after it.  */
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
642
/* Element of the table in that we cache the numbers of iterations obtained
   from exits of the loop.  Keyed by the exit edge; see niter_for_exit.  */

struct nfe_cache_elt
{
  /* The edge for that the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};
657
658 /* Hash function for nfe_cache_elt E. */
659
660 static hashval_t
661 nfe_hash (const void *e)
662 {
663 const struct nfe_cache_elt *elt = e;
664
665 return htab_hash_pointer (elt->exit);
666 }
667
668 /* Equality function for nfe_cache_elt E1 and edge E2. */
669
670 static int
671 nfe_eq (const void *e1, const void *e2)
672 {
673 const struct nfe_cache_elt *elt1 = e1;
674
675 return elt1->exit == e2;
676 }
677
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  The result
   is cached in DATA->niters, so number_of_iterations_exit is invoked at
   most once per exit edge.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      /* Not cached yet; compute and remember the result (even a failure,
	 via valid_p).  The entry is owned and eventually freed by the
	 hash table.  */
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
707
708 /* Returns structure describing number of iterations determined from
709 single dominating exit of DATA->current_loop, or NULL if something
710 goes wrong. */
711
712 static struct tree_niter_desc *
713 niter_for_single_dom_exit (struct ivopts_data *data)
714 {
715 edge exit = single_dom_exit (data->current_loop);
716
717 if (!exit)
718 return NULL;
719
720 return niter_for_exit (data, exit);
721 }
722
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  Allocates the per-version info array,
   the bitmaps, the niter cache, a loop_data for each loop (in loop->aux),
   and the varrays of uses and candidates.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  /* Twice num_ssa_names, leaving headroom for names created later.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = xcalloc (data->version_info_size,
				sizeof (struct version_info));
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  /* Entries are xmalloc'ed in niter_for_exit; free is the deletion
     callback.  */
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
  VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
  VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
}
747
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.
   Recurses through address arithmetic and conversions.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* Only pointer-typed expressions can carry a base object.  */
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant address points to no analyzable object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      /* &*p has the same base object as p itself.  */
      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold (build1 (ADDR_EXPR, ptr_type_node, base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));

      /* Both operands contribute; combine them symbolically.  */
      return fold (build (code, ptr_type_node, op0, op1));

    case NOP_EXPR:
    case CONVERT_EXPR:
      /* Conversions do not change the base object.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
800
801 /* Allocates an induction variable with given initial value BASE and step STEP
802 for loop LOOP. */
803
804 static struct iv *
805 alloc_iv (tree base, tree step)
806 {
807 struct iv *iv = xcalloc (1, sizeof (struct iv));
808
809 if (step && integer_zerop (step))
810 step = NULL_TREE;
811
812 iv->base = base;
813 iv->base_object = determine_base_object (base);
814 iv->step = step;
815 iv->biv_p = false;
816 iv->have_use_for = false;
817 iv->use_id = 0;
818 iv->ssa_name = NULL_TREE;
819
820 return iv;
821 }
822
/* Records that the ssa name IV describes an induction variable with initial
   value BASE and step STEP, and marks its version as relevant in DATA.
   IV must not already have a description.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
836
837 /* Finds induction variable declaration for VAR. */
838
839 static struct iv *
840 get_iv (struct ivopts_data *data, tree var)
841 {
842 basic_block bb;
843
844 if (!name_info (data, var)->iv)
845 {
846 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
847
848 if (!bb
849 || !flow_bb_inside_loop_p (data->current_loop, bb))
850 set_iv (data, var, var, NULL_TREE);
851 }
852
853 return name_info (data, var)->iv;
854 }
855
/* Determines the step of a biv defined in PHI.  Returns NULL_TREE if PHI
   does not define a simple induction variable; a biv with zero step is
   reported as an explicit zero constant (so the caller can distinguish it
   from failure).  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi), base, step;
  tree type = TREE_TYPE (name);

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &base, &step))
    return NULL_TREE;

  /* simple_iv reports an invariant with a NULL step; turn it into a zero
     constant of the right type.  */
  if (!step)
    return build_int_cst (type, 0);

  return step;
}
876
877 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
878
879 static bool
880 abnormal_ssa_name_p (tree exp)
881 {
882 if (!exp)
883 return false;
884
885 if (TREE_CODE (exp) != SSA_NAME)
886 return false;
887
888 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
889 }
890
891 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
892 abnormal phi node. Callback for for_each_index. */
893
894 static bool
895 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
896 void *data ATTRIBUTE_UNUSED)
897 {
898 if (TREE_CODE (base) == ARRAY_REF)
899 {
900 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
901 return false;
902 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
903 return false;
904 }
905
906 return !abnormal_ssa_name_p (*index);
907 }
908
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  Recurses through unary/binary/comparison
   expressions and the indices of addresses.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  enum tree_code_class class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* for_each_index returns false as soon as the callback does, i.e. when
     an abnormal name is found somewhere in the indices.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
950
/* Finds basic induction variables: phi nodes in the loop header whose value
   evolves by a constant step.  Records each found biv via set_iv.  Returns
   true if at least one biv was found.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);

      if (!step)
	continue;
      /* A biv with zero step is useless; skip it.  */
      if (cst_and_fits_in_hwi (step)
	  && int_cst_value (step) == 0)
	continue;

      /* The initial value comes from the preheader edge.  */
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      if (contains_abnormal_ssa_name_p (base))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      step = fold_convert (type, step);

      /* FIXME: We do not handle induction variables whose step does
	 not satisfy cst_and_fits_in_hwi.  */
      if (!cst_and_fits_in_hwi (step))
	continue;

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
992
/* Marks basic ivs: for each header phi with an iv description, flags both
   the phi result and the name incremented on the latch edge as bivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* The value flowing back around the loop via the latch edge.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
1024
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to BASE and STEP.  Returns true on success; on failure *BASE
   and *STEP are left NULL_TREE.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
			tree *base, tree *step)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  *base = NULL_TREE;
  *step = NULL_TREE;

  /* Only plain assignments to an ssa name can define a giv.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
    return false;

  /* FIXME: We do not handle induction variables whose step does
     not satisfy cst_and_fits_in_hwi.  */
  if (!zero_p (*step)
      && !cst_and_fits_in_hwi (*step))
    return false;

  if (contains_abnormal_ssa_name_p (*base))
    return false;

  return true;
}
1059
1060 /* Finds general ivs in statement STMT. */
1061
1062 static void
1063 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1064 {
1065 tree base, step;
1066
1067 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1068 return;
1069
1070 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1071 }
1072
/* Finds general ivs in basic block BB by scanning all its statements.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs in all basic blocks of the current loop, visited in
   dominator order so definitions are seen before uses.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
1097
1098 /* For each ssa name defined in LOOP determines whether it is an induction
1099 variable and if so, its initial value and step. */
1100
1101 static bool
1102 find_induction_variables (struct ivopts_data *data)
1103 {
1104 unsigned i;
1105 bitmap_iterator bi;
1106
1107 if (!find_bivs (data))
1108 return false;
1109
1110 find_givs (data);
1111 mark_bivs (data);
1112
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1114 {
1115 struct tree_niter_desc *niter;
1116
1117 niter = niter_for_single_dom_exit (data);
1118
1119 if (niter)
1120 {
1121 fprintf (dump_file, " number of iterations ");
1122 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1123 fprintf (dump_file, "\n");
1124
1125 fprintf (dump_file, " may be zero if ");
1126 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1127 fprintf (dump_file, "\n");
1128 fprintf (dump_file, "\n");
1129 };
1130
1131 fprintf (dump_file, "Induction variables:\n\n");
1132
1133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1134 {
1135 if (ver_info (data, i)->iv)
1136 dump_iv (dump_file, ver_info (data, i)->iv);
1137 }
1138 }
1139
1140 return true;
1141 }
1142
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV,
   appending it to DATA->iv_uses.  Returns the new use.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);

  return use;
}
1169
/* Checks whether OP is a loop-level invariant and if so, records it (marks
   its version relevant and assigns it an invariant id if it does not have
   one yet).  NONLINEAR_USE is true if the invariant is used in a way we do
   not handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  /* Names defined inside the loop are not invariants.  */
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
1196
/* Checks whether the use OP is interesting and if so, records it as TYPE
   (USE_NONLINEAR_EXPR or USE_OUTER).  At most one such use is recorded per
   iv; a repeated find may upgrade an USE_OUTER use to USE_NONLINEAR_EXPR.
   Returns the use, or NULL if OP is not an interesting iv use.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      /* USE_NONLINEAR_EXPR subsumes USE_OUTER.  */
      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  /* A zero-step "iv" is really an invariant.  */
  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record a private copy of the iv description; record_use clears the
     ssa_name field of the copy.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
1247
1248 /* Checks whether the use OP is interesting and if so, records it. */
1249
1250 static struct iv_use *
1251 find_interesting_uses_op (struct ivopts_data *data, tree op)
1252 {
1253 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1254 }
1255
1256 /* Records a definition of induction variable OP that is used outside of the
1257 loop. */
1258
1259 static struct iv_use *
1260 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1261 {
1262 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1263 }
1264
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  /* Constant conditions are not interesting.  */
  if (integer_zerop (*cond_p)
      || integer_nonzerop (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      /* A bare SSA name is handled as NAME != 0.  */
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  /* Non-SSA operands (constants) are represented by CONST_IV, whose step
     is NULL, i.e. they behave as invariants below.  */
  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Exactly one operand is a nonconstant iv here; record the whole
     condition as a compare use based on (a copy of) that iv.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
1326
1327 /* Returns true if expression EXPR is obviously invariant in LOOP,
1328 i.e. if all its operands are defined outside of the LOOP. */
1329
1330 bool
1331 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1332 {
1333 basic_block def_bb;
1334 unsigned i, len;
1335
1336 if (is_gimple_min_invariant (expr))
1337 return true;
1338
1339 if (TREE_CODE (expr) == SSA_NAME)
1340 {
1341 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1342 if (def_bb
1343 && flow_bb_inside_loop_p (loop, def_bb))
1344 return false;
1345
1346 return true;
1347 }
1348
1349 if (!EXPR_P (expr))
1350 return false;
1351
1352 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1353 for (i = 0; i < len; i++)
1354 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1355 return false;
1356
1357 return true;
1358 }
1359
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* The global ivopts data.  */
  tree stmt;				/* Statement the reference occurs in.  */
  tree *step_p;				/* Accumulated step of the whole
					   address; NULL until the first
					   nonzero contribution.  */
};
1370
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, type, iv_type, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* References with special alignment semantics are not handled here.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index with the initial value of its iv ...  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  iv_type = TREE_TYPE (iv->base);
  type = build_pointer_type (TREE_TYPE (base));
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (type, 1);

  /* ... and accumulate ELEMENT_SIZE * IV_STEP into the step of the whole
     address.  If the iv is narrower than the pointer type, verify that it
     cannot wrap around.  */
  if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
    iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
					  type, iv->base, iv->step, dta->stmt);
  else
    iv_step = fold_convert (iv_type, iv->step);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
					    *dta->step_p, step);

  return true;
}
1453
1454 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1455 object is passed to it in DATA. */
1456
1457 static bool
1458 idx_record_use (tree base, tree *idx,
1459 void *data)
1460 {
1461 find_interesting_uses_op (data, *idx);
1462 if (TREE_CODE (base) == ARRAY_REF)
1463 {
1464 find_interesting_uses_op (data, array_ref_element_size (base));
1465 find_interesting_uses_op (data, array_ref_low_bound (base));
1466 }
1467 return true;
1468 }
1469
1470 /* Returns true if memory reference REF may be unaligned. */
1471
1472 static bool
1473 may_be_unaligned_p (tree ref)
1474 {
1475 tree base;
1476 tree base_type;
1477 HOST_WIDE_INT bitsize;
1478 HOST_WIDE_INT bitpos;
1479 tree toffset;
1480 enum machine_mode mode;
1481 int unsignedp, volatilep;
1482 unsigned base_align;
1483
1484 /* The test below is basically copy of what expr.c:normal_inner_ref
1485 does to check whether the object must be loaded by parts when
1486 STRICT_ALIGNMENT is true. */
1487 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488 &unsignedp, &volatilep, true);
1489 base_type = TREE_TYPE (base);
1490 base_align = TYPE_ALIGN (base_type);
1491
1492 if (mode != BLKmode
1493 && (base_align < GET_MODE_ALIGNMENT (mode)
1494 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495 || bitpos % BITS_PER_UNIT != 0))
1496 return true;
1497
1498 return false;
1499 }
1500
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = unshare_expr (*op_p), step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  /* Replace each index in BASE by the initial value of its iv and sum the
     steps into STEP; give up if some index is not an iv or the address
     does not change inside the loop.  */
  ifs_ivopts_data.ivopts_data = data;
  ifs_ivopts_data.stmt = stmt;
  ifs_ivopts_data.step_p = &step;
  if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
      || zero_p (step))
    goto fail;

  gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

  /* The base of the new address iv is the address of the reference.  */
  if (TREE_CODE (base) == INDIRECT_REF)
    base = TREE_OPERAND (base, 0);
  else
    base = build_addr (base);

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* The address cannot be handled as a whole; at least record the uses
     of the individual indices.  */
  for_each_index (op_p, idx_record_use, data);
}
1542
1543 /* Finds and records invariants used in STMT. */
1544
1545 static void
1546 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1547 {
1548 use_optype uses = NULL;
1549 unsigned i, n;
1550 tree op;
1551
1552 if (TREE_CODE (stmt) == PHI_NODE)
1553 n = PHI_NUM_ARGS (stmt);
1554 else
1555 {
1556 get_stmt_operands (stmt);
1557 uses = STMT_USE_OPS (stmt);
1558 n = NUM_USES (uses);
1559 }
1560
1561 for (i = 0; i < n; i++)
1562 {
1563 if (TREE_CODE (stmt) == PHI_NODE)
1564 op = PHI_ARG_DEF (stmt, i);
1565 else
1566 op = USE_OP (uses, i);
1567
1568 record_invariant (data, op, false);
1569 }
1570 }
1571
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  use_optype uses = NULL;
  unsigned i, n;

  find_invariants_stmt (data, stmt);

  /* The condition of a COND_EXPR is the only interesting part of it.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  /* A comparison on the rhs is handled like a condition.  */
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; record the address use(s).  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  /* A store of a register value to memory; both the store address
	     and the stored value are interesting.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      /* A phi node in the loop header that defines an induction variable
	 is not an interesting use of its arguments.  */
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  /* Otherwise all SSA operands of the statement that are ivs are ordinary
     (nonlinear) uses.  */
  if (TREE_CODE (stmt) == PHI_NODE)
    n = PHI_NUM_ARGS (stmt);
  else
    {
      uses = STMT_USE_OPS (stmt);
      n = NUM_USES (uses);
    }

  for (i = 0; i < n; i++)
    {
      if (TREE_CODE (stmt) == PHI_NODE)
	op = PHI_ARG_DEF (stmt, i);
      else
	op = USE_OP (uses, i);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1673
1674 /* Finds interesting uses of induction variables outside of loops
1675 on loop exit edge EXIT. */
1676
1677 static void
1678 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1679 {
1680 tree phi, def;
1681
1682 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1683 {
1684 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1685 find_interesting_uses_outer (data, def);
1686 }
1687 }
1688
1689 /* Finds uses of the induction variables that are interesting. */
1690
1691 static void
1692 find_interesting_uses (struct ivopts_data *data)
1693 {
1694 basic_block bb;
1695 block_stmt_iterator bsi;
1696 tree phi;
1697 basic_block *body = get_loop_body (data->current_loop);
1698 unsigned i;
1699 struct version_info *info;
1700 edge e;
1701
1702 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Uses:\n\n");
1704
1705 for (i = 0; i < data->current_loop->num_nodes; i++)
1706 {
1707 edge_iterator ei;
1708 bb = body[i];
1709
1710 FOR_EACH_EDGE (e, ei, bb->succs)
1711 if (e->dest != EXIT_BLOCK_PTR
1712 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1713 find_interesting_uses_outside (data, e);
1714
1715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1716 find_interesting_uses_stmt (data, phi);
1717 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1718 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1719 }
1720
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1722 {
1723 bitmap_iterator bi;
1724
1725 fprintf (dump_file, "\n");
1726
1727 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1728 {
1729 info = ver_info (data, i);
1730 if (info->inv_id)
1731 {
1732 fprintf (dump_file, " ");
1733 print_generic_expr (dump_file, info->name, TDF_SLIM);
1734 fprintf (dump_file, " is invariant (%d)%s\n",
1735 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1736 }
1737 }
1738
1739 fprintf (dump_file, "\n");
1740 }
1741
1742 free (body);
1743 }
1744
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  Returns EXPR with the stripped
   parts removed, or the original expression when nothing could be
   stripped.  */

static tree
strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);
  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      /* The whole constant becomes the offset.  */
      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset (op0, false, &off0);
      op1 = strip_offset (op1, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Simplify away operands that became zero.  */
      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The constant part of the index contributes
	 index * element_size to the offset.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset (op1, false, &off1);
      *offset = off1 * st;
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;
      break;

    case ADDR_EXPR:
      inside_addr = true;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset (op0, inside_addr, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  /* Rebuild the node with the stripped operands.  */
  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  return fold_convert (orig_type, expr);
}
1843
1844 /* Returns variant of TYPE that can be used as base for different uses.
1845 For integer types, we return unsigned variant of the type, which
1846 avoids problems with overflows. For pointer types, we return void *. */
1847
1848 static tree
1849 generic_type_for (tree type)
1850 {
1851 if (POINTER_TYPE_P (type))
1852 return ptr_type_node;
1853
1854 if (TYPE_UNSIGNED (type))
1855 return type;
1856
1857 return unsigned_type_for (type);
1858 }
1859
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Returns the (possibly preexisting) candidate.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      /* Work with the generic variant of the type (unsigned for integers,
	 void * for pointers) so that equivalent candidates are shared.  */
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* Check whether an equivalent candidate already exists; the loop ends
     with I at the matching candidate, or at n_iv_cands if none.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      /* A pseudocandidate (null iv) matches only pseudocandidates.  */
      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  /* No match found -- create a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  /* IP_ORIGINAL candidates keep the original iv variables; the
	     others get a fresh temporary.  */
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
1963
1964 /* Returns true if incrementing the induction variable at the end of the LOOP
1965 is allowed.
1966
1967 The purpose is to avoid splitting latch edge with a biv increment, thus
1968 creating a jump, possibly confusing other optimization passes and leaving
1969 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1970 is not available (so we do not have a better alternative), or if the latch
1971 edge is already nonempty. */
1972
1973 static bool
1974 allow_ip_end_pos_p (struct loop *loop)
1975 {
1976 if (!ip_normal_pos (loop))
1977 return true;
1978
1979 if (!empty_block_p (ip_end_pos (loop)))
1980 return true;
1981
1982 return false;
1983 }
1984
1985 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1986 position to POS. If USE is not NULL, the candidate is set as related to
1987 it. The candidate computation is scheduled on all available positions. */
1988
1989 static void
1990 add_candidate (struct ivopts_data *data,
1991 tree base, tree step, bool important, struct iv_use *use)
1992 {
1993 if (ip_normal_pos (data->current_loop))
1994 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1995 if (ip_end_pos (data->current_loop)
1996 && allow_ip_end_pos_p (data->current_loop))
1997 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1998 }
1999
2000 /* Add a standard "0 + 1 * iteration" iv candidate for a
2001 type with SIZE bits. */
2002
2003 static void
2004 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2005 unsigned int size)
2006 {
2007 tree type = lang_hooks.types.type_for_size (size, true);
2008 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2009 true, NULL);
2010 }
2011
2012 /* Adds standard iv candidates. */
2013
2014 static void
2015 add_standard_iv_candidates (struct ivopts_data *data)
2016 {
2017 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2018
2019 /* The same for a double-integer type if it is still fast enough. */
2020 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2021 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2022 }
2023
2024
2025 /* Adds candidates bases on the old induction variable IV. */
2026
2027 static void
2028 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2029 {
2030 tree phi, def;
2031 struct iv_cand *cand;
2032
2033 add_candidate (data, iv->base, iv->step, true, NULL);
2034
2035 /* The same, but with initial value zero. */
2036 add_candidate (data,
2037 build_int_cst (TREE_TYPE (iv->base), 0),
2038 iv->step, true, NULL);
2039
2040 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2041 if (TREE_CODE (phi) == PHI_NODE)
2042 {
2043 /* Additionally record the possibility of leaving the original iv
2044 untouched. */
2045 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2046 cand = add_candidate_1 (data,
2047 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2048 SSA_NAME_DEF_STMT (def));
2049 cand->var_before = iv->ssa_name;
2050 cand->var_after = def;
2051 }
2052 }
2053
2054 /* Adds candidates based on the old induction variables. */
2055
2056 static void
2057 add_old_ivs_candidates (struct ivopts_data *data)
2058 {
2059 unsigned i;
2060 struct iv *iv;
2061 bitmap_iterator bi;
2062
2063 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2064 {
2065 iv = ver_info (data, i)->iv;
2066 if (iv && iv->biv_p && !zero_p (iv->step))
2067 add_old_iv_candidates (data, iv);
2068 }
2069 }
2070
2071 /* Adds candidates based on the value of the induction variable IV and USE. */
2072
2073 static void
2074 add_iv_value_candidates (struct ivopts_data *data,
2075 struct iv *iv, struct iv_use *use)
2076 {
2077 add_candidate (data, iv->base, iv->step, false, use);
2078
2079 /* The same, but with initial value zero. */
2080 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2081 iv->step, false, use);
2082 }
2083
2084 /* Adds candidates based on the address IV and USE. */
2085
2086 static void
2087 add_address_candidates (struct ivopts_data *data,
2088 struct iv *iv, struct iv_use *use)
2089 {
2090 tree base, abase;
2091 unsigned HOST_WIDE_INT offset;
2092
2093 /* First, the trivial choices. */
2094 add_iv_value_candidates (data, iv, use);
2095
2096 /* Second, try removing the COMPONENT_REFs. */
2097 if (TREE_CODE (iv->base) == ADDR_EXPR)
2098 {
2099 base = TREE_OPERAND (iv->base, 0);
2100 while (TREE_CODE (base) == COMPONENT_REF
2101 || (TREE_CODE (base) == ARRAY_REF
2102 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2103 base = TREE_OPERAND (base, 0);
2104
2105 if (base != TREE_OPERAND (iv->base, 0))
2106 {
2107 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2108 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2109
2110 if (TREE_CODE (base) == INDIRECT_REF)
2111 base = TREE_OPERAND (base, 0);
2112 else
2113 base = build_addr (base);
2114 add_candidate (data, base, iv->step, false, use);
2115 }
2116 }
2117
2118 /* Third, try removing the constant offset. */
2119 abase = iv->base;
2120 base = strip_offset (abase, false, &offset);
2121 if (offset)
2122 add_candidate (data, base, iv->step, false, use);
2123 }
2124
2125 /* Possibly adds pseudocandidate for replacing the final value of USE by
2126 a direct computation. */
2127
2128 static void
2129 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2130 {
2131 struct tree_niter_desc *niter;
2132
2133 /* We must know where we exit the loop and how many times does it roll. */
2134 niter = niter_for_single_dom_exit (data);
2135 if (!niter
2136 || !zero_p (niter->may_be_zero))
2137 return;
2138
2139 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2140 }
2141
2142 /* Adds candidates based on the uses. */
2143
2144 static void
2145 add_derived_ivs_candidates (struct ivopts_data *data)
2146 {
2147 unsigned i;
2148
2149 for (i = 0; i < n_iv_uses (data); i++)
2150 {
2151 struct iv_use *use = iv_use (data, i);
2152
2153 if (!use)
2154 continue;
2155
2156 switch (use->type)
2157 {
2158 case USE_NONLINEAR_EXPR:
2159 case USE_COMPARE:
2160 /* Just add the ivs based on the value of the iv used here. */
2161 add_iv_value_candidates (data, use->iv, use);
2162 break;
2163
2164 case USE_OUTER:
2165 add_iv_value_candidates (data, use->iv, use);
2166
2167 /* Additionally, add the pseudocandidate for the possibility to
2168 replace the final value by a direct computation. */
2169 add_iv_outer_candidates (data, use);
2170 break;
2171
2172 case USE_ADDRESS:
2173 add_address_candidates (data, use->iv, use);
2174 break;
2175
2176 default:
2177 gcc_unreachable ();
2178 }
2179 }
2180 }
2181
2182 /* Record important candidates and add them to related_cands bitmaps
2183 if needed. */
2184
2185 static void
2186 record_important_candidates (struct ivopts_data *data)
2187 {
2188 unsigned i;
2189 struct iv_use *use;
2190
2191 for (i = 0; i < n_iv_cands (data); i++)
2192 {
2193 struct iv_cand *cand = iv_cand (data, i);
2194
2195 if (cand->important)
2196 bitmap_set_bit (data->important_candidates, i);
2197 }
2198
2199 data->consider_all_candidates = (n_iv_cands (data)
2200 <= CONSIDER_ALL_CANDIDATES_BOUND);
2201
2202 if (data->consider_all_candidates)
2203 {
2204 /* We will not need "related_cands" bitmaps in this case,
2205 so release them to decrease peak memory consumption. */
2206 for (i = 0; i < n_iv_uses (data); i++)
2207 {
2208 use = iv_use (data, i);
2209 BITMAP_FREE (use->related_cands);
2210 }
2211 }
2212 else
2213 {
2214 /* Add important candidates to the related_cands bitmaps. */
2215 for (i = 0; i < n_iv_uses (data); i++)
2216 bitmap_ior_into (iv_use (data, i)->related_cands,
2217 data->important_candidates);
2218 }
2219 }
2220
/* Finds the candidates for the induction variables, in this order:
   standard ivs, the preexisting (old) ivs, and ivs derived from the
   interesting uses.  Finally marks the important ones.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2238
2239 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2240 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2241 we allocate a simple list to every use. */
2242
2243 static void
2244 alloc_use_cost_map (struct ivopts_data *data)
2245 {
2246 unsigned i, size, s, j;
2247
2248 for (i = 0; i < n_iv_uses (data); i++)
2249 {
2250 struct iv_use *use = iv_use (data, i);
2251 bitmap_iterator bi;
2252
2253 if (data->consider_all_candidates)
2254 size = n_iv_cands (data);
2255 else
2256 {
2257 s = 0;
2258 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2259 {
2260 s++;
2261 }
2262
2263 /* Round up to the power of two, so that moduling by it is fast. */
2264 for (size = 1; size < s; size <<= 1)
2265 continue;
2266 }
2267
2268 use->n_map_members = size;
2269 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2270 }
2271 }
2272
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on)
{
  unsigned i, s;

  /* Infinite cost means the pair is unusable; it is not recorded at all,
     so get_use_iv_cost returns NULL for it.  */
  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* The cost map is indexed directly by the candidate id.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      return;
    }

  /* Otherwise the cost map is a small open-addressing hash table; probe
     linearly (with wraparound) for a free entry.
     n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table is sized with room for all related candidates, so a free
     entry must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
}
2313
2314 /* Gets cost of (USE, CANDIDATE) pair. */
2315
2316 static struct cost_pair *
2317 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2318 struct iv_cand *cand)
2319 {
2320 unsigned i, s;
2321 struct cost_pair *ret;
2322
2323 if (!cand)
2324 return NULL;
2325
2326 if (data->consider_all_candidates)
2327 {
2328 ret = use->cost_map + cand->id;
2329 if (!ret->cand)
2330 return NULL;
2331
2332 return ret;
2333 }
2334
2335 /* n_map_members is a power of two, so this computes modulo. */
2336 s = cand->id & (use->n_map_members - 1);
2337 for (i = s; i < use->n_map_members; i++)
2338 if (use->cost_map[i].cand == cand)
2339 return use->cost_map + i;
2340
2341 for (i = 0; i < s; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2344
2345 return NULL;
2346 }
2347
2348 /* Returns estimate on cost of computing SEQ. */
2349
2350 static unsigned
2351 seq_cost (rtx seq)
2352 {
2353 unsigned cost = 0;
2354 rtx set;
2355
2356 for (; seq; seq = NEXT_INSN (seq))
2357 {
2358 set = single_set (seq);
2359 if (set)
2360 cost += rtx_cost (set, SET);
2361 else
2362 cost++;
2363 }
2364
2365 return cost;
2366 }
2367
2368 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2369 static rtx
2370 produce_memory_decl_rtl (tree obj, int *regno)
2371 {
2372 rtx x;
2373
2374 gcc_assert (obj);
2375 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2376 {
2377 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2378 x = gen_rtx_SYMBOL_REF (Pmode, name);
2379 }
2380 else
2381 x = gen_raw_REG (Pmode, (*regno)++);
2382
2383 return gen_rtx_MEM (DECL_MODE (obj), x);
2384 }
2385
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Find the base object of the reference whose address is taken;
	 it must be placed in memory.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* SSA names live in fake pseudo registers.  Setting *WS to 0 stops
	 walk_tree from descending into the subtree.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects must be placed in memory; everything else can
	 live in a fake pseudo register.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so that the fake DECL_RTL can be reset later.  */
      VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2443
2444 /* Determines cost of the computation of EXPR. */
2445
2446 static unsigned
2447 computation_cost (tree expr)
2448 {
2449 rtx seq, rslt;
2450 tree type = TREE_TYPE (expr);
2451 unsigned cost;
2452 /* Avoid using hard regs in ways which may be unsupported. */
2453 int regno = LAST_VIRTUAL_REGISTER + 1;
2454
2455 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2456 start_sequence ();
2457 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2458 seq = get_insns ();
2459 end_sequence ();
2460
2461 cost = seq_cost (seq);
2462 if (GET_CODE (rslt) == MEM)
2463 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2464
2465 return cost;
2466 }
2467
2468 /* Returns variable containing the value of candidate CAND at statement AT. */
2469
2470 static tree
2471 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2472 {
2473 if (stmt_after_increment (loop, cand, stmt))
2474 return cand->var_after;
2475 else
2476 return cand->var_before;
2477 }
2478
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  Returns NULL_TREE when USE cannot be
   expressed from CAND.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return NULL_TREE;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Compute in an unsigned variant of utype; presumably to get modulo
     arithmetic and avoid signed-overflow issues in the folding below
     -- TODO confirm.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
    }

  /* Both steps must be host-word constants for the division below.  */
  if (!cst_and_fits_in_hwi (cstep)
      || !cst_and_fits_in_hwi (ustep))
    return NULL_TREE;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  /* RATIOI is the integer for that ustep == ratioi * cstep holds.  */
  if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
    {
      /* TODO maybe consider case when ustep divides cstep and the ratio is
	 a power of 2 (so that the division is fast to execute)?  We would
	 need to be much more careful with overflows etc. then.  */
      return NULL_TREE;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (ratioi == 1)
    {
      /* use = ubase - cbase + var.  */
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
    }
  else if (ratioi == -1)
    {
      /* use = ubase + cbase - var.  */
      delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
      expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
    }
  else
    {
      /* General case: use = (ubase - ratio * cbase) + ratio * var.  */
      ratio = build_int_cst_type (uutype, ratioi);
      delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
      delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
      expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
      expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
    }

  /* Convert back to the type of the use.  */
  return fold_convert (utype, expr);
}
2575
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  /* Convenience wrapper: compute the expression at the statement of USE.  */
  return get_computation_at (loop, use, cand, use->stmt);
}
2584
2585 /* Returns cost of addition in MODE. */
2586
2587 static unsigned
2588 add_cost (enum machine_mode mode)
2589 {
2590 static unsigned costs[NUM_MACHINE_MODES];
2591 rtx seq;
2592 unsigned cost;
2593
2594 if (costs[mode])
2595 return costs[mode];
2596
2597 start_sequence ();
2598 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2599 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2600 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2601 NULL_RTX);
2602 seq = get_insns ();
2603 end_sequence ();
2604
2605 cost = seq_cost (seq);
2606 if (!cost)
2607 cost = 1;
2608
2609 costs[mode] = cost;
2610
2611 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, "Addition in %s costs %d\n",
2613 GET_MODE_NAME (mode), cost);
2614 return cost;
2615 }
2616
/* Entry in a hashtable of already known costs for multiplication.
   Keyed by the (CST, MODE) pair; see mbc_entry_hash and mbc_entry_eq.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
2624
2625 /* Counts hash value for the ENTRY. */
2626
2627 static hashval_t
2628 mbc_entry_hash (const void *entry)
2629 {
2630 const struct mbc_entry *e = entry;
2631
2632 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2633 }
2634
2635 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2636
2637 static int
2638 mbc_entry_eq (const void *entry1, const void *entry2)
2639 {
2640 const struct mbc_entry *e1 = entry1;
2641 const struct mbc_entry *e2 = entry2;
2642
2643 return (e1->mode == e2->mode
2644 && e1->cst == e2->cst);
2645 }
2646
2647 /* Returns cost of multiplication by constant CST in MODE. */
2648
2649 static unsigned
2650 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2651 {
2652 static htab_t costs;
2653 struct mbc_entry **cached, act;
2654 rtx seq;
2655 unsigned cost;
2656
2657 if (!costs)
2658 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2659
2660 act.mode = mode;
2661 act.cst = cst;
2662 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2663 if (*cached)
2664 return (*cached)->cost;
2665
2666 *cached = xmalloc (sizeof (struct mbc_entry));
2667 (*cached)->mode = mode;
2668 (*cached)->cst = cst;
2669
2670 start_sequence ();
2671 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2672 NULL_RTX, 0);
2673 seq = get_insns ();
2674 end_sequence ();
2675
2676 cost = seq_cost (seq);
2677
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2680 (int) cst, GET_MODE_NAME (mode), cost);
2681
2682 (*cached)->cost = cost;
2683
2684 return cost;
2685 }
2686
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  All addresses are built and costed in Pmode.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  /* One-time probing of the target: determine the range of immediate
     offsets and the set of index multipliers that form valid addresses.  */
  if (!valid_mult)
    {
      HOST_WIDE_INT i;

      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);

      /* Probe powers of two up to 2^20 for the largest valid positive
	 offset.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* ... and similarly for the most negative valid offset.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = GEN_INT (-i);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      /* Record which multipliers in [-MAX_RATIO, MAX_RATIO] the target
	 accepts in an address; remember one valid multiplier in RAT.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      rat = 1;
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = GEN_INT (i);
	  if (memory_address_p (Pmode, addr))
	    {
	      SET_BIT (valid_mult, i + MAX_RATIO);
	      rat = i;
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Sign-extend OFFSET to the width of Pmode.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  /* Can the offset and the multiplier be encoded in the address itself?  */
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
	     && TEST_BIT (valid_mult, ratio + MAX_RATIO));

  /* A multiplier that cannot be encoded costs an explicit multiplication.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An offset that cannot be encoded costs an explicit addition, after
     which the address contains a variable part.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  /* Look up (or compute and cache) the cost of an address of this shape.  */
  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      acost = 0;

      addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
      reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  GEN_INT (off)));
	}
      else if (offset_p)
	base = GEN_INT (off);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      /* Legitimize the address; charge for the insns this needs plus the
	 cost of the final address itself.  */
      start_sequence ();
      addr = memory_address (Pmode, addr);
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      /* A zero cost would defeat the cache.  */
      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
2834
2835 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2836 the bitmap to that we should store it. */
2837
2838 static struct ivopts_data *fd_ivopts_data;
2839 static tree
2840 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2841 {
2842 bitmap *depends_on = data;
2843 struct version_info *info;
2844
2845 if (TREE_CODE (*expr_p) != SSA_NAME)
2846 return NULL_TREE;
2847 info = name_info (fd_ivopts_data, *expr_p);
2848
2849 if (!info->inv_id || info->has_nonlin_use)
2850 return NULL_TREE;
2851
2852 if (!*depends_on)
2853 *depends_on = BITMAP_ALLOC (NULL);
2854 bitmap_set_bit (*depends_on, info->inv_id);
2855
2856 return NULL_TREE;
2857 }
2858
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on (may be NULL if the caller does not
   need them).  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  /* On the first call, measure and cache the cost of three representative
     computations: an integer constant, the address of a symbol, and a
     symbol plus offset.  */
  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							    2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Record the invariants the expression depends on, if requested.  */
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  /* A variable is already a variable; no cost.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  /* For a binary arithmetic expression, recursively cost the operands
     that are not already gimple values.  */
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_var_cost (data, op0, NULL);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_var_cost (data, op1, NULL);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      /* Multiplication by a constant can be costed precisely; a general
	 multiplication is priced as a spill.  */
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
2993
2994 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2995 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2996 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2997 invariants the computation depends on. */
2998
2999 static unsigned
3000 split_address_cost (struct ivopts_data *data,
3001 tree addr, bool *symbol_present, bool *var_present,
3002 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3003 {
3004 tree core;
3005 HOST_WIDE_INT bitsize;
3006 HOST_WIDE_INT bitpos;
3007 tree toffset;
3008 enum machine_mode mode;
3009 int unsignedp, volatilep;
3010
3011 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3012 &unsignedp, &volatilep, false);
3013
3014 if (toffset != 0
3015 || bitpos % BITS_PER_UNIT != 0
3016 || TREE_CODE (core) != VAR_DECL)
3017 {
3018 *symbol_present = false;
3019 *var_present = true;
3020 fd_ivopts_data = data;
3021 walk_tree (&addr, find_depends, depends_on, NULL);
3022 return target_spill_cost;
3023 }
3024
3025 *offset += bitpos / BITS_PER_UNIT;
3026 if (TREE_STATIC (core)
3027 || DECL_EXTERNAL (core))
3028 {
3029 *symbol_present = true;
3030 *var_present = false;
3031 return 0;
3032 }
3033
3034 *symbol_present = false;
3035 *var_present = true;
3036 return 0;
3037 }
3038
3039 /* Estimates cost of expressing difference of addresses E1 - E2 as
3040 var + symbol + offset. The value of offset is added to OFFSET,
3041 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3042 part is missing. DEPENDS_ON is a set of the invariants the computation
3043 depends on. */
3044
3045 static unsigned
3046 ptr_difference_cost (struct ivopts_data *data,
3047 tree e1, tree e2, bool *symbol_present, bool *var_present,
3048 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3049 {
3050 HOST_WIDE_INT diff = 0;
3051 unsigned cost;
3052
3053 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3054
3055 if (ptr_difference_const (e1, e2, &diff))
3056 {
3057 *offset += diff;
3058 *symbol_present = false;
3059 *var_present = false;
3060 return 0;
3061 }
3062
3063 if (e2 == integer_zero_node)
3064 return split_address_cost (data, TREE_OPERAND (e1, 0),
3065 symbol_present, var_present, offset, depends_on);
3066
3067 *symbol_present = false;
3068 *var_present = true;
3069
3070 cost = force_var_cost (data, e1, depends_on);
3071 cost += force_var_cost (data, e2, depends_on);
3072 cost += add_cost (Pmode);
3073
3074 return cost;
3075 }
3076
3077 /* Estimates cost of expressing difference E1 - E2 as
3078 var + symbol + offset. The value of offset is added to OFFSET,
3079 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3080 part is missing. DEPENDS_ON is a set of the invariants the computation
3081 depends on. */
3082
3083 static unsigned
3084 difference_cost (struct ivopts_data *data,
3085 tree e1, tree e2, bool *symbol_present, bool *var_present,
3086 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3087 {
3088 unsigned cost;
3089 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3090 unsigned HOST_WIDE_INT off1, off2;
3091
3092 e1 = strip_offset (e1, false, &off1);
3093 e2 = strip_offset (e2, false, &off2);
3094 *offset += off1 - off2;
3095
3096 STRIP_NOPS (e1);
3097 STRIP_NOPS (e2);
3098
3099 if (TREE_CODE (e1) == ADDR_EXPR)
3100 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3101 depends_on);
3102 *symbol_present = false;
3103
3104 if (operand_equal_p (e1, e2, 0))
3105 {
3106 *var_present = false;
3107 return 0;
3108 }
3109 *var_present = true;
3110 if (zero_p (e2))
3111 return force_var_cost (data, e1, depends_on);
3112
3113 if (zero_p (e1))
3114 {
3115 cost = force_var_cost (data, e2, depends_on);
3116 cost += multiply_by_cost (-1, mode);
3117
3118 return cost;
3119 }
3120
3121 cost = force_var_cost (data, e1, depends_on);
3122 cost += force_var_cost (data, e2, depends_on);
3123 cost += add_cost (mode);
3124
3125 return cost;
3126 }
3127
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   Returns INFTY when USE cannot be expressed from CAND.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  /* Both steps must be host-word constants.  */
  if (!cst_and_fits_in_hwi (ustep)
      || !cst_and_fits_in_hwi (cstep))
    return INFTY;

  if (TREE_CODE (ubase) == INTEGER_CST
      && !cst_and_fits_in_hwi (ubase))
    goto fallback;

  if (TREE_CODE (cbase) == INTEGER_CST
      && !cst_and_fits_in_hwi (cbase))
    goto fallback;

  ustepi = int_cst_value (ustep);
  cstepi = int_cst_value (cstep);

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* RATIO is the integer for that ustep == ratio * cstep holds (in the
     precision of utype); without it CAND cannot express USE cheaply.  */
  if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
    return INFTY;

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (TREE_CODE (cbase) == INTEGER_CST)
    {
      /* Fold ratio * cbase into the constant offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      /* General case: cbase must be computed and subtracted explicitly.  */
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      /* Only the multiplication (if any) remains.  */
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  /* Count the additions needed to combine the parts.  */
  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3278
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on)
{
  /* Convenience wrapper: evaluate the cost at the statement of USE.  */
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt);
}
3293
3294 /* Determines cost of basing replacement of USE on CAND in a generic
3295 expression. */
3296
3297 static bool
3298 determine_use_iv_cost_generic (struct ivopts_data *data,
3299 struct iv_use *use, struct iv_cand *cand)
3300 {
3301 bitmap depends_on;
3302 unsigned cost;
3303
3304 /* The simple case first -- if we need to express value of the preserved
3305 original biv, the cost is 0. This also prevents us from counting the
3306 cost of increment twice -- once at this use and once in the cost of
3307 the candidate. */
3308 if (cand->pos == IP_ORIGINAL
3309 && cand->incremented_at == use->stmt)
3310 {
3311 set_use_iv_cost (data, use, cand, 0, NULL);
3312 return true;
3313 }
3314
3315 cost = get_computation_cost (data, use, cand, false, &depends_on);
3316 set_use_iv_cost (data, use, cand, cost, depends_on);
3317
3318 return cost != INFTY;
3319 }
3320
3321 /* Determines cost of basing replacement of USE on CAND in an address. */
3322
3323 static bool
3324 determine_use_iv_cost_address (struct ivopts_data *data,
3325 struct iv_use *use, struct iv_cand *cand)
3326 {
3327 bitmap depends_on;
3328 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3329
3330 set_use_iv_cost (data, use, cand, cost, depends_on);
3331
3332 return cost != INFTY;
3333 }
3334
3335 /* Computes value of induction variable IV in iteration NITER. */
3336
3337 static tree
3338 iv_value (struct iv *iv, tree niter)
3339 {
3340 tree val;
3341 tree type = TREE_TYPE (iv->base);
3342
3343 niter = fold_convert (type, niter);
3344 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3345
3346 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3347 }
3348
3349 /* Computes value of candidate CAND at position AT in iteration NITER. */
3350
3351 static tree
3352 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3353 {
3354 tree val = iv_value (cand->iv, niter);
3355 tree type = TREE_TYPE (cand->iv->base);
3356
3357 if (stmt_after_increment (loop, cand, at))
3358 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3359
3360 return val;
3361 }
3362
3363 /* Returns period of induction variable iv. */
3364
3365 static tree
3366 iv_period (struct iv *iv)
3367 {
3368 tree step = iv->step, period, type;
3369 tree pow2div;
3370
3371 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3372
3373 /* Period of the iv is gcd (step, type range). Since type range is power
3374 of two, it suffices to determine the maximum power of two that divides
3375 step. */
3376 pow2div = num_ending_zeros (step);
3377 type = unsigned_type_for (TREE_TYPE (step));
3378
3379 period = build_low_bits_mask (type,
3380 (TYPE_PRECISION (type)
3381 - tree_low_cst (pow2div, 1)));
3382
3383 return period;
3384 }
3385
3386 /* Check whether it is possible to express the condition in USE by comparison
3387 of candidate CAND. If so, store the comparison code to COMPARE and the
3388 value compared with to BOUND. */
3389
3390 static bool
3391 may_eliminate_iv (struct ivopts_data *data,
3392 struct iv_use *use, struct iv_cand *cand,
3393 enum tree_code *compare, tree *bound)
3394 {
3395 basic_block ex_bb;
3396 edge exit;
3397 struct tree_niter_desc *niter;
3398 tree nit, nit_type;
3399 tree wider_type, period, per_type;
3400 struct loop *loop = data->current_loop;
3401
3402 /* For now works only for exits that dominate the loop latch. TODO -- extend
3403 for other conditions inside loop body. */
3404 ex_bb = bb_for_stmt (use->stmt);
3405 if (use->stmt != last_stmt (ex_bb)
3406 || TREE_CODE (use->stmt) != COND_EXPR)
3407 return false;
3408 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3409 return false;
3410
3411 exit = EDGE_SUCC (ex_bb, 0);
3412 if (flow_bb_inside_loop_p (loop, exit->dest))
3413 exit = EDGE_SUCC (ex_bb, 1);
3414 if (flow_bb_inside_loop_p (loop, exit->dest))
3415 return false;
3416
3417 niter = niter_for_exit (data, exit);
3418 if (!niter
3419 || !zero_p (niter->may_be_zero))
3420 return false;
3421
3422 nit = niter->niter;
3423 nit_type = TREE_TYPE (nit);
3424
3425 /* Determine whether we may use the variable to test whether niter iterations
3426 elapsed. This is the case iff the period of the induction variable is
3427 greater than the number of iterations. */
3428 period = iv_period (cand->iv);
3429 if (!period)
3430 return false;
3431 per_type = TREE_TYPE (period);
3432
3433 wider_type = TREE_TYPE (period);
3434 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3435 wider_type = per_type;
3436 else
3437 wider_type = nit_type;
3438
3439 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3440 fold_convert (wider_type, period),
3441 fold_convert (wider_type, nit)))))
3442 return false;
3443
3444 if (exit->flags & EDGE_TRUE_VALUE)
3445 *compare = EQ_EXPR;
3446 else
3447 *compare = NE_EXPR;
3448
3449 *bound = cand_value_at (loop, cand, use->stmt, nit);
3450 return true;
3451 }
3452
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound;
  enum tree_code compare;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL);
      return false;
    }

  /* Try to eliminate the iv entirely by rewriting the exit test to compare
     CAND against its value at loop exit; the cost is then just that of
     computing the bound.  */
  if (may_eliminate_iv (data, use, cand, &compare, &bound))
    {
      bitmap depends_on = NULL;
      unsigned cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  if (TREE_CODE (*use->op_p) == SSA_NAME)
    record_invariant (data, *use->op_p, true);
  else
    {
      record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
      record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
    }

  return determine_use_iv_cost_generic (data, use, cand);
}
3491
/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.  */

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
			 tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  struct tree_niter_desc *niter;

  /* We need a single exit dominating the latch so that the value of the
     iv after the loop is well defined.  */
  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
			      bb_for_stmt (use->stmt)));

  /* The number of iterations must be known, and known not to be
     conditionally zero -- otherwise the final-value formula would be
     wrong on the path that bypasses the loop body.  */
  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return false;

  *value = iv_value (use->iv, niter->niter);

  return true;
}
3519
/* Determines cost of replacing final value of USE using CAND.  Returns
   false if the replacement is impossible (infinite cost recorded).  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL);
      return true;
    }

  /* The pseudocandidate stands for replacing the use by a direct
     computation of the final value.  */
  if (!cand->iv)
    {
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The value is computed only once, after the loop; amortize.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on);

  return cost != INFTY;
}
3580
3581 /* Determines cost of basing replacement of USE on CAND. Returns false
3582 if USE cannot be based on CAND. */
3583
3584 static bool
3585 determine_use_iv_cost (struct ivopts_data *data,
3586 struct iv_use *use, struct iv_cand *cand)
3587 {
3588 switch (use->type)
3589 {
3590 case USE_NONLINEAR_EXPR:
3591 return determine_use_iv_cost_generic (data, use, cand);
3592
3593 case USE_OUTER:
3594 return determine_use_iv_cost_outer (data, use, cand);
3595
3596 case USE_ADDRESS:
3597 return determine_use_iv_cost_address (data, use, cand);
3598
3599 case USE_COMPARE:
3600 return determine_use_iv_cost_condition (data, use, cand);
3601
3602 default:
3603 gcc_unreachable ();
3604 }
3605 }
3606
/* Determines costs of basing the use of the iv on an iv candidate,
   for every (use, candidate) pair worth considering.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Few enough candidates -- cost every pair.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Otherwise only cost the candidates recorded as related to
	     this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip unusable pairs.  */
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3681
3682 /* Determines cost of the candidate CAND. */
3683
3684 static void
3685 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3686 {
3687 unsigned cost_base, cost_step;
3688 tree base;
3689
3690 if (!cand->iv)
3691 {
3692 cand->cost = 0;
3693 return;
3694 }
3695
3696 /* There are two costs associated with the candidate -- its increment
3697 and its initialization. The second is almost negligible for any loop
3698 that rolls enough, so we take it just very little into account. */
3699
3700 base = cand->iv->base;
3701 cost_base = force_var_cost (data, base, NULL);
3702 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3703
3704 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3705
3706 /* Prefer the original iv unless we may gain something by replacing it;
3707 this is not really relevant for artificial ivs created by other
3708 passes. */
3709 if (cand->pos == IP_ORIGINAL
3710 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3711 cand->cost--;
3712
3713 /* Prefer not to insert statements into latch unless there are some
3714 already (so that we do not create unnecessary jumps). */
3715 if (cand->pos == IP_END
3716 && empty_block_p (ip_end_pos (data->current_loop)))
3717 cand->cost++;
3718 }
3719
3720 /* Determines costs of computation of the candidates. */
3721
3722 static void
3723 determine_iv_costs (struct ivopts_data *data)
3724 {
3725 unsigned i;
3726
3727 if (dump_file && (dump_flags & TDF_DETAILS))
3728 {
3729 fprintf (dump_file, "Candidate costs:\n");
3730 fprintf (dump_file, " cand\tcost\n");
3731 }
3732
3733 for (i = 0; i < n_iv_cands (data); i++)
3734 {
3735 struct iv_cand *cand = iv_cand (data, i);
3736
3737 determine_iv_cost (data, cand);
3738
3739 if (dump_file && (dump_flags & TDF_DETAILS))
3740 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3741 }
3742
3743 if (dump_file && (dump_flags & TDF_DETAILS))
3744 fprintf (dump_file, "\n");
3745 }
3746
3747 /* Calculates cost for having SIZE induction variables. */
3748
3749 static unsigned
3750 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3751 {
3752 return global_cost_for_size (size,
3753 loop_data (data->current_loop)->regs_used,
3754 n_iv_uses (data));
3755 }
3756
/* For each size of the induction variable set determine the penalty.
   Estimates the number of registers the loop already uses and stores it
   in the per-loop data for use by ivopts_global_cost_for_size.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count loop-carried values: real (non-virtual) phi results that are
     not induction variables handled by this pass.  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add the invariants (values defined outside of the loop) that have
     nonlinear uses inside it -- each occupies a register as well.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
3832
3833 /* Returns true if A is a cheaper cost pair than B. */
3834
3835 static bool
3836 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3837 {
3838 if (!a)
3839 return false;
3840
3841 if (!b)
3842 return true;
3843
3844 if (a->cost < b->cost)
3845 return true;
3846
3847 if (a->cost > b->cost)
3848 return false;
3849
3850 /* In case the costs are the same, prefer the cheaper candidate. */
3851 if (a->cand->cost < b->cand->cost)
3852 return true;
3853
3854 return false;
3855 }
3856
3857 /* Computes the cost field of IVS structure. */
3858
3859 static void
3860 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3861 {
3862 unsigned cost = 0;
3863
3864 cost += ivs->cand_use_cost;
3865 cost += ivs->cand_cost;
3866 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3867
3868 ivs->cost = cost;
3869 }
3870
/* Set USE not to be expressed by any candidate in IVS.  Updates the
   candidate/invariant reference counts, the register-pressure estimate
   and the total cost accordingly.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  struct cost_pair *cp;
  bitmap_iterator bi;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The use becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use based on the candidate, remove the
     candidate from the set.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
    }

  ivs->cand_use_cost -= cp->cost;

  /* Release the invariants this cost pair depended on; an invariant
     that loses its last use frees up a register.  */
  deps = cp->depends_on;

  if (deps)
    {
      EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	{
	  ivs->n_invariant_uses[iid]--;
	  if (ivs->n_invariant_uses[iid] == 0)
	    ivs->n_regs--;
	}
    }

  iv_ca_recount_cost (data, ivs);
}
3917
/* Set cost pair for USE in set IVS to CP.  CP may be NULL, in which case
   USE becomes unexpressed.  All reference counts and costs are updated;
   this is the exact inverse operation of iv_ca_set_no_cp.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid, iid;
  bitmap deps;
  bitmap_iterator bi;

  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Undo the previous assignment first.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of the candidate -- add it to the set.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	}

      ivs->cand_use_cost += cp->cost;

      /* Account for the invariants the cost pair depends on; a newly
	 used invariant occupies a register.  */
      deps = cp->depends_on;

      if (deps)
	{
	  EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
	    {
	      ivs->n_invariant_uses[iid]++;
	      if (ivs->n_invariant_uses[iid] == 1)
		ivs->n_regs++;
	    }
	}

      iv_ca_recount_cost (data, ivs);
    }
}
3968
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  If none of the candidates can express USE, it is left
   unassigned and counted in IVS->bad_uses.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  /* Uses are added in order of their ids.  */
  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  /* Pick the cheapest way of expressing USE using a candidate already
     present in the set.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
3998
3999 /* Get cost for assignment IVS. */
4000
4001 static unsigned
4002 iv_ca_cost (struct iv_ca *ivs)
4003 {
4004 return (ivs->bad_uses ? INFTY : ivs->cost);
4005 }
4006
4007 /* Returns true if all dependences of CP are among invariants in IVS. */
4008
4009 static bool
4010 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4011 {
4012 unsigned i;
4013 bitmap_iterator bi;
4014
4015 if (!cp->depends_on)
4016 return true;
4017
4018 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4019 {
4020 if (ivs->n_invariant_uses[i] == 0)
4021 return false;
4022 }
4023
4024 return true;
4025 }
4026
4027 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4028 it before NEXT_CHANGE. */
4029
4030 static struct iv_ca_delta *
4031 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4032 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4033 {
4034 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4035
4036 change->use = use;
4037 change->old_cp = old_cp;
4038 change->new_cp = new_cp;
4039 change->next_change = next_change;
4040
4041 return change;
4042 }
4043
4044 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4045 are rewritten. */
4046
4047 static struct iv_ca_delta *
4048 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4049 {
4050 struct iv_ca_delta *last;
4051
4052 if (!l2)
4053 return l1;
4054
4055 if (!l1)
4056 return l2;
4057
4058 for (last = l1; last->next_change; last = last->next_change)
4059 continue;
4060 last->next_change = l2;
4061
4062 return l1;
4063 }
4064
/* Returns the cost pair (and thus the candidate) by that USE is
   expressed in IVS, or NULL if USE is not expressed.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
4072
4073 /* Reverse the list of changes DELTA, forming the inverse to it. */
4074
4075 static struct iv_ca_delta *
4076 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4077 {
4078 struct iv_ca_delta *act, *next, *prev = NULL;
4079 struct cost_pair *tmp;
4080
4081 for (act = delta; act; act = next)
4082 {
4083 next = act->next_change;
4084 act->next_change = prev;
4085 prev = act;
4086
4087 tmp = act->old_cp;
4088 act->old_cp = act->new_cp;
4089 act->new_cp = tmp;
4090 }
4091
4092 return prev;
4093 }
4094
4095 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4096 reverted instead. */
4097
4098 static void
4099 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4100 struct iv_ca_delta *delta, bool forward)
4101 {
4102 struct cost_pair *from, *to;
4103 struct iv_ca_delta *act;
4104
4105 if (!forward)
4106 delta = iv_ca_delta_reverse (delta);
4107
4108 for (act = delta; act; act = act->next_change)
4109 {
4110 from = act->old_cp;
4111 to = act->new_cp;
4112 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4113 iv_ca_set_cp (data, ivs, act->use, to);
4114 }
4115
4116 if (!forward)
4117 iv_ca_delta_reverse (delta);
4118 }
4119
/* Returns true if CAND expresses at least one use in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
4127
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
4135
4136 /* Free the list of changes DELTA. */
4137
4138 static void
4139 iv_ca_delta_free (struct iv_ca_delta **delta)
4140 {
4141 struct iv_ca_delta *act, *next;
4142
4143 for (act = *delta; act; act = next)
4144 {
4145 next = act->next_change;
4146 free (act);
4147 }
4148
4149 *delta = NULL;
4150 }
4151
4152 /* Allocates new iv candidates assignment. */
4153
4154 static struct iv_ca *
4155 iv_ca_new (struct ivopts_data *data)
4156 {
4157 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4158
4159 nw->upto = 0;
4160 nw->bad_uses = 0;
4161 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4162 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4163 nw->cands = BITMAP_ALLOC (NULL);
4164 nw->n_cands = 0;
4165 nw->n_regs = 0;
4166 nw->cand_use_cost = 0;
4167 nw->cand_cost = 0;
4168 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4169 nw->cost = 0;
4170
4171 return nw;
4172 }
4173
4174 /* Free memory occupied by the set IVS. */
4175
4176 static void
4177 iv_ca_free (struct iv_ca **ivs)
4178 {
4179 free ((*ivs)->cand_for_use);
4180 free ((*ivs)->n_cand_uses);
4181 BITMAP_FREE ((*ivs)->cands);
4182 free ((*ivs)->n_invariant_uses);
4183 free (*ivs);
4184 *ivs = NULL;
4185 }
4186
4187 /* Dumps IVS to FILE. */
4188
4189 static void
4190 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4191 {
4192 const char *pref = " invariants ";
4193 unsigned i;
4194
4195 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4196 bitmap_print (file, ivs->cands, " candidates ","\n");
4197
4198 for (i = 1; i <= data->max_inv_id; i++)
4199 if (ivs->n_invariant_uses[i])
4200 {
4201 fprintf (file, "%s%d", pref, i);
4202 pref = ", ";
4203 }
4204 fprintf (file, "\n");
4205 }
4206
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS (if non-NULL).  IVS itself is left
   unchanged: the delta is committed only to measure the cost, then
   immediately reverted.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already based on CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* Rebase only if all invariants the new pair needs are available
	 and the change is actually an improvement.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the resulting cost, then undo; the caller decides whether
     to commit DELTA for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4251
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Returns INFTY if some
   use based on CAND cannot be expressed by any other candidate in the
   set.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only the uses currently based on CAND need rebasing.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      /* Find the cheapest replacement among the remaining candidates in
	 the set.  When not considering all candidates, restrict the
	 search to those related to the use.  */
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement exists -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then undo the changes.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4335
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  IVS itself is left unchanged.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single removal that improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helps -- we are done.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The recursive
     deltas are measured on the narrowed set (committed temporarily),
     then joined with the removal found above.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
4384
4385 /* Tries to extend the sets IVS in the best possible way in order
4386 to express the USE. */
4387
4388 static bool
4389 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4390 struct iv_use *use)
4391 {
4392 unsigned best_cost, act_cost;
4393 unsigned i;
4394 bitmap_iterator bi;
4395 struct iv_cand *cand;
4396 struct iv_ca_delta *best_delta = NULL, *act_delta;
4397 struct cost_pair *cp;
4398
4399 iv_ca_add_use (data, ivs, use);
4400 best_cost = iv_ca_cost (ivs);
4401
4402 cp = iv_ca_cand_for_use (ivs, use);
4403 if (cp)
4404 {
4405 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4406 iv_ca_set_no_cp (data, ivs, use);
4407 }
4408
4409 /* First try important candidates. Only if it fails, try the specific ones.
4410 Rationale -- in loops with many variables the best choice often is to use
4411 just one generic biv. If we added here many ivs specific to the uses,
4412 the optimization algorithm later would be likely to get stuck in a local
4413 minimum, thus causing us to create too many ivs. The approach from
4414 few ivs to more seems more likely to be successful -- starting from few
4415 ivs, replacing an expensive use by a specific iv should always be a
4416 win. */
4417 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4418 {
4419 cand = iv_cand (data, i);
4420
4421 if (iv_ca_cand_used_p (ivs, cand))
4422 continue;
4423
4424 cp = get_use_iv_cost (data, use, cand);
4425 if (!cp)
4426 continue;
4427
4428 iv_ca_set_cp (data, ivs, use, cp);
4429 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4430 iv_ca_set_no_cp (data, ivs, use);
4431 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4432
4433 if (act_cost < best_cost)
4434 {
4435 best_cost = act_cost;
4436
4437 iv_ca_delta_free (&best_delta);
4438 best_delta = act_delta;
4439 }
4440 else
4441 iv_ca_delta_free (&act_delta);
4442 }
4443
4444 if (best_cost == INFTY)
4445 {
4446 for (i = 0; i < use->n_map_members; i++)
4447 {
4448 cp = use->cost_map + i;
4449 cand = cp->cand;
4450 if (!cand)
4451 continue;
4452
4453 /* Already tried this. */
4454 if (cand->important)
4455 continue;
4456
4457 if (iv_ca_cand_used_p (ivs, cand))
4458 continue;
4459
4460 act_delta = NULL;
4461 iv_ca_set_cp (data, ivs, use, cp);
4462 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4463 iv_ca_set_no_cp (data, ivs, use);
4464 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4465 cp, act_delta);
4466
4467 if (act_cost < best_cost)
4468 {
4469 best_cost = act_cost;
4470
4471 if (best_delta)
4472 iv_ca_delta_free (&best_delta);
4473 best_delta = act_delta;
4474 }
4475 else
4476 iv_ca_delta_free (&act_delta);
4477 }
4478 }
4479
4480 iv_ca_delta_commit (data, ivs, best_delta, true);
4481 iv_ca_delta_free (&best_delta);
4482
4483 return (best_cost != INFTY);
4484 }
4485
4486 /* Finds an initial assignment of candidates to uses. */
4487
4488 static struct iv_ca *
4489 get_initial_solution (struct ivopts_data *data)
4490 {
4491 struct iv_ca *ivs = iv_ca_new (data);
4492 unsigned i;
4493
4494 for (i = 0; i < n_iv_uses (data); i++)
4495 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4496 {
4497 iv_ca_free (&ivs);
4498 return NULL;
4499 }
4500
4501 return ivs;
4502 }
4503
/* Tries to improve set of induction variables IVS.  Returns true if some
   improvement was applied to IVS, false when the set is locally
   optimal.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Temporarily commit the extension so pruning sees it, then
	     merge both deltas into one candidate change.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
4560
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.
   Returns the final set (with each use's selected candidate recorded),
   or NULL if no valid assignment exists.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Apply local improvements until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the chosen candidate in each use for the rewriting phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
4607
/* Creates a new induction variable corresponding to CAND, inserting its
   increment at the position described by CAND->pos.  For IP_ORIGINAL
   candidates the existing biv is preserved instead.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates do not correspond to an actual iv.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment before the exit test, at the end of the loop body.  */
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment at the end of the latch block.  */
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }
 
  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
4650
/* Creates new induction variables described in SET, one for each
   candidate selected into the set.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
4666
4667 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4668 is true, remove also the ssa name defined by the statement. */
4669
4670 static void
4671 remove_statement (tree stmt, bool including_defined_name)
4672 {
4673 if (TREE_CODE (stmt) == PHI_NODE)
4674 {
4675 if (!including_defined_name)
4676 {
4677 /* Prevent the ssa name defined by the statement from being removed. */
4678 SET_PHI_RESULT (stmt, NULL);
4679 }
4680 remove_phi_node (stmt, NULL_TREE);
4681 }
4682 else
4683 {
4684 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4685
4686 bsi_remove (&bsi);
4687 }
4688 }
4689
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && TREE_CODE (use->stmt) == MODIFY_EXPR
      && TREE_OPERAND (use->stmt, 0) == cand->var_after)
    {
      op = TREE_OPERAND (use->stmt, 1);

      /* Be a bit careful.  In case variable is expressed in some
	 complicated way, rewrite it so that we may get rid of this
	 complicated expression.  */
      if ((TREE_CODE (op) == PLUS_EXPR
	   || TREE_CODE (op) == MINUS_EXPR)
	  && TREE_OPERAND (op, 0) == cand->var_before
	  && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
	return;
    }

  comp = unshare_expr (get_computation (data->current_loop,
					use, cand));
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Find the insertion point: after the labels at the start of the
	 block containing the phi node.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  /* Gimplify the computation; STMTS receives any statements needed to
     compute OP.  */
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
4768
4769 /* Replaces ssa name in index IDX by its basic variable. Callback for
4770 for_each_index. */
4771
4772 static bool
4773 idx_remove_ssa_names (tree base, tree *idx,
4774 void *data ATTRIBUTE_UNUSED)
4775 {
4776 tree *op;
4777
4778 if (TREE_CODE (*idx) == SSA_NAME)
4779 *idx = SSA_NAME_VAR (*idx);
4780
4781 if (TREE_CODE (base) == ARRAY_REF)
4782 {
4783 op = &TREE_OPERAND (base, 2);
4784 if (*op
4785 && TREE_CODE (*op) == SSA_NAME)
4786 *op = SSA_NAME_VAR (*op);
4787 op = &TREE_OPERAND (base, 3);
4788 if (*op
4789 && TREE_CODE (*op) == SSA_NAME)
4790 *op = SSA_NAME_VAR (*op);
4791 }
4792
4793 return true;
4794 }
4795
4796 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4797
4798 static tree
4799 unshare_and_remove_ssa_names (tree ref)
4800 {
4801 ref = unshare_expr (ref);
4802 for_each_index (&ref, idx_remove_ssa_names, NULL);
4803
4804 return ref;
4805 }
4806
/* Rewrites base of memory access OP with expression WITH in statement
   pointed to by BSI.  When WITH is an ssa name, a new pointer carrying
   the aliasing information of the original base is created and used
   instead, so that alias analysis still sees the right memory tag.  */

static void
rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
{
  tree bvar, var, new_name, copy, name;
  tree orig;

  var = bvar = get_base_address (*op);

  /* The aliasing bookkeeping below is only needed when replacing a real
     base by an ssa name; otherwise rewrite the reference directly.  */
  if (!var || TREE_CODE (with) != SSA_NAME)
    goto do_rewrite;

  gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
  /* Strip the dereference and find the variable (or ssa name) whose
     alias information should be transferred to the new pointer.  */
  if (TREE_CODE (var) == INDIRECT_REF)
    var = TREE_OPERAND (var, 0);
  if (TREE_CODE (var) == SSA_NAME)
    {
      name = var;
      var = SSA_NAME_VAR (var);
    }
  else if (DECL_P (var))
    name = NULL_TREE;
  else
    goto do_rewrite;

  /* We need to add a memory tag for the variable.  But we do not want
     to add it to the temporary used for the computations, since this leads
     to problems in redundancy elimination when there are common parts
     in two computations referring to the different arrays.  So we copy
     the variable to a new temporary.  */
  copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);

  if (name)
    /* The base was an ssa name; duplicating it preserves its aliasing
       information.  */
    new_name = duplicate_ssa_name (name, copy);
  else
    {
      /* The base was a declaration; create a fresh pointer temporary and
	 give it the declaration's memory tag (or record a type alias if
	 there is none yet).  */
      tree tag = var_ann (var)->type_mem_tag;
      tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
      add_referenced_tmp_var (new_ptr);
      if (tag)
	var_ann (new_ptr)->type_mem_tag = tag;
      else
	add_type_alias (new_ptr, var);
      new_name = make_ssa_name (new_ptr, copy);
    }

  /* Emit the copy assignment and use its result as the new base.  */
  TREE_OPERAND (copy, 0) = new_name;
  update_stmt (copy);
  bsi_insert_before (bsi, copy, BSI_SAME_STMT);
  with = new_name;

do_rewrite:

  orig = NULL_TREE;
  gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
  gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);

  /* Preserve an already-recorded original reference, if any; otherwise
     build one from the access being rewritten.  */
  if (TREE_CODE (*op) == INDIRECT_REF)
    orig = REF_ORIGINAL (*op);
  if (!orig)
    orig = unshare_and_remove_ssa_names (*op);

  *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);

  /* Record the original reference, for purposes of alias analysis.  */
  REF_ORIGINAL (*op) = orig;

  /* Virtual operands in the original statement may have to be renamed
     because of the replacement.  */
  mark_new_vars_to_rename (bsi_stmt (*bsi));
}
4881
4882 /* Rewrites USE (address that is an iv) using candidate CAND. */
4883
4884 static void
4885 rewrite_use_address (struct ivopts_data *data,
4886 struct iv_use *use, struct iv_cand *cand)
4887 {
4888 tree comp = unshare_expr (get_computation (data->current_loop,
4889 use, cand));
4890 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4891 tree stmts;
4892 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4893
4894 if (stmts)
4895 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4896
4897 rewrite_address_base (&bsi, use->op_p, op);
4898 }
4899
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If possible, the compared iv is eliminated entirely by
   comparing CAND against a precomputed bound; otherwise the iv operand of
   the condition is just re-expressed through CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;

  if (may_eliminate_iv (data, use, cand, &compare, &bound))
    {
      /* Replace the whole condition by "VAR compare BOUND", where VAR is
	 the value of CAND at the use.  */
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
				 true, NULL_TREE);

      /* Statements computing the bound are emitted just before the use.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = unshare_expr (get_computation (data->current_loop, use, cand));

  /* Pick the operand of the condition that is the iv being replaced --
     operand 0, unless it is not an ssa name or its step is zero, in which
     case it must be operand 1.  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
4945
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  If the value is defined inside a loop
   that does not contain EXIT->dest, the use is redirected through a phi
   node in EXIT->dest (creating one if needed).  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  /* If the name has no defining basic block, there is nothing to
     protect.  */
  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  /* If EXIT->dest is inside the loop of the definition, the use does not
     leave that loop and loop closed ssa form is not violated.  */
  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  /* Redirect the use to the phi result, which is defined in EXIT->dest
     itself.  */
  SET_USE (op_p, PHI_RESULT (phi));
}
4985
4986 /* Ensure that operands of STMT may be used at the end of EXIT without
4987 violating loop closed ssa form. */
4988
4989 static void
4990 protect_loop_closed_ssa_form (edge exit, tree stmt)
4991 {
4992 use_optype uses;
4993 vuse_optype vuses;
4994 v_may_def_optype v_may_defs;
4995 unsigned i;
4996
4997 get_stmt_operands (stmt);
4998
4999 uses = STMT_USE_OPS (stmt);
5000 for (i = 0; i < NUM_USES (uses); i++)
5001 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5002
5003 vuses = STMT_VUSE_OPS (stmt);
5004 for (i = 0; i < NUM_VUSES (vuses); i++)
5005 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5006
5007 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5008 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5009 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5010 }
5011
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
   so that they are emitted on the correct place, and so that the loop closed
   ssa form is preserved.  Phi nodes in EXIT->dest whose argument on EXIT is
   OP are replaced by plain assignments of OP to their results.  */

static void
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
{
  tree_stmt_iterator tsi;
  block_stmt_iterator bsi;
  tree phi, stmt, def, next;

  /* Make sure EXIT->dest has a single predecessor, so that the inserted
     statements are only executed when the loop exits through EXIT.  */
  if (!single_pred_p (exit->dest))
    split_loop_exit_edge (exit);

  /* Ensure there is label in exit->dest, so that we can
     insert after it.  */
  tree_block_label (exit->dest);
  bsi = bsi_after_labels (exit->dest);

  /* Insert the computation statements, restoring loop closed ssa form
     for the operands of each statement as it is inserted.  */
  if (TREE_CODE (stmts) == STATEMENT_LIST)
    {
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
	{
	  bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
	  protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
	}
    }
  else
    {
      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
      protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
    }

  if (!op)
    return;

  /* Replace each phi node whose argument on EXIT is OP by an assignment
     of OP to the phi result.  NEXT is remembered up front because the phi
     node is removed inside the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = next)
    {
      next = PHI_CHAIN (phi);

      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
	{
	  def = PHI_RESULT (phi);
	  remove_statement (phi, false);
	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
			 def, op);
	  SSA_NAME_DEF_STMT (def) = stmt;
	  bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
	}
    }
}
5063
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  When the loop has a single dominating exit, the
   value is computed on the exit edge; otherwise the use is rewritten like a
   nonlinear expression.  */

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* TGT is the ssa name whose final value is used outside of the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      /* Determine the value to use on the exit: either the replacement
	 final value (when CAND has no iv), or the value of CAND at the
	 last statement of the exit source block.  */
      if (!cand->iv)
	{
	  bool ok = may_replace_final_value (data, use, &value);
	  gcc_assert (ok);
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      value = unshare_expr (value);
      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
	  
      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the phi arguments on the exit edge from TGT to the new
	 value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      /* Emit the statements computing the value on the exit edge.  */
      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5134
5135 /* Rewrites USE using candidate CAND. */
5136
5137 static void
5138 rewrite_use (struct ivopts_data *data,
5139 struct iv_use *use, struct iv_cand *cand)
5140 {
5141 switch (use->type)
5142 {
5143 case USE_NONLINEAR_EXPR:
5144 rewrite_use_nonlinear_expr (data, use, cand);
5145 break;
5146
5147 case USE_OUTER:
5148 rewrite_use_outer (data, use, cand);
5149 break;
5150
5151 case USE_ADDRESS:
5152 rewrite_use_address (data, use, cand);
5153 break;
5154
5155 case USE_COMPARE:
5156 rewrite_use_compare (data, use, cand);
5157 break;
5158
5159 default:
5160 gcc_unreachable ();
5161 }
5162 update_stmt (use->stmt);
5163 }
5164
5165 /* Rewrite the uses using the selected induction variables. */
5166
5167 static void
5168 rewrite_uses (struct ivopts_data *data)
5169 {
5170 unsigned i;
5171 struct iv_cand *cand;
5172 struct iv_use *use;
5173
5174 for (i = 0; i < n_iv_uses (data); i++)
5175 {
5176 use = iv_use (data, i);
5177 cand = use->selected;
5178 gcc_assert (cand);
5179
5180 rewrite_use (data, use, cand);
5181 }
5182 }
5183
5184 /* Removes the ivs that are not used after rewriting. */
5185
5186 static void
5187 remove_unused_ivs (struct ivopts_data *data)
5188 {
5189 unsigned j;
5190 bitmap_iterator bi;
5191
5192 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5193 {
5194 struct version_info *info;
5195
5196 info = ver_info (data, j);
5197 if (info->iv
5198 && !zero_p (info->iv->step)
5199 && !info->inv_id
5200 && !info->iv->have_use_for
5201 && !info->preserve_biv)
5202 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5203 }
5204 }
5205
5206 /* Frees data allocated by the optimization of a single loop. */
5207
5208 static void
5209 free_loop_data (struct ivopts_data *data)
5210 {
5211 unsigned i, j;
5212 bitmap_iterator bi;
5213
5214 htab_empty (data->niters);
5215
5216 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5217 {
5218 struct version_info *info;
5219
5220 info = ver_info (data, i);
5221 if (info->iv)
5222 free (info->iv);
5223 info->iv = NULL;
5224 info->has_nonlin_use = false;
5225 info->preserve_biv = false;
5226 info->inv_id = 0;
5227 }
5228 bitmap_clear (data->relevant);
5229 bitmap_clear (data->important_candidates);
5230
5231 for (i = 0; i < n_iv_uses (data); i++)
5232 {
5233 struct iv_use *use = iv_use (data, i);
5234
5235 free (use->iv);
5236 BITMAP_FREE (use->related_cands);
5237 for (j = 0; j < use->n_map_members; j++)
5238 if (use->cost_map[j].depends_on)
5239 BITMAP_FREE (use->cost_map[j].depends_on);
5240 free (use->cost_map);
5241 free (use);
5242 }
5243 VARRAY_POP_ALL (data->iv_uses);
5244
5245 for (i = 0; i < n_iv_cands (data); i++)
5246 {
5247 struct iv_cand *cand = iv_cand (data, i);
5248
5249 if (cand->iv)
5250 free (cand->iv);
5251 free (cand);
5252 }
5253 VARRAY_POP_ALL (data->iv_candidates);
5254
5255 if (data->version_info_size < num_ssa_names)
5256 {
5257 data->version_info_size = 2 * num_ssa_names;
5258 free (data->version_info);
5259 data->version_info = xcalloc (data->version_info_size,
5260 sizeof (struct version_info));
5261 }
5262
5263 data->max_inv_id = 0;
5264
5265 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5266 {
5267 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5268
5269 SET_DECL_RTL (obj, NULL_RTX);
5270 }
5271 VARRAY_POP_ALL (decl_rtl_to_reset);
5272 }
5273
5274 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5275 loop tree. */
5276
5277 static void
5278 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5279 {
5280 unsigned i;
5281
5282 for (i = 1; i < loops->num; i++)
5283 if (loops->parray[i])
5284 {
5285 free (loops->parray[i]->aux);
5286 loops->parray[i]->aux = NULL;
5287 }
5288
5289 free_loop_data (data);
5290 free (data->version_info);
5291 BITMAP_FREE (data->relevant);
5292 BITMAP_FREE (data->important_candidates);
5293 htab_delete (data->niters);
5294
5295 VARRAY_FREE (decl_rtl_to_reset);
5296 VARRAY_FREE (data->iv_uses);
5297 VARRAY_FREE (data->iv_candidates);
5298 }
5299
/* Optimizes the LOOP.  Returns true if anything changed.  The per-loop
   data is always released before returning, whether or not the loop was
   modified.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);
      
      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up if there are too many of
     them to analyze.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);
  
  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
5371
/* Main entry point.  Optimizes induction variables in LOOPS, processing
   inner loops before the loops containing them, then updates the ssa form
   as needed.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  The traversal moves to a sibling
     loop (descending to its innermost loop) when one exists, and otherwise
     ascends to the parent, until the tree root is reached again.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
	{
	  loop = loop->next;
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;
    }

  /* FIXME.  IV opts introduces new aliases and call-clobbered
     variables, which need to be renamed.  However, when we call the
     renamer, not all statements will be scanned for operands.  In
     particular, the newly introduced aliases may appear in statements
     that are considered "unmodified", so the renamer will not get a
     chance to rename those operands.

     Work around this problem by forcing an operand re-scan on every
     statement.  This will not be necessary once the new operand
     scanner is implemented.  */
  if (need_ssa_update_p ())
    {
      basic_block bb;
      block_stmt_iterator si;
      FOR_EACH_BB (bb)
	for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	  update_stmt (bsi_stmt (si));

      update_ssa (TODO_update_ssa);
    }

  rewrite_into_loop_closed_ssa (NULL);
  tree_ssa_iv_optimize_finalize (loops, &data);
}