gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
 25    coalescing and induction variable elimination.  It proceeds in the
 26    following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
 40    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
 46       the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
 60    All of this is done loop by loop.  Doing it globally is theoretically
 61    possible; it might give better performance and enable us to decide
 62    costs more precisely, but getting all the interactions right would be
 63    complicated.  */
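/* An illustrative example (hypothetical, not taken from any testcase):
   given a loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address &a[i] is an interesting use of the induction variable i.
   Assuming 4-byte elements, the pass may select a single pointer
   candidate with base &a[0] and step 4, rewriting the loop roughly as

     for (p = &a[0]; p != &a[100]; p++)
       *p = 0;

   which strength-reduces the implicit multiplication by 4 and
   eliminates the comparison on i.  */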
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
95
96 /* The infinite cost. */
97 #define INFTY 10000000
98
99 /* The expected number of loop iterations. TODO -- use profiling instead of
100 this. */
101 #define AVG_LOOP_NITER(LOOP) 5
102
103
104 /* Representation of the induction variable. */
105 struct iv
106 {
107 tree base; /* Initial value of the iv. */
 108   tree base_object;	/* A memory object to which the induction variable points.  */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
114 };
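/* For instance (illustrative only): in a loop containing "i = 0; ...;
   i += 4", the SSA name holding i is described by an iv with base 0,
   step 4 and biv_p set, while a loop invariant is represented by an iv
   whose step is zero.  */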
115
116 /* Per-ssa version information (induction variable descriptions, etc.). */
117 struct version_info
118 {
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 };
126
127 /* Types of uses. */
128 enum use_type
129 {
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
133 };
134
135 /* Cost of a computation. */
136 typedef struct
137 {
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
143 } comp_cost;
144
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
147
148 /* The candidate - cost pair. */
149 struct cost_pair
150 {
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
154 preserved. */
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
158 };
159
160 /* Use. */
161 struct iv_use
162 {
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
 166   gimple stmt;		/* Statement in which it occurs.  */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
169 important ones. */
170
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
 173 			/* The costs w.r.t. the iv candidates.  */
174
175 struct iv_cand *selected;
176 /* The selected candidate. */
177 };
178
179 /* The position where the iv is computed. */
180 enum iv_position
181 {
182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */
184 IP_ORIGINAL /* The original biv. */
185 };
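/* As an illustration (hypothetical loop): for "while (i < n) { body;
   i++; }", IP_NORMAL places the increment just before the exit test, so
   the test sees the incremented value, whereas IP_END places it at the
   end of the latch block, after the test has been taken.  */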
186
187 /* The induction variable candidate. */
188 struct iv_cand
189 {
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
195 incremented. */
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
204 biv. */
205 };
206
207 /* The data used by the induction variable optimizations. */
208
209 typedef struct iv_use *iv_use_p;
210 DEF_VEC_P(iv_use_p);
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
212
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
216
217 struct ivopts_data
218 {
219 /* The currently optimized loop. */
220 struct loop *current_loop;
221
222 /* Numbers of iterations for all exits of the current loop. */
223 struct pointer_map_t *niters;
224
225 /* Number of registers used in it. */
226 unsigned regs_used;
227
228 /* The size of version_info array allocated. */
229 unsigned version_info_size;
230
231 /* The array of information for the ssa names. */
232 struct version_info *version_info;
233
234 /* The bitmap of indices in version_info whose value was changed. */
235 bitmap relevant;
236
237 /* The uses of induction variables. */
238 VEC(iv_use_p,heap) *iv_uses;
239
240 /* The candidates. */
241 VEC(iv_cand_p,heap) *iv_candidates;
242
243 /* A bitmap of important candidates. */
244 bitmap important_candidates;
245
246 /* The maximum invariant id. */
247 unsigned max_inv_id;
248
249 /* Whether to consider just related and important candidates when replacing a
250 use. */
251 bool consider_all_candidates;
252
253 /* Are we optimizing for speed? */
254 bool speed;
255 };
256
257 /* An assignment of iv candidates to uses. */
258
259 struct iv_ca
260 {
261 /* The number of uses covered by the assignment. */
262 unsigned upto;
263
264 /* Number of uses that cannot be expressed by the candidates in the set. */
265 unsigned bad_uses;
266
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair **cand_for_use;
269
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses;
272
273 /* The candidates used. */
274 bitmap cands;
275
276 /* The number of candidates in the set. */
277 unsigned n_cands;
278
279 /* Total number of registers needed. */
280 unsigned n_regs;
281
282 /* Total cost of expressing uses. */
283 comp_cost cand_use_cost;
284
285 /* Total cost of candidates. */
286 unsigned cand_cost;
287
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses;
290
291 /* Total cost of the assignment. */
292 comp_cost cost;
293 };
294
295 /* Difference of two iv candidate assignments. */
296
297 struct iv_ca_delta
298 {
299 /* Changed use. */
300 struct iv_use *use;
301
302 /* An old assignment (for rollback purposes). */
303 struct cost_pair *old_cp;
304
305 /* A new assignment. */
306 struct cost_pair *new_cp;
307
308 /* Next change in the list. */
309 struct iv_ca_delta *next_change;
310 };
311
 312 /* Bound on the number of candidates below which all candidates are considered.  */
313
314 #define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
316
317 /* If there are more iv occurrences, we just give up (it is quite unlikely that
318 optimizing such a loop would help, and it would take ages). */
319
320 #define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
322
 323 /* If there are at most this number of ivs in the set, always try removing
 324    unnecessary ivs from the set.  */
325
326 #define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
328
 329 /* The list of trees for which the decl_rtl field must be reset is stored
 330    here.  */
331
332 static VEC(tree,heap) *decl_rtl_to_reset;
333
334 /* Number of uses recorded in DATA. */
335
336 static inline unsigned
337 n_iv_uses (struct ivopts_data *data)
338 {
339 return VEC_length (iv_use_p, data->iv_uses);
340 }
341
342 /* Ith use recorded in DATA. */
343
344 static inline struct iv_use *
345 iv_use (struct ivopts_data *data, unsigned i)
346 {
347 return VEC_index (iv_use_p, data->iv_uses, i);
348 }
349
350 /* Number of candidates recorded in DATA. */
351
352 static inline unsigned
353 n_iv_cands (struct ivopts_data *data)
354 {
355 return VEC_length (iv_cand_p, data->iv_candidates);
356 }
357
358 /* Ith candidate recorded in DATA. */
359
360 static inline struct iv_cand *
361 iv_cand (struct ivopts_data *data, unsigned i)
362 {
363 return VEC_index (iv_cand_p, data->iv_candidates, i);
364 }
365
366 /* The single loop exit if it dominates the latch, NULL otherwise. */
367
368 edge
369 single_dom_exit (struct loop *loop)
370 {
371 edge exit = single_exit (loop);
372
373 if (!exit)
374 return NULL;
375
376 if (!just_once_each_iteration_p (loop, exit->src))
377 return NULL;
378
379 return exit;
380 }
381
382 /* Dumps information about the induction variable IV to FILE. */
383
384 extern void dump_iv (FILE *, struct iv *);
385 void
386 dump_iv (FILE *file, struct iv *iv)
387 {
388 if (iv->ssa_name)
389 {
390 fprintf (file, "ssa name ");
391 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
392 fprintf (file, "\n");
393 }
394
395 fprintf (file, " type ");
396 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
397 fprintf (file, "\n");
398
399 if (iv->step)
400 {
401 fprintf (file, " base ");
402 print_generic_expr (file, iv->base, TDF_SLIM);
403 fprintf (file, "\n");
404
405 fprintf (file, " step ");
406 print_generic_expr (file, iv->step, TDF_SLIM);
407 fprintf (file, "\n");
408 }
409 else
410 {
411 fprintf (file, " invariant ");
412 print_generic_expr (file, iv->base, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
416 if (iv->base_object)
417 {
418 fprintf (file, " base object ");
419 print_generic_expr (file, iv->base_object, TDF_SLIM);
420 fprintf (file, "\n");
421 }
422
423 if (iv->biv_p)
424 fprintf (file, " is a biv\n");
425 }
426
427 /* Dumps information about the USE to FILE. */
428
429 extern void dump_use (FILE *, struct iv_use *);
430 void
431 dump_use (FILE *file, struct iv_use *use)
432 {
433 fprintf (file, "use %d\n", use->id);
434
435 switch (use->type)
436 {
437 case USE_NONLINEAR_EXPR:
438 fprintf (file, " generic\n");
439 break;
440
441 case USE_ADDRESS:
442 fprintf (file, " address\n");
443 break;
444
445 case USE_COMPARE:
446 fprintf (file, " compare\n");
447 break;
448
449 default:
450 gcc_unreachable ();
451 }
452
453 fprintf (file, " in statement ");
454 print_gimple_stmt (file, use->stmt, 0, 0);
455 fprintf (file, "\n");
456
457 fprintf (file, " at position ");
458 if (use->op_p)
459 print_generic_expr (file, *use->op_p, TDF_SLIM);
460 fprintf (file, "\n");
461
462 dump_iv (file, use->iv);
463
464 if (use->related_cands)
465 {
466 fprintf (file, " related candidates ");
467 dump_bitmap (file, use->related_cands);
468 }
469 }
470
471 /* Dumps information about the uses to FILE. */
472
473 extern void dump_uses (FILE *, struct ivopts_data *);
474 void
475 dump_uses (FILE *file, struct ivopts_data *data)
476 {
477 unsigned i;
478 struct iv_use *use;
479
480 for (i = 0; i < n_iv_uses (data); i++)
481 {
482 use = iv_use (data, i);
483
484 dump_use (file, use);
485 fprintf (file, "\n");
486 }
487 }
488
489 /* Dumps information about induction variable candidate CAND to FILE. */
490
491 extern void dump_cand (FILE *, struct iv_cand *);
492 void
493 dump_cand (FILE *file, struct iv_cand *cand)
494 {
495 struct iv *iv = cand->iv;
496
497 fprintf (file, "candidate %d%s\n",
498 cand->id, cand->important ? " (important)" : "");
499
500 if (cand->depends_on)
501 {
502 fprintf (file, " depends on ");
503 dump_bitmap (file, cand->depends_on);
504 }
505
506 if (!iv)
507 {
508 fprintf (file, " final value replacement\n");
509 return;
510 }
511
512 switch (cand->pos)
513 {
514 case IP_NORMAL:
515 fprintf (file, " incremented before exit test\n");
516 break;
517
518 case IP_END:
519 fprintf (file, " incremented at end\n");
520 break;
521
522 case IP_ORIGINAL:
523 fprintf (file, " original biv\n");
524 break;
525 }
526
527 dump_iv (file, iv);
528 }
529
530 /* Returns the info for ssa version VER. */
531
532 static inline struct version_info *
533 ver_info (struct ivopts_data *data, unsigned ver)
534 {
535 return data->version_info + ver;
536 }
537
538 /* Returns the info for ssa name NAME. */
539
540 static inline struct version_info *
541 name_info (struct ivopts_data *data, tree name)
542 {
543 return ver_info (data, SSA_NAME_VERSION (name));
544 }
545
546 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
547 emitted in LOOP. */
548
549 static bool
550 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
551 {
552 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
553
554 gcc_assert (bb);
555
556 if (sbb == loop->latch)
557 return true;
558
559 if (sbb != bb)
560 return false;
561
562 return stmt == last_stmt (bb);
563 }
564
 565 /* Returns true if STMT is after the place where the original induction
 566    variable CAND is incremented.  */
567
568 static bool
569 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
570 {
571 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
574
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
576 return false;
577
578 if (stmt_bb != cand_bb)
579 return true;
580
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
584 {
585 if (gsi_stmt (bsi) == cand->incremented_at)
586 return false;
587 if (gsi_stmt (bsi) == stmt)
588 return true;
589 }
590 }
591
 592 /* Returns true if STMT is after the place where the induction variable
 593    CAND is incremented in LOOP.  */
594
595 static bool
596 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
597 {
598 switch (cand->pos)
599 {
600 case IP_END:
601 return false;
602
603 case IP_NORMAL:
604 return stmt_after_ip_normal_pos (loop, stmt);
605
606 case IP_ORIGINAL:
607 return stmt_after_ip_original_pos (cand, stmt);
608
609 default:
610 gcc_unreachable ();
611 }
612 }
613
 614 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */
615
616 static bool
617 abnormal_ssa_name_p (tree exp)
618 {
619 if (!exp)
620 return false;
621
622 if (TREE_CODE (exp) != SSA_NAME)
623 return false;
624
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
626 }
627
 628 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
 629    abnormal phi node.  Callback for for_each_index.  */
630
631 static bool
632 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
633 void *data ATTRIBUTE_UNUSED)
634 {
635 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
636 {
637 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
638 return false;
639 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
640 return false;
641 }
642
643 return !abnormal_ssa_name_p (*index);
644 }
645
 646 /* Returns true if EXPR contains an ssa name that occurs in an
 647    abnormal phi node.  */
648
649 bool
650 contains_abnormal_ssa_name_p (tree expr)
651 {
652 enum tree_code code;
653 enum tree_code_class codeclass;
654
655 if (!expr)
656 return false;
657
658 code = TREE_CODE (expr);
659 codeclass = TREE_CODE_CLASS (code);
660
661 if (code == SSA_NAME)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
663
664 if (code == INTEGER_CST
665 || is_gimple_min_invariant (expr))
666 return false;
667
668 if (code == ADDR_EXPR)
669 return !for_each_index (&TREE_OPERAND (expr, 0),
670 idx_contains_abnormal_ssa_name_p,
671 NULL);
672
673 switch (codeclass)
674 {
675 case tcc_binary:
676 case tcc_comparison:
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
678 return true;
679
680 /* Fallthru. */
681 case tcc_unary:
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
683 return true;
684
685 break;
686
687 default:
688 gcc_unreachable ();
689 }
690
691 return false;
692 }
693
 694 /* Returns a tree describing the number of iterations determined from
 695    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
696
697 static tree
698 niter_for_exit (struct ivopts_data *data, edge exit)
699 {
700 struct tree_niter_desc desc;
701 tree niter;
702 void **slot;
703
704 if (!data->niters)
705 {
706 data->niters = pointer_map_create ();
707 slot = NULL;
708 }
709 else
710 slot = pointer_map_contains (data->niters, exit);
711
712 if (!slot)
713 {
714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data->current_loop,
720 exit, &desc, true)
721 && integer_zerop (desc.may_be_zero)
722 && !contains_abnormal_ssa_name_p (desc.niter))
723 niter = desc.niter;
724 else
725 niter = NULL_TREE;
726
727 *pointer_map_insert (data->niters, exit) = niter;
728 }
729 else
730 niter = (tree) *slot;
731
732 return niter;
733 }
734
 735 /* Returns a tree describing the number of iterations determined from
 736    the single dominating exit of DATA->current_loop, or NULL if something
 737    goes wrong.  */
738
739 static tree
740 niter_for_single_dom_exit (struct ivopts_data *data)
741 {
742 edge exit = single_dom_exit (data->current_loop);
743
744 if (!exit)
745 return NULL;
746
747 return niter_for_exit (data, exit);
748 }
749
750 /* Initializes data structures used by the iv optimization pass, stored
751 in DATA. */
752
753 static void
754 tree_ssa_iv_optimize_init (struct ivopts_data *data)
755 {
756 data->version_info_size = 2 * num_ssa_names;
757 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
758 data->relevant = BITMAP_ALLOC (NULL);
759 data->important_candidates = BITMAP_ALLOC (NULL);
760 data->max_inv_id = 0;
761 data->niters = NULL;
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
765 }
766
 767 /* Returns the memory object to which EXPR points.  In case we are able to
 768    determine that it does not point to any such object, NULL is returned.  */
769
770 static tree
771 determine_base_object (tree expr)
772 {
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj;
775
776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
779 if (CONVERT_EXPR_P (expr))
780 return determine_base_object (TREE_OPERAND (expr, 0));
781
782 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
783 return NULL_TREE;
784
785 switch (code)
786 {
787 case INTEGER_CST:
788 return NULL_TREE;
789
790 case ADDR_EXPR:
791 obj = TREE_OPERAND (expr, 0);
792 base = get_base_address (obj);
793
794 if (!base)
795 return expr;
796
797 if (TREE_CODE (base) == INDIRECT_REF)
798 return determine_base_object (TREE_OPERAND (base, 0));
799
800 return fold_convert (ptr_type_node,
801 build_fold_addr_expr (base));
802
803 case POINTER_PLUS_EXPR:
804 return determine_base_object (TREE_OPERAND (expr, 0));
805
806 case PLUS_EXPR:
807 case MINUS_EXPR:
808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
809 gcc_unreachable ();
810
811 default:
812 return fold_convert (ptr_type_node, expr);
813 }
814 }
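/* A few hypothetical examples: for EXPR = &a[i] the base object is &a;
   for EXPR = p + 4 * i (p a pointer) the result is whatever base object
   is determined for p itself; for an integer constant the result is
   NULL_TREE, since it points to no object.  */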
815
 816 /* Allocates an induction variable with the given initial value BASE and
 817    step STEP.  */
818
819 static struct iv *
820 alloc_iv (tree base, tree step)
821 {
822 struct iv *iv = XCNEW (struct iv);
823 gcc_assert (step != NULL_TREE);
824
825 iv->base = base;
826 iv->base_object = determine_base_object (base);
827 iv->step = step;
828 iv->biv_p = false;
829 iv->have_use_for = false;
830 iv->use_id = 0;
831 iv->ssa_name = NULL_TREE;
832
833 return iv;
834 }
835
836 /* Sets STEP and BASE for induction variable IV. */
837
838 static void
839 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
840 {
841 struct version_info *info = name_info (data, iv);
842
843 gcc_assert (!info->iv);
844
845 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
846 info->iv = alloc_iv (base, step);
847 info->iv->ssa_name = iv;
848 }
849
850 /* Finds induction variable declaration for VAR. */
851
852 static struct iv *
853 get_iv (struct ivopts_data *data, tree var)
854 {
855 basic_block bb;
856 tree type = TREE_TYPE (var);
857
858 if (!POINTER_TYPE_P (type)
859 && !INTEGRAL_TYPE_P (type))
860 return NULL;
861
862 if (!name_info (data, var)->iv)
863 {
864 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
865
866 if (!bb
867 || !flow_bb_inside_loop_p (data->current_loop, bb))
868 set_iv (data, var, var, build_int_cst (type, 0));
869 }
870
871 return name_info (data, var)->iv;
872 }
873
 874 /* Determines the step of a biv defined in PHI.  Returns NULL_TREE if PHI
 875    does not define a simple affine biv with nonzero step.  */
876
877 static tree
878 determine_biv_step (gimple phi)
879 {
880 struct loop *loop = gimple_bb (phi)->loop_father;
881 tree name = PHI_RESULT (phi);
882 affine_iv iv;
883
884 if (!is_gimple_reg (name))
885 return NULL_TREE;
886
887 if (!simple_iv (loop, loop, name, &iv, true))
888 return NULL_TREE;
889
890 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
891 }
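/* For instance (illustrative only): for the header phi
   "i_1 = PHI <0 (preheader), i_2 (latch)>" with "i_2 = i_1 + 4" in the
   loop body, determine_biv_step returns 4; if i_2 were equal to i_1,
   the step would be zero and NULL_TREE would be returned.  */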
892
893 /* Finds basic ivs. */
894
895 static bool
896 find_bivs (struct ivopts_data *data)
897 {
898 gimple phi;
899 tree step, type, base;
900 bool found = false;
901 struct loop *loop = data->current_loop;
902 gimple_stmt_iterator psi;
903
904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
905 {
906 phi = gsi_stmt (psi);
907
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
909 continue;
910
911 step = determine_biv_step (phi);
912 if (!step)
913 continue;
914
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
916 base = expand_simple_operations (base);
917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
919 continue;
920
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
923 if (step)
924 {
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
927 else
928 step = fold_convert (type, step);
929 }
930
931 set_iv (data, PHI_RESULT (phi), base, step);
932 found = true;
933 }
934
935 return found;
936 }
937
938 /* Marks basic ivs. */
939
940 static void
941 mark_bivs (struct ivopts_data *data)
942 {
943 gimple phi;
944 tree var;
945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
947 basic_block incr_bb;
948 gimple_stmt_iterator psi;
949
950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
951 {
952 phi = gsi_stmt (psi);
953
954 iv = get_iv (data, PHI_RESULT (phi));
955 if (!iv)
956 continue;
957
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
960 if (!incr_iv)
961 continue;
962
 963       /* If the increment is in a subloop or an irreducible region, ignore it.  */
964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
967 continue;
968
969 iv->biv_p = true;
970 incr_iv->biv_p = true;
971 }
972 }
973
974 /* Checks whether STMT defines a linear induction variable and stores its
975 parameters to IV. */
976
977 static bool
978 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
979 {
980 tree lhs;
981 struct loop *loop = data->current_loop;
982
983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
985
986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
987 return false;
988
989 lhs = gimple_assign_lhs (stmt);
990 if (TREE_CODE (lhs) != SSA_NAME)
991 return false;
992
993 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
994 return false;
995 iv->base = expand_simple_operations (iv->base);
996
997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
999 return false;
1000
1001 return true;
1002 }
1003
1004 /* Finds general ivs in statement STMT. */
1005
1006 static void
1007 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1008 {
1009 affine_iv iv;
1010
1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1012 return;
1013
1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1015 }
1016
1017 /* Finds general ivs in basic block BB. */
1018
1019 static void
1020 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1021 {
1022 gimple_stmt_iterator bsi;
1023
1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
1026 }
1027
1028 /* Finds general ivs. */
1029
1030 static void
1031 find_givs (struct ivopts_data *data)
1032 {
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1035 unsigned i;
1036
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1039 free (body);
1040 }
1041
1042 /* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1044
1045 static bool
1046 find_induction_variables (struct ivopts_data *data)
1047 {
1048 unsigned i;
1049 bitmap_iterator bi;
1050
1051 if (!find_bivs (data))
1052 return false;
1053
1054 find_givs (data);
1055 mark_bivs (data);
1056
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 {
1059 tree niter = niter_for_single_dom_exit (data);
1060
1061 if (niter)
1062 {
1063 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
 1066 	}
1067
1068 fprintf (dump_file, "Induction variables:\n\n");
1069
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1071 {
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
1074 }
1075 }
1076
1077 return true;
1078 }
1079
1080 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1081
1082 static struct iv_use *
1083 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1084 gimple stmt, enum use_type use_type)
1085 {
1086 struct iv_use *use = XCNEW (struct iv_use);
1087
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1090 use->iv = iv;
1091 use->stmt = stmt;
1092 use->op_p = use_p;
1093 use->related_cands = BITMAP_ALLOC (NULL);
1094
 1095   /* Reset the ssa name so that it does not show up in the dumps, if it
 1096      was not reset by the caller.  */
1097 iv->ssa_name = NULL_TREE;
1098
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1101
1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1103
1104 return use;
1105 }
1106
1107 /* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1110
1111 static void
1112 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1113 {
1114 basic_block bb;
1115 struct version_info *info;
1116
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1119 return;
1120
1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1122 if (bb
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1124 return;
1125
1126 info = name_info (data, op);
1127 info->name = op;
1128 info->has_nonlin_use |= nonlinear_use;
1129 if (!info->inv_id)
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1132 }
1133
1134 /* Checks whether the use OP is interesting and if so, records it. */
1135
1136 static struct iv_use *
1137 find_interesting_uses_op (struct ivopts_data *data, tree op)
1138 {
1139 struct iv *iv;
1140 struct iv *civ;
1141 gimple stmt;
1142 struct iv_use *use;
1143
1144 if (TREE_CODE (op) != SSA_NAME)
1145 return NULL;
1146
1147 iv = get_iv (data, op);
1148 if (!iv)
1149 return NULL;
1150
1151 if (iv->have_use_for)
1152 {
1153 use = iv_use (data, iv->use_id);
1154
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1156 return use;
1157 }
1158
1159 if (integer_zerop (iv->step))
1160 {
1161 record_invariant (data, op, true);
1162 return NULL;
1163 }
1164 iv->have_use_for = true;
1165
1166 civ = XNEW (struct iv);
1167 *civ = *iv;
1168
1169 stmt = SSA_NAME_DEF_STMT (op);
1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
1172
1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1174 iv->use_id = use->id;
1175
1176 return use;
1177 }
1178
1179 /* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
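/* An illustrative example (not from the sources): for "if (i_1 < n_2)"
   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   *CONTROL_VAR is set to the location of i_1, *BOUND to that of n_2,
   and true is returned.  For "if (a_1 != b_2)" with neither operand an
   iv with nonzero step, false is returned.  */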
1186
1187 static bool
1188 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1191 {
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv;
1194 static tree zero;
1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1197 bool ret = false;
1198
1199 if (gimple_code (stmt) == GIMPLE_COND)
1200 {
1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
1203 }
1204 else
1205 {
1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
1208 }
1209
1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1212
1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
1217
1218 /* Exactly one of the compared values must be an iv, and the other one must
1219 be an invariant. */
1220 if (!iv0 || !iv1)
1221 goto end;
1222
1223 if (integer_zerop (iv0->step))
1224 {
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1228 }
1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1230
1231 end:
 1232   if (control_var)
 1233     *control_var = op0;
 1234   if (iv_var)
 1235     *iv_var = iv0;
1236 if (bound)
1237 *bound = op1;
1238 if (iv_bound)
1239 *iv_bound = iv1;
1240
1241 return ret;
1242 }
1243
1244 /* Checks whether the condition in STMT is interesting and if so,
1245 records it. */
1246
1247 static void
1248 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1249 {
1250 tree *var_p, *bound_p;
1251 struct iv *var_iv, *civ;
1252
1253 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1254 {
1255 find_interesting_uses_op (data, *var_p);
1256 find_interesting_uses_op (data, *bound_p);
1257 return;
1258 }
1259
1260 civ = XNEW (struct iv);
1261 *civ = *var_iv;
1262 record_use (data, NULL, civ, stmt, USE_COMPARE);
1263 }
1264
1265 /* Returns true if expression EXPR is obviously invariant in LOOP,
1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
1268
1269 bool
1270 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1271 {
1272 basic_block def_bb;
1273 unsigned i, len;
1274
1275 gcc_assert (loop_depth (loop) > 0);
1276
1277 if (is_gimple_min_invariant (expr))
1278 return true;
1279
1280 if (TREE_CODE (expr) == SSA_NAME)
1281 {
1282 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1283 if (def_bb
1284 && flow_bb_inside_loop_p (loop, def_bb))
1285 return false;
1286
1287 return true;
1288 }
1289
1290 if (!EXPR_P (expr))
1291 return false;
1292
1293 len = TREE_OPERAND_LENGTH (expr);
1294 for (i = 0; i < len; i++)
1295 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1296 return false;
1297
1298 return true;
1299 }
1300
1301 /* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1304
1305 bool
1306 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1307 {
1308 unsigned i;
1309 tree lhs;
1310
1311 gcc_assert (loop_depth (loop) > 0);
1312
1313 lhs = gimple_get_lhs (stmt);
1314 for (i = 0; i < gimple_num_ops (stmt); i++)
1315 {
1316 tree op = gimple_op (stmt, i);
1317 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1318 return false;
1319 }
1320
1321 return true;
1322 }
1323
 1324 /* Accumulates the steps of indices into DATA and replaces their values with
 1325    the initial ones.  Returns false when the value of the index cannot be
 1326    determined.  Callback for for_each_index.  */
1327
1328 struct ifs_ivopts_data
1329 {
1330 struct ivopts_data *ivopts_data;
1331 gimple stmt;
1332 tree step;
1333 };
1334
1335 static bool
1336 idx_find_step (tree base, tree *idx, void *data)
1337 {
1338 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1339 struct iv *iv;
1340 tree step, iv_base, iv_step, lbound, off;
1341 struct loop *loop = dta->ivopts_data->current_loop;
1342
1343 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1345 return false;
1346
1347 /* If base is a component ref, require that the offset of the reference
1348 be invariant. */
1349 if (TREE_CODE (base) == COMPONENT_REF)
1350 {
1351 off = component_ref_field_offset (base);
1352 return expr_invariant_in_loop_p (loop, off);
1353 }
1354
 1355   /* If base is an array, first check whether we will be able to move the
 1356      reference out of the loop (in order to take its address in strength
 1357      reduction).  In order for this to work we need both the lower bound
 1358      and the step to be loop invariants.  */
1359 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1360 {
1361 /* Moreover, for a range, the size needs to be invariant as well. */
1362 if (TREE_CODE (base) == ARRAY_RANGE_REF
1363 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1364 return false;
1365
1366 step = array_ref_element_size (base);
1367 lbound = array_ref_low_bound (base);
1368
1369 if (!expr_invariant_in_loop_p (loop, step)
1370 || !expr_invariant_in_loop_p (loop, lbound))
1371 return false;
1372 }
1373
1374 if (TREE_CODE (*idx) != SSA_NAME)
1375 return true;
1376
1377 iv = get_iv (dta->ivopts_data, *idx);
1378 if (!iv)
1379 return false;
1380
1381 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1382 *&x[0], which is not folded and does not trigger the
1383 ARRAY_REF path below. */
1384 *idx = iv->base;
1385
1386 if (integer_zerop (iv->step))
1387 return true;
1388
1389 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1390 {
1391 step = array_ref_element_size (base);
1392
1393 /* We only handle addresses whose step is an integer constant. */
1394 if (TREE_CODE (step) != INTEGER_CST)
1395 return false;
1396 }
1397 else
 1398     /* The step for pointer arithmetic is already 1 byte.  */
1399 step = build_int_cst (sizetype, 1);
1400
1401 iv_base = iv->base;
1402 iv_step = iv->step;
1403 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1404 sizetype, &iv_base, &iv_step, dta->stmt,
1405 false))
1406 {
1407 /* The index might wrap. */
1408 return false;
1409 }
1410
1411 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1412 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1413
1414 return true;
1415 }
1416
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1419
1420 static bool
1421 idx_record_use (tree base, tree *idx,
1422 void *vdata)
1423 {
1424 struct ivopts_data *data = (struct ivopts_data *) vdata;
1425 find_interesting_uses_op (data, *idx);
1426 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1427 {
1428 find_interesting_uses_op (data, array_ref_element_size (base));
1429 find_interesting_uses_op (data, array_ref_low_bound (base));
1430 }
1431 return true;
1432 }
1433
1434 /* If we can prove that TOP = cst * BOT for some constant cst,
1435 store cst to MUL and return true. Otherwise return false.
1436 The returned value is always sign-extended, regardless of the
1437 signedness of TOP and BOT. */
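/* Illustrative examples (made-up operands): for TOP = n * 6 and BOT = n
   the function stores 6 to *MUL and returns true; for TOP = 8 and
   BOT = 2 it stores 4; for TOP = n + 1 and BOT = n it returns false,
   since 1 is not a constant multiple of n.  */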
1438
1439 static bool
1440 constant_multiple_of (tree top, tree bot, double_int *mul)
1441 {
1442 tree mby;
1443 enum tree_code code;
1444 double_int res, p0, p1;
1445 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1446
1447 STRIP_NOPS (top);
1448 STRIP_NOPS (bot);
1449
1450 if (operand_equal_p (top, bot, 0))
1451 {
1452 *mul = double_int_one;
1453 return true;
1454 }
1455
1456 code = TREE_CODE (top);
1457 switch (code)
1458 {
1459 case MULT_EXPR:
1460 mby = TREE_OPERAND (top, 1);
1461 if (TREE_CODE (mby) != INTEGER_CST)
1462 return false;
1463
1464 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1465 return false;
1466
1467 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1468 precision);
1469 return true;
1470
1471 case PLUS_EXPR:
1472 case MINUS_EXPR:
1473 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1474 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1475 return false;
1476
1477 if (code == MINUS_EXPR)
1478 p1 = double_int_neg (p1);
1479 *mul = double_int_sext (double_int_add (p0, p1), precision);
1480 return true;
1481
1482 case INTEGER_CST:
1483 if (TREE_CODE (bot) != INTEGER_CST)
1484 return false;
1485
1486 p0 = double_int_sext (tree_to_double_int (top), precision);
1487 p1 = double_int_sext (tree_to_double_int (bot), precision);
1488 if (double_int_zero_p (p1))
1489 return false;
1490 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1491 precision);
1492 return double_int_zero_p (res);
1493
1494 default:
1495 return false;
1496 }
1497 }
1498
1499 /* Returns true if memory reference REF with step STEP may be unaligned. */
1500
1501 static bool
1502 may_be_unaligned_p (tree ref, tree step)
1503 {
1504 tree base;
1505 tree base_type;
1506 HOST_WIDE_INT bitsize;
1507 HOST_WIDE_INT bitpos;
1508 tree toffset;
1509 enum machine_mode mode;
1510 int unsignedp, volatilep;
1511 unsigned base_align;
1512
1513 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1514 thus they are not misaligned. */
1515 if (TREE_CODE (ref) == TARGET_MEM_REF)
1516 return false;
1517
 1518   /* The test below is basically a copy of what expr.c:normal_inner_ref
 1519      does to check whether the object must be loaded by parts when
 1520      STRICT_ALIGNMENT is true.  */
1521 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1522 &unsignedp, &volatilep, true);
1523 base_type = TREE_TYPE (base);
1524 base_align = TYPE_ALIGN (base_type);
1525
1526 if (mode != BLKmode)
1527 {
1528 double_int mul;
1529 tree al = build_int_cst (TREE_TYPE (step),
1530 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1531
1532 if (base_align < GET_MODE_ALIGNMENT (mode)
1533 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1534 || bitpos % BITS_PER_UNIT != 0)
1535 return true;
1536
1537 if (!constant_multiple_of (step, al, &mul))
1538 return true;
1539 }
1540
1541 return false;
1542 }
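/* A hypothetical example: on a STRICT_ALIGNMENT target where SImode
   requires 4-byte alignment, a 4-byte reference advancing with step 2
   may become misaligned after one iteration even if its initial address
   is aligned, hence the requirement above that the step be a constant
   multiple of the mode alignment.  */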
1543
1544 /* Return true if EXPR may be non-addressable. */
1545
1546 static bool
1547 may_be_nonaddressable_p (tree expr)
1548 {
1549 switch (TREE_CODE (expr))
1550 {
1551 case TARGET_MEM_REF:
1552 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1553 target, thus they are always addressable. */
1554 return false;
1555
1556 case COMPONENT_REF:
1557 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1558 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1559
1560 case VIEW_CONVERT_EXPR:
1561 /* This kind of view-conversions may wrap non-addressable objects
1562 and make them look addressable. After some processing the
1563 non-addressability may be uncovered again, causing ADDR_EXPRs
1564 of inappropriate objects to be built. */
1565 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1566 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1567 return true;
1568
1569 /* ... fall through ... */
1570
1571 case ARRAY_REF:
1572 case ARRAY_RANGE_REF:
1573 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1574
1575 CASE_CONVERT:
1576 return true;
1577
1578 default:
1579 break;
1580 }
1581
1582 return false;
1583 }
1584
1585 /* Finds addresses in *OP_P inside STMT. */
1586
1587 static void
1588 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1589 {
1590 tree base = *op_p, step = build_int_cst (sizetype, 0);
1591 struct iv *civ;
1592 struct ifs_ivopts_data ifs_ivopts_data;
1593
1594 /* Do not play with volatile memory references. A bit too conservative,
1595 perhaps, but safe. */
1596 if (gimple_has_volatile_ops (stmt))
1597 goto fail;
1598
1599 /* Ignore bitfields for now. Not really something terribly complicated
1600 to handle. TODO. */
1601 if (TREE_CODE (base) == BIT_FIELD_REF)
1602 goto fail;
1603
1604 base = unshare_expr (base);
1605
1606 if (TREE_CODE (base) == TARGET_MEM_REF)
1607 {
1608 tree type = build_pointer_type (TREE_TYPE (base));
1609 tree astep;
1610
1611 if (TMR_BASE (base)
1612 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1613 {
1614 civ = get_iv (data, TMR_BASE (base));
1615 if (!civ)
1616 goto fail;
1617
1618 TMR_BASE (base) = civ->base;
1619 step = civ->step;
1620 }
1621 if (TMR_INDEX (base)
1622 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1623 {
1624 civ = get_iv (data, TMR_INDEX (base));
1625 if (!civ)
1626 goto fail;
1627
1628 TMR_INDEX (base) = civ->base;
1629 astep = civ->step;
1630
1631 if (astep)
1632 {
1633 if (TMR_STEP (base))
1634 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1635
1636 step = fold_build2 (PLUS_EXPR, type, step, astep);
1637 }
1638 }
1639
1640 if (integer_zerop (step))
1641 goto fail;
1642 base = tree_mem_ref_addr (type, base);
1643 }
1644 else
1645 {
1646 ifs_ivopts_data.ivopts_data = data;
1647 ifs_ivopts_data.stmt = stmt;
1648 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1649 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1650 || integer_zerop (ifs_ivopts_data.step))
1651 goto fail;
1652 step = ifs_ivopts_data.step;
1653
1654 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1655 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1656
1657 /* Check that the base expression is addressable. This needs
1658 to be done after substituting bases of IVs into it. */
1659 if (may_be_nonaddressable_p (base))
1660 goto fail;
1661
1662 /* Moreover, on strict alignment platforms, check that it is
1663 sufficiently aligned. */
1664 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1665 goto fail;
1666
1667 base = build_fold_addr_expr (base);
1668
1669 /* Substituting bases of IVs into the base expression might
1670 have caused folding opportunities. */
1671 if (TREE_CODE (base) == ADDR_EXPR)
1672 {
1673 tree *ref = &TREE_OPERAND (base, 0);
1674 while (handled_component_p (*ref))
1675 ref = &TREE_OPERAND (*ref, 0);
1676 if (TREE_CODE (*ref) == INDIRECT_REF)
1677 *ref = fold_indirect_ref (*ref);
1678 }
1679 }
1680
1681 civ = alloc_iv (base, step);
1682 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1683 return;
1684
1685 fail:
1686 for_each_index (op_p, idx_record_use, data);
1687 }
1688
1689 /* Finds and records invariants used in STMT. */
1690
1691 static void
1692 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1693 {
1694 ssa_op_iter iter;
1695 use_operand_p use_p;
1696 tree op;
1697
1698 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1699 {
1700 op = USE_FROM_PTR (use_p);
1701 record_invariant (data, op, false);
1702 }
1703 }
1704
1705 /* Finds interesting uses of induction variables in the statement STMT. */
1706
1707 static void
1708 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1709 {
1710 struct iv *iv;
1711 tree op, *lhs, *rhs;
1712 ssa_op_iter iter;
1713 use_operand_p use_p;
1714 enum tree_code code;
1715
1716 find_invariants_stmt (data, stmt);
1717
1718 if (gimple_code (stmt) == GIMPLE_COND)
1719 {
1720 find_interesting_uses_cond (data, stmt);
1721 return;
1722 }
1723
1724 if (is_gimple_assign (stmt))
1725 {
1726 lhs = gimple_assign_lhs_ptr (stmt);
1727 rhs = gimple_assign_rhs1_ptr (stmt);
1728
1729 if (TREE_CODE (*lhs) == SSA_NAME)
1730 {
1731 /* If the statement defines an induction variable, the uses are not
1732 interesting by themselves. */
1733
1734 iv = get_iv (data, *lhs);
1735
1736 if (iv && !integer_zerop (iv->step))
1737 return;
1738 }
1739
1740 code = gimple_assign_rhs_code (stmt);
1741 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1742 && (REFERENCE_CLASS_P (*rhs)
1743 || is_gimple_val (*rhs)))
1744 {
1745 if (REFERENCE_CLASS_P (*rhs))
1746 find_interesting_uses_address (data, stmt, rhs);
1747 else
1748 find_interesting_uses_op (data, *rhs);
1749
1750 if (REFERENCE_CLASS_P (*lhs))
1751 find_interesting_uses_address (data, stmt, lhs);
1752 return;
1753 }
1754 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1755 {
1756 find_interesting_uses_cond (data, stmt);
1757 return;
1758 }
1759
1760 /* TODO -- we should also handle address uses of type
1761
1762 memory = call (whatever);
1763
1764 and
1765
1766 call (memory). */
1767 }
1768
1769 if (gimple_code (stmt) == GIMPLE_PHI
1770 && gimple_bb (stmt) == data->current_loop->header)
1771 {
1772 iv = get_iv (data, PHI_RESULT (stmt));
1773
1774 if (iv && !integer_zerop (iv->step))
1775 return;
1776 }
1777
1778 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1779 {
1780 op = USE_FROM_PTR (use_p);
1781
1782 if (TREE_CODE (op) != SSA_NAME)
1783 continue;
1784
1785 iv = get_iv (data, op);
1786 if (!iv)
1787 continue;
1788
1789 find_interesting_uses_op (data, op);
1790 }
1791 }
1792
1793 /* Finds interesting uses of induction variables outside of loops
1794 on loop exit edge EXIT. */
1795
1796 static void
1797 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1798 {
1799 gimple phi;
1800 gimple_stmt_iterator psi;
1801 tree def;
1802
1803 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1804 {
1805 phi = gsi_stmt (psi);
1806 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1807 if (is_gimple_reg (def))
1808 find_interesting_uses_op (data, def);
1809 }
1810 }
1811
1812 /* Finds uses of the induction variables that are interesting. */
1813
1814 static void
1815 find_interesting_uses (struct ivopts_data *data)
1816 {
1817 basic_block bb;
1818 gimple_stmt_iterator bsi;
1819 basic_block *body = get_loop_body (data->current_loop);
1820 unsigned i;
1821 struct version_info *info;
1822 edge e;
1823
1824 if (dump_file && (dump_flags & TDF_DETAILS))
1825 fprintf (dump_file, "Uses:\n\n");
1826
1827 for (i = 0; i < data->current_loop->num_nodes; i++)
1828 {
1829 edge_iterator ei;
1830 bb = body[i];
1831
1832 FOR_EACH_EDGE (e, ei, bb->succs)
1833 if (e->dest != EXIT_BLOCK_PTR
1834 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1835 find_interesting_uses_outside (data, e);
1836
1837 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1838 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1839 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1840 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1841 }
1842
1843 if (dump_file && (dump_flags & TDF_DETAILS))
1844 {
1845 bitmap_iterator bi;
1846
1847 fprintf (dump_file, "\n");
1848
1849 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1850 {
1851 info = ver_info (data, i);
1852 if (info->inv_id)
1853 {
1854 fprintf (dump_file, " ");
1855 print_generic_expr (dump_file, info->name, TDF_SLIM);
1856 fprintf (dump_file, " is invariant (%d)%s\n",
1857 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1858 }
1859 }
1860
1861 fprintf (dump_file, "\n");
1862 }
1863
1864 free (body);
1865 }
1866
1867 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1868 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1869 we are at the top-level of the processed address. */
1870
1871 static tree
1872 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1873 unsigned HOST_WIDE_INT *offset)
1874 {
1875 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1876 enum tree_code code;
1877 tree type, orig_type = TREE_TYPE (expr);
1878 unsigned HOST_WIDE_INT off0, off1, st;
1879 tree orig_expr = expr;
1880
1881 STRIP_NOPS (expr);
1882
1883 type = TREE_TYPE (expr);
1884 code = TREE_CODE (expr);
1885 *offset = 0;
1886
1887 switch (code)
1888 {
1889 case INTEGER_CST:
1890 if (!cst_and_fits_in_hwi (expr)
1891 || integer_zerop (expr))
1892 return orig_expr;
1893
1894 *offset = int_cst_value (expr);
1895 return build_int_cst (orig_type, 0);
1896
1897 case POINTER_PLUS_EXPR:
1898 case PLUS_EXPR:
1899 case MINUS_EXPR:
1900 op0 = TREE_OPERAND (expr, 0);
1901 op1 = TREE_OPERAND (expr, 1);
1902
1903 op0 = strip_offset_1 (op0, false, false, &off0);
1904 op1 = strip_offset_1 (op1, false, false, &off1);
1905
1906 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1907 if (op0 == TREE_OPERAND (expr, 0)
1908 && op1 == TREE_OPERAND (expr, 1))
1909 return orig_expr;
1910
1911 if (integer_zerop (op1))
1912 expr = op0;
1913 else if (integer_zerop (op0))
1914 {
1915 if (code == MINUS_EXPR)
1916 expr = fold_build1 (NEGATE_EXPR, type, op1);
1917 else
1918 expr = op1;
1919 }
1920 else
1921 expr = fold_build2 (code, type, op0, op1);
1922
1923 return fold_convert (orig_type, expr);
1924
1925 case ARRAY_REF:
1926 case ARRAY_RANGE_REF:
1927 if (!inside_addr)
1928 return orig_expr;
1929
1930 step = array_ref_element_size (expr);
1931 if (!cst_and_fits_in_hwi (step))
1932 break;
1933
1934 st = int_cst_value (step);
1935 op1 = TREE_OPERAND (expr, 1);
1936 op1 = strip_offset_1 (op1, false, false, &off1);
1937 *offset = off1 * st;
1938
1939 if (top_compref
1940 && integer_zerop (op1))
1941 {
1942 /* Strip the component reference completely. */
1943 op0 = TREE_OPERAND (expr, 0);
1944 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1945 *offset += off0;
1946 return op0;
1947 }
1948 break;
1949
1950 case COMPONENT_REF:
1951 if (!inside_addr)
1952 return orig_expr;
1953
1954 tmp = component_ref_field_offset (expr);
1955 if (top_compref
1956 && cst_and_fits_in_hwi (tmp))
1957 {
1958 /* Strip the component reference completely. */
1959 op0 = TREE_OPERAND (expr, 0);
1960 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1961 *offset = off0 + int_cst_value (tmp);
1962 return op0;
1963 }
1964 break;
1965
1966 case ADDR_EXPR:
1967 op0 = TREE_OPERAND (expr, 0);
1968 op0 = strip_offset_1 (op0, true, true, &off0);
1969 *offset += off0;
1970
1971 if (op0 == TREE_OPERAND (expr, 0))
1972 return orig_expr;
1973
1974 expr = build_fold_addr_expr (op0);
1975 return fold_convert (orig_type, expr);
1976
1977 case INDIRECT_REF:
1978 inside_addr = false;
1979 break;
1980
1981 default:
1982 return orig_expr;
1983 }
1984
 1985   /* Default handling of expressions for which we want to recurse into
 1986      the first operand.  */
1987 op0 = TREE_OPERAND (expr, 0);
1988 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1989 *offset += off0;
1990
1991 if (op0 == TREE_OPERAND (expr, 0)
1992 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1993 return orig_expr;
1994
1995 expr = copy_node (expr);
1996 TREE_OPERAND (expr, 0) = op0;
1997 if (op1)
1998 TREE_OPERAND (expr, 1) = op1;
1999
 2000   /* Inside an address, we might strip the top level component references,
 2001      thus changing the type of the expression.  Handling of ADDR_EXPR
 2002      will fix that.  */
2003 expr = fold_convert (orig_type, expr);
2004
2005 return expr;
2006 }
2007
2008 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2009
2010 static tree
2011 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2012 {
2013 return strip_offset_1 (expr, false, false, offset);
2014 }
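/* For instance (hypothetical trees): for EXPR = &a[i + 4], with a an
   array of 4-byte elements, strip_offset returns &a[i] and stores 16 to
   *OFFSET; for EXPR = p + 10 it returns p and stores 10.  */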
2015
 2016 /* Returns a variant of TYPE that can be used as a base for different uses.
 2017    We return an unsigned type with the same precision, which avoids problems
 2018    with overflows.  */
2019
2020 static tree
2021 generic_type_for (tree type)
2022 {
2023 if (POINTER_TYPE_P (type))
2024 return unsigned_type_for (type);
2025
2026 if (TYPE_UNSIGNED (type))
2027 return type;
2028
2029 return unsigned_type_for (type);
2030 }
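/* E.g. generic_type_for maps "int" to "unsigned int", and a pointer
   type to an unsigned integer type of the same precision (via
   unsigned_type_for), so that arithmetic on the candidates cannot
   cause signed overflow.  */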
2031
 2032 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
 2033    the bitmap in which we should store them.  */
2034
2035 static struct ivopts_data *fd_ivopts_data;
2036 static tree
2037 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2038 {
2039 bitmap *depends_on = (bitmap *) data;
2040 struct version_info *info;
2041
2042 if (TREE_CODE (*expr_p) != SSA_NAME)
2043 return NULL_TREE;
2044 info = name_info (fd_ivopts_data, *expr_p);
2045
2046 if (!info->inv_id || info->has_nonlin_use)
2047 return NULL_TREE;
2048
2049 if (!*depends_on)
2050 *depends_on = BITMAP_ALLOC (NULL);
2051 bitmap_set_bit (*depends_on, info->inv_id);
2052
2053 return NULL_TREE;
2054 }
2055
2056 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2057 position to POS. If USE is not NULL, the candidate is set as related to
2058 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2059 replacement of the final value of the iv by a direct computation. */
2060
2061 static struct iv_cand *
2062 add_candidate_1 (struct ivopts_data *data,
2063 tree base, tree step, bool important, enum iv_position pos,
2064 struct iv_use *use, gimple incremented_at)
2065 {
2066 unsigned i;
2067 struct iv_cand *cand = NULL;
2068 tree type, orig_type;
2069
2070 if (base)
2071 {
2072 orig_type = TREE_TYPE (base);
2073 type = generic_type_for (orig_type);
2074 if (type != orig_type)
2075 {
2076 base = fold_convert (type, base);
2077 step = fold_convert (type, step);
2078 }
2079 }
2080
2081 for (i = 0; i < n_iv_cands (data); i++)
2082 {
2083 cand = iv_cand (data, i);
2084
2085 if (cand->pos != pos)
2086 continue;
2087
2088 if (cand->incremented_at != incremented_at)
2089 continue;
2090
2091 if (!cand->iv)
2092 {
2093 if (!base && !step)
2094 break;
2095
2096 continue;
2097 }
2098
2099 if (!base && !step)
2100 continue;
2101
2102 if (operand_equal_p (base, cand->iv->base, 0)
2103 && operand_equal_p (step, cand->iv->step, 0))
2104 break;
2105 }
2106
2107 if (i == n_iv_cands (data))
2108 {
2109 cand = XCNEW (struct iv_cand);
2110 cand->id = i;
2111
2112 if (!base && !step)
2113 cand->iv = NULL;
2114 else
2115 cand->iv = alloc_iv (base, step);
2116
2117 cand->pos = pos;
2118 if (pos != IP_ORIGINAL && cand->iv)
2119 {
2120 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2121 cand->var_after = cand->var_before;
2122 }
2123 cand->important = important;
2124 cand->incremented_at = incremented_at;
2125 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2126
2127 if (step
2128 && TREE_CODE (step) != INTEGER_CST)
2129 {
2130 fd_ivopts_data = data;
2131 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2132 }
2133
2134 if (dump_file && (dump_flags & TDF_DETAILS))
2135 dump_cand (dump_file, cand);
2136 }
2137
2138 if (important && !cand->important)
2139 {
2140 cand->important = true;
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2143 }
2144
2145 if (use)
2146 {
2147 bitmap_set_bit (use->related_cands, i);
2148 if (dump_file && (dump_flags & TDF_DETAILS))
2149 fprintf (dump_file, "Candidate %d is related to use %d\n",
2150 cand->id, use->id);
2151 }
2152
2153 return cand;
2154 }
2155
2156 /* Returns true if incrementing the induction variable at the end of the LOOP
2157 is allowed.
2158
2159 The purpose is to avoid splitting the latch edge with a biv increment, thus
2160 creating a jump, possibly confusing other optimization passes and leaving
2161 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2162 is not available (so we do not have a better alternative), or if the latch
2163 edge is already nonempty. */
2164
2165 static bool
2166 allow_ip_end_pos_p (struct loop *loop)
2167 {
2168 if (!ip_normal_pos (loop))
2169 return true;
2170
2171 if (!empty_block_p (ip_end_pos (loop)))
2172 return true;
2173
2174 return false;
2175 }
2176
2177 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2178 position to POS. If USE is not NULL, the candidate is set as related to
2179 it. The candidate computation is scheduled on all available positions. */
2180
2181 static void
2182 add_candidate (struct ivopts_data *data,
2183 tree base, tree step, bool important, struct iv_use *use)
2184 {
2185 if (ip_normal_pos (data->current_loop))
2186 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2187 if (ip_end_pos (data->current_loop)
2188 && allow_ip_end_pos_p (data->current_loop))
2189 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2190 }
2191
2192 /* Add a standard "0 + 1 * iteration" iv candidate for a
2193 type with SIZE bits. */
2194
2195 static void
2196 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2197 unsigned int size)
2198 {
2199 tree type = lang_hooks.types.type_for_size (size, true);
2200 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2201 true, NULL);
2202 }
2203
2204 /* Adds standard iv candidates. */
2205
2206 static void
2207 add_standard_iv_candidates (struct ivopts_data *data)
2208 {
2209 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2210
2211 /* The same for a double-integer type if it is still fast enough. */
2212 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2213 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2214 }
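
/* Illustration (editorial note): on a target with 32-bit "int" and
   64-bit words this adds two candidates of the form 0 + 1 * iteration,
   one in an unsigned 32-bit type and one in an unsigned 64-bit type;
   the wider one is added only because BITS_PER_WORD (64) is at least
   2 * INT_TYPE_SIZE (32), so its increment is still a cheap operation. */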
2215
2216
2217 /* Adds candidates based on the old induction variable IV. */
2218
2219 static void
2220 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2221 {
2222 gimple phi;
2223 tree def;
2224 struct iv_cand *cand;
2225
2226 add_candidate (data, iv->base, iv->step, true, NULL);
2227
2228 /* The same, but with initial value zero. */
2229 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2230 add_candidate (data, size_int (0), iv->step, true, NULL);
2231 else
2232 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2233 iv->step, true, NULL);
2234
2235 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2236 if (gimple_code (phi) == GIMPLE_PHI)
2237 {
2238 /* Additionally record the possibility of leaving the original iv
2239 untouched. */
2240 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2241 cand = add_candidate_1 (data,
2242 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2243 SSA_NAME_DEF_STMT (def));
2244 cand->var_before = iv->ssa_name;
2245 cand->var_after = def;
2246 }
2247 }
2248
2249 /* Adds candidates based on the old induction variables. */
2250
2251 static void
2252 add_old_ivs_candidates (struct ivopts_data *data)
2253 {
2254 unsigned i;
2255 struct iv *iv;
2256 bitmap_iterator bi;
2257
2258 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2259 {
2260 iv = ver_info (data, i)->iv;
2261 if (iv && iv->biv_p && !integer_zerop (iv->step))
2262 add_old_iv_candidates (data, iv);
2263 }
2264 }
2265
2266 /* Adds candidates based on the value of the induction variable IV and USE. */
2267
2268 static void
2269 add_iv_value_candidates (struct ivopts_data *data,
2270 struct iv *iv, struct iv_use *use)
2271 {
2272 unsigned HOST_WIDE_INT offset;
2273 tree base;
2274 tree basetype;
2275
2276 add_candidate (data, iv->base, iv->step, false, use);
2277
2278 /* The same, but with initial value zero. Make such a variable important,
2279 since it is generic enough that possibly many uses may be based
2280 on it. */
2281 basetype = TREE_TYPE (iv->base);
2282 if (POINTER_TYPE_P (basetype))
2283 basetype = sizetype;
2284 add_candidate (data, build_int_cst (basetype, 0),
2285 iv->step, true, use);
2286
2287 /* Third, try removing the constant offset. Make sure to even
2288 add a candidate for &a[0] vs. (T *)&a. */
2289 base = strip_offset (iv->base, &offset);
2290 if (offset
2291 || base != iv->base)
2292 add_candidate (data, base, iv->step, false, use);
2293 }
2294
2295 /* Adds candidates based on the uses. */
2296
2297 static void
2298 add_derived_ivs_candidates (struct ivopts_data *data)
2299 {
2300 unsigned i;
2301
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 {
2304 struct iv_use *use = iv_use (data, i);
2305
2306 if (!use)
2307 continue;
2308
2309 switch (use->type)
2310 {
2311 case USE_NONLINEAR_EXPR:
2312 case USE_COMPARE:
2313 case USE_ADDRESS:
2314 /* Just add the ivs based on the value of the iv used here. */
2315 add_iv_value_candidates (data, use->iv, use);
2316 break;
2317
2318 default:
2319 gcc_unreachable ();
2320 }
2321 }
2322 }
2323
2324 /* Record important candidates and add them to related_cands bitmaps
2325 if needed. */
2326
2327 static void
2328 record_important_candidates (struct ivopts_data *data)
2329 {
2330 unsigned i;
2331 struct iv_use *use;
2332
2333 for (i = 0; i < n_iv_cands (data); i++)
2334 {
2335 struct iv_cand *cand = iv_cand (data, i);
2336
2337 if (cand->important)
2338 bitmap_set_bit (data->important_candidates, i);
2339 }
2340
2341 data->consider_all_candidates = (n_iv_cands (data)
2342 <= CONSIDER_ALL_CANDIDATES_BOUND);
2343
2344 if (data->consider_all_candidates)
2345 {
2346 /* We will not need "related_cands" bitmaps in this case,
2347 so release them to decrease peak memory consumption. */
2348 for (i = 0; i < n_iv_uses (data); i++)
2349 {
2350 use = iv_use (data, i);
2351 BITMAP_FREE (use->related_cands);
2352 }
2353 }
2354 else
2355 {
2356 /* Add important candidates to the related_cands bitmaps. */
2357 for (i = 0; i < n_iv_uses (data); i++)
2358 bitmap_ior_into (iv_use (data, i)->related_cands,
2359 data->important_candidates);
2360 }
2361 }
2362
2363 /* Finds the candidates for the induction variables. */
2364
2365 static void
2366 find_iv_candidates (struct ivopts_data *data)
2367 {
2368 /* Add commonly used ivs. */
2369 add_standard_iv_candidates (data);
2370
2371 /* Add old induction variables. */
2372 add_old_ivs_candidates (data);
2373
2374 /* Add induction variables derived from uses. */
2375 add_derived_ivs_candidates (data);
2376
2377 /* Record the important candidates. */
2378 record_important_candidates (data);
2379 }
2380
2381 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2382 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2383 we allocate a simple list for every use. */
2384
2385 static void
2386 alloc_use_cost_map (struct ivopts_data *data)
2387 {
2388 unsigned i, size, s, j;
2389
2390 for (i = 0; i < n_iv_uses (data); i++)
2391 {
2392 struct iv_use *use = iv_use (data, i);
2393 bitmap_iterator bi;
2394
2395 if (data->consider_all_candidates)
2396 size = n_iv_cands (data);
2397 else
2398 {
2399 s = 0;
2400 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2401 {
2402 s++;
2403 }
2404
2405 /* Round up to a power of two, so that taking the modulus by it is fast. */
2406 for (size = 1; size < s; size <<= 1)
2407 continue;
2408 }
2409
2410 use->n_map_members = size;
2411 use->cost_map = XCNEWVEC (struct cost_pair, size);
2412 }
2413 }
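
/* Illustration (editorial note): a use with 5 related candidates gets
   size = 8 from the doubling loop above (1 -> 2 -> 4 -> 8), so entries
   can be hashed at "cand->id & 7" instead of the slower "cand->id % 5";
   the probing scheme itself is in set_use_iv_cost/get_use_iv_cost. */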
2414
2415 /* Returns a description of the computation cost of an expression whose
2416 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2417
2418 static comp_cost
2419 new_cost (unsigned runtime, unsigned complexity)
2420 {
2421 comp_cost cost;
2422
2423 cost.cost = runtime;
2424 cost.complexity = complexity;
2425
2426 return cost;
2427 }
2428
2429 /* Adds costs COST1 and COST2. */
2430
2431 static comp_cost
2432 add_costs (comp_cost cost1, comp_cost cost2)
2433 {
2434 cost1.cost += cost2.cost;
2435 cost1.complexity += cost2.complexity;
2436
2437 return cost1;
2438 }
2439 /* Subtracts COST2 from COST1. */
2440
2441 static comp_cost
2442 sub_costs (comp_cost cost1, comp_cost cost2)
2443 {
2444 cost1.cost -= cost2.cost;
2445 cost1.complexity -= cost2.complexity;
2446
2447 return cost1;
2448 }
2449
2450 /* Returns a negative number if COST1 < COST2, a positive number if
2451 COST1 > COST2, and 0 if COST1 = COST2. */
2452
2453 static int
2454 compare_costs (comp_cost cost1, comp_cost cost2)
2455 {
2456 if (cost1.cost == cost2.cost)
2457 return cost1.complexity - cost2.complexity;
2458
2459 return cost1.cost - cost2.cost;
2460 }
2461
2462 /* Returns true if COST is infinite. */
2463
2464 static bool
2465 infinite_cost_p (comp_cost cost)
2466 {
2467 return cost.cost == INFTY;
2468 }
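
/* Illustration (editorial note): costs compare lexicographically --
   the runtime cost decides first and complexity breaks ties -- so
   {cost = 4, complexity = 1} beats {cost = 4, complexity = 2}, while
   {cost = 3, complexity = 9} still beats {cost = 4, complexity = 0}. */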
2469
2470 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2471 depends on invariants DEPENDS_ON and that the value used in expressing it
2472 is VALUE. */
2473
2474 static void
2475 set_use_iv_cost (struct ivopts_data *data,
2476 struct iv_use *use, struct iv_cand *cand,
2477 comp_cost cost, bitmap depends_on, tree value)
2478 {
2479 unsigned i, s;
2480
2481 if (infinite_cost_p (cost))
2482 {
2483 BITMAP_FREE (depends_on);
2484 return;
2485 }
2486
2487 if (data->consider_all_candidates)
2488 {
2489 use->cost_map[cand->id].cand = cand;
2490 use->cost_map[cand->id].cost = cost;
2491 use->cost_map[cand->id].depends_on = depends_on;
2492 use->cost_map[cand->id].value = value;
2493 return;
2494 }
2495
2496 /* n_map_members is a power of two, so this computes modulo. */
2497 s = cand->id & (use->n_map_members - 1);
2498 for (i = s; i < use->n_map_members; i++)
2499 if (!use->cost_map[i].cand)
2500 goto found;
2501 for (i = 0; i < s; i++)
2502 if (!use->cost_map[i].cand)
2503 goto found;
2504
2505 gcc_unreachable ();
2506
2507 found:
2508 use->cost_map[i].cand = cand;
2509 use->cost_map[i].cost = cost;
2510 use->cost_map[i].depends_on = depends_on;
2511 use->cost_map[i].value = value;
2512 }
2513
2514 /* Gets cost of (USE, CANDIDATE) pair. */
2515
2516 static struct cost_pair *
2517 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2518 struct iv_cand *cand)
2519 {
2520 unsigned i, s;
2521 struct cost_pair *ret;
2522
2523 if (!cand)
2524 return NULL;
2525
2526 if (data->consider_all_candidates)
2527 {
2528 ret = use->cost_map + cand->id;
2529 if (!ret->cand)
2530 return NULL;
2531
2532 return ret;
2533 }
2534
2535 /* n_map_members is a power of two, so this computes modulo. */
2536 s = cand->id & (use->n_map_members - 1);
2537 for (i = s; i < use->n_map_members; i++)
2538 if (use->cost_map[i].cand == cand)
2539 return use->cost_map + i;
2540
2541 for (i = 0; i < s; i++)
2542 if (use->cost_map[i].cand == cand)
2543 return use->cost_map + i;
2544
2545 return NULL;
2546 }
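
#if 0
/* Editorial sketch of the open-addressing scheme used by set_use_iv_cost
   and get_use_iv_cost above; standalone and hypothetical, kept only as
   documentation. Since N is a power of two, "id & (n - 1)" equals
   "id % n" without a division; a linear scan from the home slot,
   wrapping around once, finds a free entry. */

static unsigned
probe_free_slot (struct cost_pair *map, unsigned n, unsigned id)
{
  unsigned start = id & (n - 1), i;

  for (i = start; i < n; i++)	/* From the home slot to the end... */
    if (!map[i].cand)
      return i;
  for (i = 0; i < start; i++)	/* ...then wrap to the beginning. */
    if (!map[i].cand)
      return i;
  gcc_unreachable ();		/* The table is never allowed to fill up. */
}
#endif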
2547
2548 /* Returns an estimate of the cost of computing SEQ. */
2549
2550 static unsigned
2551 seq_cost (rtx seq, bool speed)
2552 {
2553 unsigned cost = 0;
2554 rtx set;
2555
2556 for (; seq; seq = NEXT_INSN (seq))
2557 {
2558 set = single_set (seq);
2559 if (set)
2560 cost += rtx_cost (set, SET, speed);
2561 else
2562 cost++;
2563 }
2564
2565 return cost;
2566 }
2567
2568 /* Produces DECL_RTL for OBJ so that it looks like it is stored in memory. */
2569 static rtx
2570 produce_memory_decl_rtl (tree obj, int *regno)
2571 {
2572 rtx x;
2573
2574 gcc_assert (obj);
2575 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2576 {
2577 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2578 x = gen_rtx_SYMBOL_REF (Pmode, name);
2579 SET_SYMBOL_REF_DECL (x, obj);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2581 targetm.encode_section_info (obj, x, true);
2582 }
2583 else
2584 {
2585 x = gen_raw_REG (Pmode, (*regno)++);
2586 x = gen_rtx_MEM (DECL_MODE (obj), x);
2587 }
2588
2589 return x;
2590 }
2591
2592 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2593 walk_tree. DATA contains the actual fake register number. */
2594
2595 static tree
2596 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2597 {
2598 tree obj = NULL_TREE;
2599 rtx x = NULL_RTX;
2600 int *regno = (int *) data;
2601
2602 switch (TREE_CODE (*expr_p))
2603 {
2604 case ADDR_EXPR:
2605 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2606 handled_component_p (*expr_p);
2607 expr_p = &TREE_OPERAND (*expr_p, 0))
2608 continue;
2609 obj = *expr_p;
2610 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2611 x = produce_memory_decl_rtl (obj, regno);
2612 break;
2613
2614 case SSA_NAME:
2615 *ws = 0;
2616 obj = SSA_NAME_VAR (*expr_p);
2617 if (!DECL_RTL_SET_P (obj))
2618 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2619 break;
2620
2621 case VAR_DECL:
2622 case PARM_DECL:
2623 case RESULT_DECL:
2624 *ws = 0;
2625 obj = *expr_p;
2626
2627 if (DECL_RTL_SET_P (obj))
2628 break;
2629
2630 if (DECL_MODE (obj) == BLKmode)
2631 x = produce_memory_decl_rtl (obj, regno);
2632 else
2633 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2634
2635 break;
2636
2637 default:
2638 break;
2639 }
2640
2641 if (x)
2642 {
2643 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2644 SET_DECL_RTL (obj, x);
2645 }
2646
2647 return NULL_TREE;
2648 }
2649
2650 /* Determines cost of the computation of EXPR. */
2651
2652 static unsigned
2653 computation_cost (tree expr, bool speed)
2654 {
2655 rtx seq, rslt;
2656 tree type = TREE_TYPE (expr);
2657 unsigned cost;
2658 /* Avoid using hard regs in ways which may be unsupported. */
2659 int regno = LAST_VIRTUAL_REGISTER + 1;
2660 enum function_frequency real_frequency = cfun->function_frequency;
2661
2662 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2663 crtl->maybe_hot_insn_p = speed;
2664 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2665 start_sequence ();
2666 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2667 seq = get_insns ();
2668 end_sequence ();
2669 default_rtl_profile ();
2670 cfun->function_frequency = real_frequency;
2671
2672 cost = seq_cost (seq, speed);
2673 if (MEM_P (rslt))
2674 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2675
2676 return cost;
2677 }
2678
2679 /* Returns variable containing the value of candidate CAND at statement AT. */
2680
2681 static tree
2682 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2683 {
2684 if (stmt_after_increment (loop, cand, stmt))
2685 return cand->var_after;
2686 else
2687 return cand->var_before;
2688 }
2689
2690 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2691 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2692
2693 int
2694 tree_int_cst_sign_bit (const_tree t)
2695 {
2696 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2697 unsigned HOST_WIDE_INT w;
2698
2699 if (bitno < HOST_BITS_PER_WIDE_INT)
2700 w = TREE_INT_CST_LOW (t);
2701 else
2702 {
2703 w = TREE_INT_CST_HIGH (t);
2704 bitno -= HOST_BITS_PER_WIDE_INT;
2705 }
2706
2707 return (w >> bitno) & 1;
2708 }
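
/* Illustration (editorial note): with a 64-bit HOST_WIDE_INT, a 16-bit
   type gives BITNO = 15 and the bit is taken from TREE_INT_CST_LOW,
   while a 128-bit type gives BITNO = 127, which is reduced to 63 and
   taken from TREE_INT_CST_HIGH. */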
2709
2710 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2711 same precision that is at least as wide as the precision of TYPE, stores
2712 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2713 type of A and B. */
2714
2715 static tree
2716 determine_common_wider_type (tree *a, tree *b)
2717 {
2718 tree wider_type = NULL;
2719 tree suba, subb;
2720 tree atype = TREE_TYPE (*a);
2721
2722 if (CONVERT_EXPR_P (*a))
2723 {
2724 suba = TREE_OPERAND (*a, 0);
2725 wider_type = TREE_TYPE (suba);
2726 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2727 return atype;
2728 }
2729 else
2730 return atype;
2731
2732 if (CONVERT_EXPR_P (*b))
2733 {
2734 subb = TREE_OPERAND (*b, 0);
2735 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2736 return atype;
2737 }
2738 else
2739 return atype;
2740
2741 *a = suba;
2742 *b = subb;
2743 return wider_type;
2744 }
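
/* Illustration (editorial note): if *A is "(unsigned int) x" and *B is
   "(unsigned int) y" where "x" and "y" have type "long", both
   conversions are stripped, "x" and "y" are stored back through A and
   B, and "long" is returned, so the difference computed by the caller
   folds in the wider type. */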
2745
2746 /* Determines the expression by which USE is expressed from induction variable
2747 CAND at statement AT in LOOP. The expression is stored in a decomposed
2748 form into AFF. Returns false if USE cannot be expressed using CAND. */
2749
2750 static bool
2751 get_computation_aff (struct loop *loop,
2752 struct iv_use *use, struct iv_cand *cand, gimple at,
2753 struct affine_tree_combination *aff)
2754 {
2755 tree ubase = use->iv->base;
2756 tree ustep = use->iv->step;
2757 tree cbase = cand->iv->base;
2758 tree cstep = cand->iv->step, cstep_common;
2759 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2760 tree common_type, var;
2761 tree uutype;
2762 aff_tree cbase_aff, var_aff;
2763 double_int rat;
2764
2765 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2766 {
2767 /* We do not have a precision to express the values of use. */
2768 return false;
2769 }
2770
2771 var = var_at_stmt (loop, cand, at);
2772 uutype = unsigned_type_for (utype);
2773
2774 /* If the conversion is not noop, perform it. */
2775 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2776 {
2777 cstep = fold_convert (uutype, cstep);
2778 cbase = fold_convert (uutype, cbase);
2779 var = fold_convert (uutype, var);
2780 }
2781
2782 if (!constant_multiple_of (ustep, cstep, &rat))
2783 return false;
2784
2785 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2786 type, we achieve better folding by computing their difference in this
2787 wider type, and casting the result to UUTYPE. We do not need to worry about
2788 overflows, as all the arithmetic will in the end be performed in UUTYPE
2789 anyway. */
2790 common_type = determine_common_wider_type (&ubase, &cbase);
2791
2792 /* use = ubase - ratio * cbase + ratio * var. */
2793 tree_to_aff_combination (ubase, common_type, aff);
2794 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2795 tree_to_aff_combination (var, uutype, &var_aff);
2796
2797 /* We need to shift the value if we are after the increment. */
2798 if (stmt_after_increment (loop, cand, at))
2799 {
2800 aff_tree cstep_aff;
2801
2802 if (common_type != uutype)
2803 cstep_common = fold_convert (common_type, cstep);
2804 else
2805 cstep_common = cstep;
2806
2807 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2808 aff_combination_add (&cbase_aff, &cstep_aff);
2809 }
2810
2811 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2812 aff_combination_add (aff, &cbase_aff);
2813 if (common_type != uutype)
2814 aff_combination_convert (aff, uutype);
2815
2816 aff_combination_scale (&var_aff, rat);
2817 aff_combination_add (aff, &var_aff);
2818
2819 return true;
2820 }
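
/* Illustration (editorial note): if the use is the address
   "&a[0] + 4 * i" (ustep 4) and the candidate is "i" itself (cbase 0,
   cstep 1), then rat = 4 and the combination built above is

     use = ubase - 4 * cbase + 4 * var = &a[0] + 4 * var;

   when the use is placed after the candidate's increment, an extra
   "- rat * cstep" (here -4) is folded in, compensating for var having
   already been stepped. */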
2821
2822 /* Determines the expression by which USE is expressed from induction variable
2823 CAND at statement AT in LOOP. The computation is unshared. */
2824
2825 static tree
2826 get_computation_at (struct loop *loop,
2827 struct iv_use *use, struct iv_cand *cand, gimple at)
2828 {
2829 aff_tree aff;
2830 tree type = TREE_TYPE (use->iv->base);
2831
2832 if (!get_computation_aff (loop, use, cand, at, &aff))
2833 return NULL_TREE;
2834 unshare_aff_combination (&aff);
2835 return fold_convert (type, aff_combination_to_tree (&aff));
2836 }
2837
2838 /* Determines the expression by which USE is expressed from induction variable
2839 CAND in LOOP. The computation is unshared. */
2840
2841 static tree
2842 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2843 {
2844 return get_computation_at (loop, use, cand, use->stmt);
2845 }
2846
2847 /* Returns cost of addition in MODE. */
2848
2849 static unsigned
2850 add_cost (enum machine_mode mode, bool speed)
2851 {
2852 static unsigned costs[NUM_MACHINE_MODES];
2853 rtx seq;
2854 unsigned cost;
2855
2856 if (costs[mode])
2857 return costs[mode];
2858
2859 start_sequence ();
2860 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2861 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2862 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2863 NULL_RTX);
2864 seq = get_insns ();
2865 end_sequence ();
2866
2867 cost = seq_cost (seq, speed);
2868 if (!cost)
2869 cost = 1;
2870
2871 costs[mode] = cost;
2872
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Addition in %s costs %d\n",
2875 GET_MODE_NAME (mode), cost);
2876 return cost;
2877 }
2878
2879 /* Entry in a hashtable of already known costs for multiplication. */
2880 struct mbc_entry
2881 {
2882 HOST_WIDE_INT cst; /* The constant to multiply by. */
2883 enum machine_mode mode; /* In mode. */
2884 unsigned cost; /* The cost. */
2885 };
2886
2887 /* Computes the hash value for ENTRY. */
2888
2889 static hashval_t
2890 mbc_entry_hash (const void *entry)
2891 {
2892 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2893
2894 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2895 }
2896
2897 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2898
2899 static int
2900 mbc_entry_eq (const void *entry1, const void *entry2)
2901 {
2902 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2903 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2904
2905 return (e1->mode == e2->mode
2906 && e1->cst == e2->cst);
2907 }
2908
2909 /* Returns cost of multiplication by constant CST in MODE. */
2910
2911 unsigned
2912 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2913 {
2914 static htab_t costs;
2915 struct mbc_entry **cached, act;
2916 rtx seq;
2917 unsigned cost;
2918
2919 if (!costs)
2920 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2921
2922 act.mode = mode;
2923 act.cst = cst;
2924 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2925 if (*cached)
2926 return (*cached)->cost;
2927
2928 *cached = XNEW (struct mbc_entry);
2929 (*cached)->mode = mode;
2930 (*cached)->cst = cst;
2931
2932 start_sequence ();
2933 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2934 gen_int_mode (cst, mode), NULL_RTX, 0);
2935 seq = get_insns ();
2936 end_sequence ();
2937
2938 cost = seq_cost (seq, speed);
2939
2940 if (dump_file && (dump_flags & TDF_DETAILS))
2941 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2942 (int) cst, GET_MODE_NAME (mode), cost);
2943
2944 (*cached)->cost = cost;
2945
2946 return cost;
2947 }
2948
2949 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2950 validity for a memory reference accessing memory of mode MODE. */
2951
2952 bool
2953 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2954 {
2955 #define MAX_RATIO 128
2956 static sbitmap valid_mult[MAX_MACHINE_MODE];
2957
2958 if (!valid_mult[mode])
2959 {
2960 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2961 rtx addr;
2962 HOST_WIDE_INT i;
2963
2964 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2965 sbitmap_zero (valid_mult[mode]);
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2967 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2968 {
2969 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2970 if (memory_address_p (mode, addr))
2971 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2972 }
2973
2974 if (dump_file && (dump_flags & TDF_DETAILS))
2975 {
2976 fprintf (dump_file, " allowed multipliers:");
2977 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2978 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2979 fprintf (dump_file, " %d", (int) i);
2980 fprintf (dump_file, "\n");
2981 fprintf (dump_file, "\n");
2982 }
2983 }
2984
2985 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2986 return false;
2987
2988 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2989 }
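
/* Illustration (editorial note): on i386, scaled indexing accepts the
   ratios 1, 2, 4 and 8, so after the first query for a given mode the
   cached bitmap has exactly those four bits set and every other ratio
   in [-MAX_RATIO, MAX_RATIO] is rejected without re-probing the
   backend. */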
2990
2991 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
2992 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2993 variable is omitted. Compute the cost for a memory reference that accesses
2994 a memory location of mode MEM_MODE.
2995
2996 TODO -- there must be some better way. This all is quite crude. */
2997
2998 static comp_cost
2999 get_address_cost (bool symbol_present, bool var_present,
3000 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3001 enum machine_mode mem_mode,
3002 bool speed)
3003 {
3004 static bool initialized[MAX_MACHINE_MODE];
3005 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3006 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3007 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3008 unsigned cost, acost, complexity;
3009 bool offset_p, ratio_p;
3010 HOST_WIDE_INT s_offset;
3011 unsigned HOST_WIDE_INT mask;
3012 unsigned bits;
3013
3014 if (!initialized[mem_mode])
3015 {
3016 HOST_WIDE_INT i;
3017 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3018 int old_cse_not_expected;
3019 unsigned sym_p, var_p, off_p, rat_p, add_c;
3020 rtx seq, addr, base;
3021 rtx reg0, reg1;
3022
3023 initialized[mem_mode] = true;
3024
3025 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3026
3027 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3028 for (i = start; i <= 1 << 20; i <<= 1)
3029 {
3030 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3031 if (!memory_address_p (mem_mode, addr))
3032 break;
3033 }
3034 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3035 off[mem_mode] = max_offset[mem_mode];
3036
3037 for (i = start; i <= 1 << 20; i <<= 1)
3038 {
3039 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3040 if (!memory_address_p (mem_mode, addr))
3041 break;
3042 }
3043 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3044
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3046 {
3047 fprintf (dump_file, "get_address_cost:\n");
3048 fprintf (dump_file, " min offset %s %d\n",
3049 GET_MODE_NAME (mem_mode),
3050 (int) min_offset[mem_mode]);
3051 fprintf (dump_file, " max offset %s %d\n",
3052 GET_MODE_NAME (mem_mode),
3053 (int) max_offset[mem_mode]);
3054 }
3055
3056 rat[mem_mode] = 1;
3057 for (i = 2; i <= MAX_RATIO; i++)
3058 if (multiplier_allowed_in_address_p (i, mem_mode))
3059 {
3060 rat[mem_mode] = i;
3061 break;
3062 }
3063
3064 /* Compute the cost of various addressing modes. */
3065 acost = 0;
3066 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3067 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3068
3069 for (i = 0; i < 16; i++)
3070 {
3071 sym_p = i & 1;
3072 var_p = (i >> 1) & 1;
3073 off_p = (i >> 2) & 1;
3074 rat_p = (i >> 3) & 1;
3075
3076 addr = reg0;
3077 if (rat_p)
3078 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3079 gen_int_mode (rat[mem_mode], Pmode));
3080
3081 if (var_p)
3082 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3083
3084 if (sym_p)
3085 {
3086 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3087 /* ??? We can run into trouble with some backends by presenting
3088 it with symbols which haven't been properly passed through
3089 targetm.encode_section_info. By setting the local bit, we
3090 enhance the probability of things working. */
3091 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3092
3093 if (off_p)
3094 base = gen_rtx_fmt_e (CONST, Pmode,
3095 gen_rtx_fmt_ee (PLUS, Pmode,
3096 base,
3097 gen_int_mode (off[mem_mode],
3098 Pmode)));
3099 }
3100 else if (off_p)
3101 base = gen_int_mode (off[mem_mode], Pmode);
3102 else
3103 base = NULL_RTX;
3104
3105 if (base)
3106 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3107
3108 start_sequence ();
3109 /* To avoid splitting addressing modes, pretend that no cse will
3110 follow. */
3111 old_cse_not_expected = cse_not_expected;
3112 cse_not_expected = true;
3113 addr = memory_address (mem_mode, addr);
3114 cse_not_expected = old_cse_not_expected;
3115 seq = get_insns ();
3116 end_sequence ();
3117
3118 acost = seq_cost (seq, speed);
3119 acost += address_cost (addr, mem_mode, speed);
3120
3121 if (!acost)
3122 acost = 1;
3123 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3124 }
3125
3126 /* On some targets, it is quite expensive to load a symbol into a register,
3127 which makes addresses that contain symbols look much more expensive.
3128 However, the symbol will have to be loaded in any case before the
3129 loop (and quite likely we have it in a register already), so it does not
3130 make much sense to penalize them too heavily. So make some final
3131 tweaks for the SYMBOL_PRESENT modes:
3132
3133 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3134 var is cheaper, use this mode with small penalty.
3135 If VAR_PRESENT is true, try whether the mode with
3136 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3137 if this is the case, use it. */
3138 add_c = add_cost (Pmode, speed);
3139 for (i = 0; i < 8; i++)
3140 {
3141 var_p = i & 1;
3142 off_p = (i >> 1) & 1;
3143 rat_p = (i >> 2) & 1;
3144
3145 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3146 if (var_p)
3147 acost += add_c;
3148
3149 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3150 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3151 }
3152
3153 if (dump_file && (dump_flags & TDF_DETAILS))
3154 {
3155 fprintf (dump_file, "Address costs:\n");
3156
3157 for (i = 0; i < 16; i++)
3158 {
3159 sym_p = i & 1;
3160 var_p = (i >> 1) & 1;
3161 off_p = (i >> 2) & 1;
3162 rat_p = (i >> 3) & 1;
3163
3164 fprintf (dump_file, " ");
3165 if (sym_p)
3166 fprintf (dump_file, "sym + ");
3167 if (var_p)
3168 fprintf (dump_file, "var + ");
3169 if (off_p)
3170 fprintf (dump_file, "cst + ");
3171 if (rat_p)
3172 fprintf (dump_file, "rat * ");
3173
3174 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3175 fprintf (dump_file, "index costs %d\n", acost);
3176 }
3177 fprintf (dump_file, "\n");
3178 }
3179 }
3180
3181 bits = GET_MODE_BITSIZE (Pmode);
3182 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3183 offset &= mask;
3184 if ((offset >> (bits - 1) & 1))
3185 offset |= ~mask;
3186 s_offset = offset;
3187
3188 cost = 0;
3189 offset_p = (s_offset != 0
3190 && min_offset[mem_mode] <= s_offset
3191 && s_offset <= max_offset[mem_mode]);
3192 ratio_p = (ratio != 1
3193 && multiplier_allowed_in_address_p (ratio, mem_mode));
3194
3195 if (ratio != 1 && !ratio_p)
3196 cost += multiply_by_cost (ratio, Pmode, speed);
3197
3198 if (s_offset && !offset_p && !symbol_present)
3199 cost += add_cost (Pmode, speed);
3200
3201 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3202 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3203 return new_cost (cost + acost, complexity);
3204 }
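
/* Illustration (editorial note): the four bits of I in the loop above
   encode sym_p, var_p, off_p and rat_p, so i = 13 (binary 1101) costs
   the address "sym + cst + rat * index" and i = 6 (binary 0110) costs
   "var + cst"; each combination is legitimized and costed once per
   memory mode and then cached. */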
3205
3206 /* Estimates cost of forcing expression EXPR into a variable. */
3207
3208 static comp_cost
3209 force_expr_to_var_cost (tree expr, bool speed)
3210 {
3211 static bool costs_initialized = false;
3212 static unsigned integer_cost [2];
3213 static unsigned symbol_cost [2];
3214 static unsigned address_cost [2];
3215 tree op0, op1;
3216 comp_cost cost0, cost1, cost;
3217 enum machine_mode mode;
3218
3219 if (!costs_initialized)
3220 {
3221 tree type = build_pointer_type (integer_type_node);
3222 tree var, addr;
3223 rtx x;
3224 int i;
3225
3226 var = create_tmp_var_raw (integer_type_node, "test_var");
3227 TREE_STATIC (var) = 1;
3228 x = produce_memory_decl_rtl (var, NULL);
3229 SET_DECL_RTL (var, x);
3230
3231 addr = build1 (ADDR_EXPR, type, var);
3232
3233
3234 for (i = 0; i < 2; i++)
3235 {
3236 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3237 2000), i);
3238
3239 symbol_cost[i] = computation_cost (addr, i) + 1;
3240
3241 address_cost[i]
3242 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3243 addr,
3244 build_int_cst (sizetype, 2000)), i) + 1;
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 {
3247 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3248 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3249 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3250 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3251 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3252 fprintf (dump_file, "\n");
3253 }
3254 }
3255
3256 costs_initialized = true;
3257 }
3258
3259 STRIP_NOPS (expr);
3260
3261 if (SSA_VAR_P (expr))
3262 return zero_cost;
3263
3264 if (is_gimple_min_invariant (expr))
3265 {
3266 if (TREE_CODE (expr) == INTEGER_CST)
3267 return new_cost (integer_cost [speed], 0);
3268
3269 if (TREE_CODE (expr) == ADDR_EXPR)
3270 {
3271 tree obj = TREE_OPERAND (expr, 0);
3272
3273 if (TREE_CODE (obj) == VAR_DECL
3274 || TREE_CODE (obj) == PARM_DECL
3275 || TREE_CODE (obj) == RESULT_DECL)
3276 return new_cost (symbol_cost [speed], 0);
3277 }
3278
3279 return new_cost (address_cost [speed], 0);
3280 }
3281
3282 switch (TREE_CODE (expr))
3283 {
3284 case POINTER_PLUS_EXPR:
3285 case PLUS_EXPR:
3286 case MINUS_EXPR:
3287 case MULT_EXPR:
3288 op0 = TREE_OPERAND (expr, 0);
3289 op1 = TREE_OPERAND (expr, 1);
3290 STRIP_NOPS (op0);
3291 STRIP_NOPS (op1);
3292
3293 if (is_gimple_val (op0))
3294 cost0 = zero_cost;
3295 else
3296 cost0 = force_expr_to_var_cost (op0, speed);
3297
3298 if (is_gimple_val (op1))
3299 cost1 = zero_cost;
3300 else
3301 cost1 = force_expr_to_var_cost (op1, speed);
3302
3303 break;
3304
3305 default:
3306 /* Just an arbitrary value, FIXME. */
3307 return new_cost (target_spill_cost[speed], 0);
3308 }
3309
3310 mode = TYPE_MODE (TREE_TYPE (expr));
3311 switch (TREE_CODE (expr))
3312 {
3313 case POINTER_PLUS_EXPR:
3314 case PLUS_EXPR:
3315 case MINUS_EXPR:
3316 cost = new_cost (add_cost (mode, speed), 0);
3317 break;
3318
3319 case MULT_EXPR:
3320 if (cst_and_fits_in_hwi (op0))
3321 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3322 else if (cst_and_fits_in_hwi (op1))
3323 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3324 else
3325 return new_cost (target_spill_cost [speed], 0);
3326 break;
3327
3328 default:
3329 gcc_unreachable ();
3330 }
3331
3332 cost = add_costs (cost, cost0);
3333 cost = add_costs (cost, cost1);
3334
3335 /* Bound the cost by target_spill_cost. The parts of complicated
3336 computations often are either loop invariant or at least can
3337 be shared between several iv uses, so letting this grow without
3338 limits would not give reasonable results. */
3339 if (cost.cost > target_spill_cost [speed])
3340 cost.cost = target_spill_cost [speed];
3341
3342 return cost;
3343 }
3344
3345 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3346 invariants the computation depends on. */
3347
3348 static comp_cost
3349 force_var_cost (struct ivopts_data *data,
3350 tree expr, bitmap *depends_on)
3351 {
3352 if (depends_on)
3353 {
3354 fd_ivopts_data = data;
3355 walk_tree (&expr, find_depends, depends_on, NULL);
3356 }
3357
3358 return force_expr_to_var_cost (expr, data->speed);
3359 }
3360
3361 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3362 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3363 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3364 invariants the computation depends on. */
3365
3366 static comp_cost
3367 split_address_cost (struct ivopts_data *data,
3368 tree addr, bool *symbol_present, bool *var_present,
3369 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3370 {
3371 tree core;
3372 HOST_WIDE_INT bitsize;
3373 HOST_WIDE_INT bitpos;
3374 tree toffset;
3375 enum machine_mode mode;
3376 int unsignedp, volatilep;
3377
3378 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3379 &unsignedp, &volatilep, false);
3380
3381 if (toffset != 0
3382 || bitpos % BITS_PER_UNIT != 0
3383 || TREE_CODE (core) != VAR_DECL)
3384 {
3385 *symbol_present = false;
3386 *var_present = true;
3387 fd_ivopts_data = data;
3388 walk_tree (&addr, find_depends, depends_on, NULL);
3389 return new_cost (target_spill_cost[data->speed], 0);
3390 }
3391
3392 *offset += bitpos / BITS_PER_UNIT;
3393 if (TREE_STATIC (core)
3394 || DECL_EXTERNAL (core))
3395 {
3396 *symbol_present = true;
3397 *var_present = false;
3398 return zero_cost;
3399 }
3400
3401 *symbol_present = false;
3402 *var_present = true;
3403 return zero_cost;
3404 }
3405
3406 /* Estimates cost of expressing difference of addresses E1 - E2 as
3407 var + symbol + offset. The value of offset is added to OFFSET,
3408 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3409 part is missing. DEPENDS_ON is a set of the invariants the computation
3410 depends on. */
3411
3412 static comp_cost
3413 ptr_difference_cost (struct ivopts_data *data,
3414 tree e1, tree e2, bool *symbol_present, bool *var_present,
3415 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3416 {
3417 HOST_WIDE_INT diff = 0;
3418 comp_cost cost;
3419 bool speed = optimize_loop_for_speed_p (data->current_loop);
3420
3421 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3422
3423 if (ptr_difference_const (e1, e2, &diff))
3424 {
3425 *offset += diff;
3426 *symbol_present = false;
3427 *var_present = false;
3428 return zero_cost;
3429 }
3430
3431 if (integer_zerop (e2))
3432 return split_address_cost (data, TREE_OPERAND (e1, 0),
3433 symbol_present, var_present, offset, depends_on);
3434
3435 *symbol_present = false;
3436 *var_present = true;
3437
3438 cost = force_var_cost (data, e1, depends_on);
3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3440 cost.cost += add_cost (Pmode, speed);
3441
3442 return cost;
3443 }
3444
3445 /* Estimates cost of expressing difference E1 - E2 as
3446 var + symbol + offset. The value of offset is added to OFFSET,
3447 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3448 part is missing. DEPENDS_ON is a set of the invariants the computation
3449 depends on. */
3450
3451 static comp_cost
3452 difference_cost (struct ivopts_data *data,
3453 tree e1, tree e2, bool *symbol_present, bool *var_present,
3454 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3455 {
3456 comp_cost cost;
3457 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3458 unsigned HOST_WIDE_INT off1, off2;
3459
3460 e1 = strip_offset (e1, &off1);
3461 e2 = strip_offset (e2, &off2);
3462 *offset += off1 - off2;
3463
3464 STRIP_NOPS (e1);
3465 STRIP_NOPS (e2);
3466
3467 if (TREE_CODE (e1) == ADDR_EXPR)
3468 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3469 depends_on);
3470 *symbol_present = false;
3471
3472 if (operand_equal_p (e1, e2, 0))
3473 {
3474 *var_present = false;
3475 return zero_cost;
3476 }
3477 *var_present = true;
3478 if (integer_zerop (e2))
3479 return force_var_cost (data, e1, depends_on);
3480
3481 if (integer_zerop (e1))
3482 {
3483 cost = force_var_cost (data, e2, depends_on);
3484 cost.cost += multiply_by_cost (-1, mode, data->speed);
3485
3486 return cost;
3487 }
3488
3489 cost = force_var_cost (data, e1, depends_on);
3490 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3491 cost.cost += add_cost (mode, data->speed);
3492
3493 return cost;
3494 }
3495
3496 /* Determines the cost of the computation by which USE is expressed
3497 from induction variable CAND. If ADDRESS_P is true, we just need
3498 to create an address from it, otherwise we want to get it into
3499 register. A set of invariants we depend on is stored in
3500 DEPENDS_ON. AT is the statement at that the value is computed. */
3501
3502 static comp_cost
3503 get_computation_cost_at (struct ivopts_data *data,
3504 struct iv_use *use, struct iv_cand *cand,
3505 bool address_p, bitmap *depends_on, gimple at)
3506 {
3507 tree ubase = use->iv->base, ustep = use->iv->step;
3508 tree cbase, cstep;
3509 tree utype = TREE_TYPE (ubase), ctype;
3510 unsigned HOST_WIDE_INT cstepi, offset = 0;
3511 HOST_WIDE_INT ratio, aratio;
3512 bool var_present, symbol_present;
3513 comp_cost cost;
3514 unsigned n_sums;
3515 double_int rat;
3516 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3517
3518 *depends_on = NULL;
3519
3520 /* Only consider real candidates. */
3521 if (!cand->iv)
3522 return infinite_cost;
3523
3524 cbase = cand->iv->base;
3525 cstep = cand->iv->step;
3526 ctype = TREE_TYPE (cbase);
3527
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3529 {
3530 /* We do not have a precision to express the values of use. */
3531 return infinite_cost;
3532 }
3533
3534 if (address_p)
3535 {
3536 /* Do not try to express address of an object with computation based
3537 on address of a different object. This may cause problems in rtl
3538 level alias analysis (that does not expect this to be happening,
3539 as this is illegal in C), and would be unlikely to be useful
3540 anyway. */
3541 if (use->iv->base_object
3542 && cand->iv->base_object
3543 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3544 return infinite_cost;
3545 }
3546
3547 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3548 {
3549 /* TODO -- add direct handling of this case. */
3550 goto fallback;
3551 }
3552
3553 /* CSTEPI is removed from the offset in case the statement is after the
3554 increment. If the step is not constant, we use zero instead.
3555 This is a bit imprecise (there is the extra addition), but
3556 redundancy elimination is likely to transform the code so that
3557 it uses the value of the variable before the increment anyway,
3558 so this is not too unrealistic. */
3559 if (cst_and_fits_in_hwi (cstep))
3560 cstepi = int_cst_value (cstep);
3561 else
3562 cstepi = 0;
3563
3564 if (!constant_multiple_of (ustep, cstep, &rat))
3565 return infinite_cost;
3566
3567 if (double_int_fits_in_shwi_p (rat))
3568 ratio = double_int_to_shwi (rat);
3569 else
3570 return infinite_cost;
3571
3572 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3573 or ratio == 1, it is better to handle this like
3574
3575 ubase - ratio * cbase + ratio * var
3576
3577 (this also holds in the case ratio == -1, TODO). */
3578
3579 if (cst_and_fits_in_hwi (cbase))
3580 {
3581 offset = - ratio * int_cst_value (cbase);
3582 cost = difference_cost (data,
3583 ubase, build_int_cst (utype, 0),
3584 &symbol_present, &var_present, &offset,
3585 depends_on);
3586 }
3587 else if (ratio == 1)
3588 {
3589 cost = difference_cost (data,
3590 ubase, cbase,
3591 &symbol_present, &var_present, &offset,
3592 depends_on);
3593 }
3594 else
3595 {
3596 cost = force_var_cost (data, cbase, depends_on);
3597 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3598 cost = add_costs (cost,
3599 difference_cost (data,
3600 ubase, build_int_cst (utype, 0),
3601 &symbol_present, &var_present,
3602 &offset, depends_on));
3603 }
3604
3605 /* If we are after the increment, the value of the candidate is higher by
3606 one iteration. */
3607 if (stmt_after_increment (data->current_loop, cand, at))
3608 offset -= ratio * cstepi;
3609
3610 /* Now the computation has the shape symbol + var1 + const + ratio * var2
3611 (the symbol/var/const parts may be omitted). If we are looking for an
3612 address, find the cost of addressing this. */
3613 if (address_p)
3614 return add_costs (cost, get_address_cost (symbol_present, var_present,
3615 offset, ratio,
3616 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3617
3618 /* Otherwise estimate the costs for computing the expression. */
3619 aratio = ratio > 0 ? ratio : -ratio;
3620 if (!symbol_present && !var_present && !offset)
3621 {
3622 if (ratio != 1)
3623 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3624
3625 return cost;
3626 }
3627
3628 if (aratio != 1)
3629 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3630
3631 n_sums = 1;
3632 if (var_present
3633 /* Symbol + offset should be compile-time computable. */
3634 && (symbol_present || offset))
3635 n_sums++;
3636
3637 /* Having an offset does not affect the runtime cost if it is added to a
3638 symbol, but it increases complexity. */
3639 if (offset)
3640 cost.complexity++;
3641
3642 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3643 return cost;
3644
3645 fallback:
3646 {
3647 /* Just get the expression, expand it and measure the cost. */
3648 tree comp = get_computation_at (data->current_loop, use, cand, at);
3649
3650 if (!comp)
3651 return infinite_cost;
3652
3653 if (address_p)
3654 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3655
3656 return new_cost (computation_cost (comp, speed), 0);
3657 }
3658 }
3659
3660 /* Determines the cost of the computation by which USE is expressed
3661 from induction variable CAND. If ADDRESS_P is true, we just need
3662 to create an address from it, otherwise we want to get it into
3663 register. A set of invariants we depend on is stored in
3664 DEPENDS_ON. */
3665
3666 static comp_cost
3667 get_computation_cost (struct ivopts_data *data,
3668 struct iv_use *use, struct iv_cand *cand,
3669 bool address_p, bitmap *depends_on)
3670 {
3671 return get_computation_cost_at (data,
3672 use, cand, address_p, depends_on, use->stmt);
3673 }
3674
3675 /* Determines cost of basing replacement of USE on CAND in a generic
3676 expression. */
3677
3678 static bool
3679 determine_use_iv_cost_generic (struct ivopts_data *data,
3680 struct iv_use *use, struct iv_cand *cand)
3681 {
3682 bitmap depends_on;
3683 comp_cost cost;
3684
3685 /* The simple case first -- if we need to express the value of the preserved
3686 original biv, the cost is 0. This also prevents us from counting the
3687 cost of increment twice -- once at this use and once in the cost of
3688 the candidate. */
3689 if (cand->pos == IP_ORIGINAL
3690 && cand->incremented_at == use->stmt)
3691 {
3692 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3693 return true;
3694 }
3695
3696 cost = get_computation_cost (data, use, cand, false, &depends_on);
3697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3698
3699 return !infinite_cost_p (cost);
3700 }
3701
3702 /* Determines cost of basing replacement of USE on CAND in an address. */
3703
3704 static bool
3705 determine_use_iv_cost_address (struct ivopts_data *data,
3706 struct iv_use *use, struct iv_cand *cand)
3707 {
3708 bitmap depends_on;
3709 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3710
3711 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3712
3713 return !infinite_cost_p (cost);
3714 }
3715
3716 /* Computes value of candidate CAND at position AT in iteration NITER, and
3717 stores it to VAL. */
3718
3719 static void
3720 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3721 aff_tree *val)
3722 {
3723 aff_tree step, delta, nit;
3724 struct iv *iv = cand->iv;
3725 tree type = TREE_TYPE (iv->base);
3726 tree steptype = type;
3727 if (POINTER_TYPE_P (type))
3728 steptype = sizetype;
3729
3730 tree_to_aff_combination (iv->step, steptype, &step);
3731 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3732 aff_combination_convert (&nit, steptype);
3733 aff_combination_mult (&nit, &step, &delta);
3734 if (stmt_after_increment (loop, cand, at))
3735 aff_combination_add (&delta, &step);
3736
3737 tree_to_aff_combination (iv->base, type, val);
3738 aff_combination_add (val, &delta);
3739 }
3740
3741 /* Returns period of induction variable iv. */
3742
3743 static tree
3744 iv_period (struct iv *iv)
3745 {
3746 tree step = iv->step, period, type;
3747 tree pow2div;
3748
3749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3750
3751 /* Period of the iv is gcd (step, type range). Since type range is power
3752 of two, it suffices to determine the maximum power of two that divides
3753 step. */
3754 pow2div = num_ending_zeros (step);
3755 type = unsigned_type_for (TREE_TYPE (step));
3756
3757 period = build_low_bits_mask (type,
3758 (TYPE_PRECISION (type)
3759 - tree_low_cst (pow2div, 1)));
3760
3761 return period;
3762 }
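
/* Illustration (editorial note): for a 32-bit iv with step 4,
   num_ending_zeros (step) is 2, so the period is the mask of the low
   32 - 2 = 30 bits, i.e. 2^30 - 1; the iv runs through 2^30 distinct
   values before wrapping back to its initial one. */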
3763
3764 /* Returns the comparison operator used when eliminating the iv USE. */
3765
3766 static enum tree_code
3767 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3768 {
3769 struct loop *loop = data->current_loop;
3770 basic_block ex_bb;
3771 edge exit;
3772
3773 ex_bb = gimple_bb (use->stmt);
3774 exit = EDGE_SUCC (ex_bb, 0);
3775 if (flow_bb_inside_loop_p (loop, exit->dest))
3776 exit = EDGE_SUCC (ex_bb, 1);
3777
3778 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3779 }
3780
3781 /* Check whether it is possible to express the condition in USE by comparison
3782 of candidate CAND. If so, store the value compared with to BOUND. */
3783
3784 static bool
3785 may_eliminate_iv (struct ivopts_data *data,
3786 struct iv_use *use, struct iv_cand *cand, tree *bound)
3787 {
3788 basic_block ex_bb;
3789 edge exit;
3790 tree nit, period;
3791 struct loop *loop = data->current_loop;
3792 aff_tree bnd;
3793
3794 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3795 return false;
3796
3797 /* For now works only for exits that dominate the loop latch.
3798 TODO: extend to other conditions inside loop body. */
3799 ex_bb = gimple_bb (use->stmt);
3800 if (use->stmt != last_stmt (ex_bb)
3801 || gimple_code (use->stmt) != GIMPLE_COND
3802 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3803 return false;
3804
3805 exit = EDGE_SUCC (ex_bb, 0);
3806 if (flow_bb_inside_loop_p (loop, exit->dest))
3807 exit = EDGE_SUCC (ex_bb, 1);
3808 if (flow_bb_inside_loop_p (loop, exit->dest))
3809 return false;
3810
3811 nit = niter_for_exit (data, exit);
3812 if (!nit)
3813 return false;
3814
3815 /* Determine whether we can use the variable to test the exit condition.
3816 This is the case iff the period of the induction variable is greater
3817 than the number of iterations for which the exit condition is true. */
3818 period = iv_period (cand->iv);
3819
3820 /* If the number of iterations is constant, compare against it directly. */
3821 if (TREE_CODE (nit) == INTEGER_CST)
3822 {
3823 if (!tree_int_cst_lt (nit, period))
3824 return false;
3825 }
3826
3827 /* If not, and if this is the only possible exit of the loop, see whether
3828 we can get a conservative estimate on the number of iterations of the
3829 entire loop and compare against that instead. */
3830 else if (loop_only_exit_p (loop, exit))
3831 {
3832 double_int period_value, max_niter;
3833 if (!estimated_loop_iterations (loop, true, &max_niter))
3834 return false;
3835 period_value = tree_to_double_int (period);
3836 if (double_int_ucmp (max_niter, period_value) >= 0)
3837 return false;
3838 }
3839
3840 /* Otherwise, punt. */
3841 else
3842 return false;
3843
3844 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3845
3846 *bound = aff_combination_to_tree (&bnd);
3847 /* It is unlikely that computing the number of iterations using division
3848 would be more profitable than keeping the original induction variable. */
3849 if (expression_expensive_p (*bound))
3850 return false;
3851 return true;
3852 }
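
/* Illustration (editorial note): in

     for (i = 0; i < n; i++)
       *p++ = 0;

   the exit test on "i" may be replaced by "p != base + n" -- the value
   cand_value_at computes for "p" in iteration "n" -- allowing "i" to be
   eliminated, but only because the period check above proved that "p"
   cannot wrap around and miss the bound. */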
3853
3854 /* Determines cost of basing replacement of USE on CAND in a condition. */
3855
3856 static bool
3857 determine_use_iv_cost_condition (struct ivopts_data *data,
3858 struct iv_use *use, struct iv_cand *cand)
3859 {
3860 tree bound = NULL_TREE;
3861 struct iv *cmp_iv;
3862 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3863 comp_cost elim_cost, express_cost, cost;
3864 bool ok;
3865
3866 /* Only consider real candidates. */
3867 if (!cand->iv)
3868 {
3869 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3870 return false;
3871 }
3872
3873 /* Try iv elimination. */
3874 if (may_eliminate_iv (data, use, cand, &bound))
3875 {
3876 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3877 /* The bound is a loop invariant, so it will only be computed
3878 once. */
3879 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3880 }
3881 else
3882 elim_cost = infinite_cost;
3883
3884 /* Try expressing the original giv. If it is compared with an invariant,
3885 note that we cannot get rid of it. */
3886 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3887 gcc_assert (ok);
3888
3889 express_cost = get_computation_cost (data, use, cand, false,
3890 &depends_on_express);
3891 fd_ivopts_data = data;
3892 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3893
3894 /* Choose the better approach, preferring the eliminated IV. */
3895 if (compare_costs (elim_cost, express_cost) <= 0)
3896 {
3897 cost = elim_cost;
3898 depends_on = depends_on_elim;
3899 depends_on_elim = NULL;
3900 }
3901 else
3902 {
3903 cost = express_cost;
3904 depends_on = depends_on_express;
3905 depends_on_express = NULL;
3906 bound = NULL_TREE;
3907 }
3908
3909 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3910
3911 if (depends_on_elim)
3912 BITMAP_FREE (depends_on_elim);
3913 if (depends_on_express)
3914 BITMAP_FREE (depends_on_express);
3915
3916 return !infinite_cost_p (cost);
3917 }
3918
3919 /* Determines cost of basing replacement of USE on CAND. Returns false
3920 if USE cannot be based on CAND. */
3921
3922 static bool
3923 determine_use_iv_cost (struct ivopts_data *data,
3924 struct iv_use *use, struct iv_cand *cand)
3925 {
3926 switch (use->type)
3927 {
3928 case USE_NONLINEAR_EXPR:
3929 return determine_use_iv_cost_generic (data, use, cand);
3930
3931 case USE_ADDRESS:
3932 return determine_use_iv_cost_address (data, use, cand);
3933
3934 case USE_COMPARE:
3935 return determine_use_iv_cost_condition (data, use, cand);
3936
3937 default:
3938 gcc_unreachable ();
3939 }
3940 }
3941
3942 /* Determines costs of basing the use of the iv on an iv candidate. */
3943
3944 static void
3945 determine_use_iv_costs (struct ivopts_data *data)
3946 {
3947 unsigned i, j;
3948 struct iv_use *use;
3949 struct iv_cand *cand;
3950 bitmap to_clear = BITMAP_ALLOC (NULL);
3951
3952 alloc_use_cost_map (data);
3953
3954 for (i = 0; i < n_iv_uses (data); i++)
3955 {
3956 use = iv_use (data, i);
3957
3958 if (data->consider_all_candidates)
3959 {
3960 for (j = 0; j < n_iv_cands (data); j++)
3961 {
3962 cand = iv_cand (data, j);
3963 determine_use_iv_cost (data, use, cand);
3964 }
3965 }
3966 else
3967 {
3968 bitmap_iterator bi;
3969
3970 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3971 {
3972 cand = iv_cand (data, j);
3973 if (!determine_use_iv_cost (data, use, cand))
3974 bitmap_set_bit (to_clear, j);
3975 }
3976
3977 /* Remove the candidates for which the cost is infinite from
3978 the list of related candidates. */
3979 bitmap_and_compl_into (use->related_cands, to_clear);
3980 bitmap_clear (to_clear);
3981 }
3982 }
3983
3984 BITMAP_FREE (to_clear);
3985
3986 if (dump_file && (dump_flags & TDF_DETAILS))
3987 {
3988 fprintf (dump_file, "Use-candidate costs:\n");
3989
3990 for (i = 0; i < n_iv_uses (data); i++)
3991 {
3992 use = iv_use (data, i);
3993
3994 fprintf (dump_file, "Use %d:\n", i);
3995 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3996 for (j = 0; j < use->n_map_members; j++)
3997 {
3998 if (!use->cost_map[j].cand
3999 || infinite_cost_p (use->cost_map[j].cost))
4000 continue;
4001
4002 fprintf (dump_file, " %d\t%d\t%d\t",
4003 use->cost_map[j].cand->id,
4004 use->cost_map[j].cost.cost,
4005 use->cost_map[j].cost.complexity);
4006 if (use->cost_map[j].depends_on)
4007 bitmap_print (dump_file,
4008 use->cost_map[j].depends_on, "","");
4009 fprintf (dump_file, "\n");
4010 }
4011
4012 fprintf (dump_file, "\n");
4013 }
4014 fprintf (dump_file, "\n");
4015 }
4016 }
4017
4018 /* Determines cost of the candidate CAND. */
4019
4020 static void
4021 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4022 {
4023 comp_cost cost_base;
4024 unsigned cost, cost_step;
4025 tree base;
4026
4027 if (!cand->iv)
4028 {
4029 cand->cost = 0;
4030 return;
4031 }
4032
4033 /* There are two costs associated with the candidate -- its increment
4034 and its initialization. The latter is almost negligible for any loop
4035 that rolls enough, so we only take it into account marginally. */
4036
4037 base = cand->iv->base;
4038 cost_base = force_var_cost (data, base, NULL);
4039 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4040
4041 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4042
4043 /* Prefer the original ivs unless we may gain something by replacing them.
4044 The reason is to make debugging simpler; this is not relevant for
4045 artificial ivs created by other optimization passes. */
4046 if (cand->pos != IP_ORIGINAL
4047 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4048 cost++;
4049
4050 /* Prefer not to insert statements into the latch unless there are some
4051 there already (so that we do not create unnecessary jumps). */
4052 if (cand->pos == IP_END
4053 && empty_block_p (ip_end_pos (data->current_loop)))
4054 cost++;
4055
4056 cand->cost = cost;
4057 }
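
/* A small worked example of the formula above, with invented costs: if
   forcing the base into a register costs 4, an addition in the iv's mode
   costs 1, and an assumed AVG_LOOP_NITER of 5:

     cost = 1 + 4 / 5 = 1   (integer division)

   i.e. for a loop that rolls enough, the increment dominates and the
   one-time initialization is almost free, as the comment in
   determine_iv_cost explains.  */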
4058
4059 /* Determines the costs of computing the candidates. */
4060
4061 static void
4062 determine_iv_costs (struct ivopts_data *data)
4063 {
4064 unsigned i;
4065
4066 if (dump_file && (dump_flags & TDF_DETAILS))
4067 {
4068 fprintf (dump_file, "Candidate costs:\n");
4069 fprintf (dump_file, " cand\tcost\n");
4070 }
4071
4072 for (i = 0; i < n_iv_cands (data); i++)
4073 {
4074 struct iv_cand *cand = iv_cand (data, i);
4075
4076 determine_iv_cost (data, cand);
4077
4078 if (dump_file && (dump_flags & TDF_DETAILS))
4079 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4080 }
4081
4082 if (dump_file && (dump_flags & TDF_DETAILS))
4083 fprintf (dump_file, "\n");
4084 }
4085
4086 /* Calculates the cost of having SIZE induction variables. */
4087
4088 static unsigned
4089 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4090 {
4091 /* We add size to the cost, so that we prefer eliminating ivs
4092 if possible. */
4093 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4094 }
4095
4096 /* For each size of the induction variable set determine the penalty. */
4097
4098 static void
4099 determine_set_costs (struct ivopts_data *data)
4100 {
4101 unsigned j, n;
4102 gimple phi;
4103 gimple_stmt_iterator psi;
4104 tree op;
4105 struct loop *loop = data->current_loop;
4106 bitmap_iterator bi;
4107
4108 /* We use the following model (definitely improvable, especially the
4109 cost function -- TODO):
4110
4111 We estimate the number of registers available (using MD data), name it A.
4112
4113 We estimate the number of registers used by the loop, name it U. This
4114 number is obtained as the number of loop phi nodes (not counting virtual
4115 registers and bivs) + the number of variables from outside of the loop.
4116
4117 We set a reserve R (free regs that are used for temporary computations,
4118 etc.). For now the reserve is a constant 3.
4119
4120 Let I be the number of induction variables.
4121
4122 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
4123 encourage creating a lot of ivs without a reason).
4124 -- if A - R < U + I <= A, the cost is I * PRES_COST
4125 -- if U + I > A, the cost is I * PRES_COST and
4126 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
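
/* To illustrate the model with invented numbers: with A = 10 available
   registers, a reserve R = 3 and U = 5 registers used by the loop, up to
   I = 2 ivs are almost free (U + I + R <= A), I between 3 and 5 pays the
   register pressure cost, and from I = 6 on (U + I > A) the estimated
   spill cost kicks in as well.  */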
4127
4128 if (dump_file && (dump_flags & TDF_DETAILS))
4129 {
4130 fprintf (dump_file, "Global costs:\n");
4131 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4132 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4133 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4134 }
4135
4136 n = 0;
4137 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4138 {
4139 phi = gsi_stmt (psi);
4140 op = PHI_RESULT (phi);
4141
4142 if (!is_gimple_reg (op))
4143 continue;
4144
4145 if (get_iv (data, op))
4146 continue;
4147
4148 n++;
4149 }
4150
4151 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4152 {
4153 struct version_info *info = ver_info (data, j);
4154
4155 if (info->inv_id && info->has_nonlin_use)
4156 n++;
4157 }
4158
4159 data->regs_used = n;
4160 if (dump_file && (dump_flags & TDF_DETAILS))
4161 fprintf (dump_file, " regs_used %d\n", n);
4162
4163 if (dump_file && (dump_flags & TDF_DETAILS))
4164 {
4165 fprintf (dump_file, " cost for size:\n");
4166 fprintf (dump_file, " ivs\tcost\n");
4167 for (j = 0; j <= 2 * target_avail_regs; j++)
4168 fprintf (dump_file, " %d\t%d\n", j,
4169 ivopts_global_cost_for_size (data, j));
4170 fprintf (dump_file, "\n");
4171 }
4172 }
4173
4174 /* Returns true if A is a cheaper cost pair than B. */
4175
4176 static bool
4177 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4178 {
4179 int cmp;
4180
4181 if (!a)
4182 return false;
4183
4184 if (!b)
4185 return true;
4186
4187 cmp = compare_costs (a->cost, b->cost);
4188 if (cmp < 0)
4189 return true;
4190
4191 if (cmp > 0)
4192 return false;
4193
4194 /* In case the costs are the same, prefer the cheaper candidate. */
4195 if (a->cand->cost < b->cand->cost)
4196 return true;
4197
4198 return false;
4199 }
4200
4201 /* Computes the cost field of IVS structure. */
4202
4203 static void
4204 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4205 {
4206 comp_cost cost = ivs->cand_use_cost;
4207 cost.cost += ivs->cand_cost;
4208 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4209
4210 ivs->cost = cost;
4211 }
4212
4213 /* Remove invariants in set INVS from set IVS. */
4214
4215 static void
4216 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4217 {
4218 bitmap_iterator bi;
4219 unsigned iid;
4220
4221 if (!invs)
4222 return;
4223
4224 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4225 {
4226 ivs->n_invariant_uses[iid]--;
4227 if (ivs->n_invariant_uses[iid] == 0)
4228 ivs->n_regs--;
4229 }
4230 }
4231
4232 /* Set USE not to be expressed by any candidate in IVS. */
4233
4234 static void
4235 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4236 struct iv_use *use)
4237 {
4238 unsigned uid = use->id, cid;
4239 struct cost_pair *cp;
4240
4241 cp = ivs->cand_for_use[uid];
4242 if (!cp)
4243 return;
4244 cid = cp->cand->id;
4245
4246 ivs->bad_uses++;
4247 ivs->cand_for_use[uid] = NULL;
4248 ivs->n_cand_uses[cid]--;
4249
4250 if (ivs->n_cand_uses[cid] == 0)
4251 {
4252 bitmap_clear_bit (ivs->cands, cid);
4253 /* Do not count the pseudocandidates. */
4254 if (cp->cand->iv)
4255 ivs->n_regs--;
4256 ivs->n_cands--;
4257 ivs->cand_cost -= cp->cand->cost;
4258
4259 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4260 }
4261
4262 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4263
4264 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4265 iv_ca_recount_cost (data, ivs);
4266 }
4267
4268 /* Add invariants in set INVS to set IVS. */
4269
4270 static void
4271 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4272 {
4273 bitmap_iterator bi;
4274 unsigned iid;
4275
4276 if (!invs)
4277 return;
4278
4279 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4280 {
4281 ivs->n_invariant_uses[iid]++;
4282 if (ivs->n_invariant_uses[iid] == 1)
4283 ivs->n_regs++;
4284 }
4285 }
4286
4287 /* Set cost pair for USE in set IVS to CP. */
4288
4289 static void
4290 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4291 struct iv_use *use, struct cost_pair *cp)
4292 {
4293 unsigned uid = use->id, cid;
4294
4295 if (ivs->cand_for_use[uid] == cp)
4296 return;
4297
4298 if (ivs->cand_for_use[uid])
4299 iv_ca_set_no_cp (data, ivs, use);
4300
4301 if (cp)
4302 {
4303 cid = cp->cand->id;
4304
4305 ivs->bad_uses--;
4306 ivs->cand_for_use[uid] = cp;
4307 ivs->n_cand_uses[cid]++;
4308 if (ivs->n_cand_uses[cid] == 1)
4309 {
4310 bitmap_set_bit (ivs->cands, cid);
4311 /* Do not count the pseudocandidates. */
4312 if (cp->cand->iv)
4313 ivs->n_regs++;
4314 ivs->n_cands++;
4315 ivs->cand_cost += cp->cand->cost;
4316
4317 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4318 }
4319
4320 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4321 iv_ca_set_add_invariants (ivs, cp->depends_on);
4322 iv_ca_recount_cost (data, ivs);
4323 }
4324 }
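
/* For instance (illustrative): if no use in IVS was expressed by
   candidate 3 so far, setting the first cost pair with cand->id == 3
   sets bit 3 in IVS->cands, increments n_cands and (for a real iv, not a
   pseudocandidate) n_regs, and charges cand->cost once; expressing
   further uses by the same candidate only adds their respective use
   costs and invariant dependencies.  */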
4325
4326 /* Extend set IVS by expressing USE by some of the candidates in it
4327 if possible. */
4328
4329 static void
4330 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4331 struct iv_use *use)
4332 {
4333 struct cost_pair *best_cp = NULL, *cp;
4334 bitmap_iterator bi;
4335 unsigned i;
4336
4337 gcc_assert (ivs->upto >= use->id);
4338
4339 if (ivs->upto == use->id)
4340 {
4341 ivs->upto++;
4342 ivs->bad_uses++;
4343 }
4344
4345 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4346 {
4347 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4348
4349 if (cheaper_cost_pair (cp, best_cp))
4350 best_cp = cp;
4351 }
4352
4353 iv_ca_set_cp (data, ivs, use, best_cp);
4354 }
4355
4356 /* Get cost for assignment IVS. */
4357
4358 static comp_cost
4359 iv_ca_cost (struct iv_ca *ivs)
4360 {
4361 /* This was a conditional expression but it triggered a bug in
4362 Sun C 5.5. */
4363 if (ivs->bad_uses)
4364 return infinite_cost;
4365 else
4366 return ivs->cost;
4367 }
4368
4369 /* Returns true if all dependences of CP are among invariants in IVS. */
4370
4371 static bool
4372 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4373 {
4374 unsigned i;
4375 bitmap_iterator bi;
4376
4377 if (!cp->depends_on)
4378 return true;
4379
4380 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4381 {
4382 if (ivs->n_invariant_uses[i] == 0)
4383 return false;
4384 }
4385
4386 return true;
4387 }
4388
4389 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
4390 chains it before NEXT_CHANGE. */
4391
4392 static struct iv_ca_delta *
4393 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4394 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4395 {
4396 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4397
4398 change->use = use;
4399 change->old_cp = old_cp;
4400 change->new_cp = new_cp;
4401 change->next_change = next_change;
4402
4403 return change;
4404 }
4405
4406 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4407 are rewritten. */
4408
4409 static struct iv_ca_delta *
4410 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4411 {
4412 struct iv_ca_delta *last;
4413
4414 if (!l2)
4415 return l1;
4416
4417 if (!l1)
4418 return l2;
4419
4420 for (last = l1; last->next_change; last = last->next_change)
4421 continue;
4422 last->next_change = l2;
4423
4424 return l1;
4425 }
4426
4427 /* Returns the cost pair by which USE is expressed in IVS. */
4428
4429 static struct cost_pair *
4430 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4431 {
4432 return ivs->cand_for_use[use->id];
4433 }
4434
4435 /* Reverse the list of changes DELTA, forming the inverse to it. */
4436
4437 static struct iv_ca_delta *
4438 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4439 {
4440 struct iv_ca_delta *act, *next, *prev = NULL;
4441 struct cost_pair *tmp;
4442
4443 for (act = delta; act; act = next)
4444 {
4445 next = act->next_change;
4446 act->next_change = prev;
4447 prev = act;
4448
4449 tmp = act->old_cp;
4450 act->old_cp = act->new_cp;
4451 act->new_cp = tmp;
4452 }
4453
4454 return prev;
4455 }
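
/* E.g. reversing the two-element delta list (an invented example)

     (use1: A -> B) -> (use2: C -> D)

   yields

     (use2: D -> C) -> (use1: B -> A)

   so committing the reversed list with iv_ca_delta_commit undoes exactly
   what committing the original list did.  */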
4456
4457 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4458 reverted instead. */
4459
4460 static void
4461 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4462 struct iv_ca_delta *delta, bool forward)
4463 {
4464 struct cost_pair *from, *to;
4465 struct iv_ca_delta *act;
4466
4467 if (!forward)
4468 delta = iv_ca_delta_reverse (delta);
4469
4470 for (act = delta; act; act = act->next_change)
4471 {
4472 from = act->old_cp;
4473 to = act->new_cp;
4474 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4475 iv_ca_set_cp (data, ivs, act->use, to);
4476 }
4477
4478 if (!forward)
4479 iv_ca_delta_reverse (delta);
4480 }
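
/* The typical usage pattern (see e.g. iv_ca_extend below) is to apply a
   delta tentatively, measure the resulting cost, and revert:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   which leaves IVS unchanged while still evaluating the candidate change
   cheaply.  */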
4481
4482 /* Returns true if CAND is used in IVS. */
4483
4484 static bool
4485 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4486 {
4487 return ivs->n_cand_uses[cand->id] > 0;
4488 }
4489
4490 /* Returns number of induction variable candidates in the set IVS. */
4491
4492 static unsigned
4493 iv_ca_n_cands (struct iv_ca *ivs)
4494 {
4495 return ivs->n_cands;
4496 }
4497
4498 /* Free the list of changes DELTA. */
4499
4500 static void
4501 iv_ca_delta_free (struct iv_ca_delta **delta)
4502 {
4503 struct iv_ca_delta *act, *next;
4504
4505 for (act = *delta; act; act = next)
4506 {
4507 next = act->next_change;
4508 free (act);
4509 }
4510
4511 *delta = NULL;
4512 }
4513
4514 /* Allocates a new iv candidate assignment. */
4515
4516 static struct iv_ca *
4517 iv_ca_new (struct ivopts_data *data)
4518 {
4519 struct iv_ca *nw = XNEW (struct iv_ca);
4520
4521 nw->upto = 0;
4522 nw->bad_uses = 0;
4523 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4524 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4525 nw->cands = BITMAP_ALLOC (NULL);
4526 nw->n_cands = 0;
4527 nw->n_regs = 0;
4528 nw->cand_use_cost = zero_cost;
4529 nw->cand_cost = 0;
4530 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4531 nw->cost = zero_cost;
4532
4533 return nw;
4534 }
4535
4536 /* Free memory occupied by the set IVS. */
4537
4538 static void
4539 iv_ca_free (struct iv_ca **ivs)
4540 {
4541 free ((*ivs)->cand_for_use);
4542 free ((*ivs)->n_cand_uses);
4543 BITMAP_FREE ((*ivs)->cands);
4544 free ((*ivs)->n_invariant_uses);
4545 free (*ivs);
4546 *ivs = NULL;
4547 }
4548
4549 /* Dumps IVS to FILE. */
4550
4551 static void
4552 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4553 {
4554 const char *pref = " invariants ";
4555 unsigned i;
4556 comp_cost cost = iv_ca_cost (ivs);
4557
4558 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4559 bitmap_print (file, ivs->cands, " candidates ","\n");
4560
4561 for (i = 1; i <= data->max_inv_id; i++)
4562 if (ivs->n_invariant_uses[i])
4563 {
4564 fprintf (file, "%s%d", pref, i);
4565 pref = ", ";
4566 }
4567 fprintf (file, "\n");
4568 }
4569
4570 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4571 of the new set, and store the differences in DELTA. The number of
4572 induction variables in the new set is stored in N_IVS. */
4573
4574 static comp_cost
4575 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4576 struct iv_cand *cand, struct iv_ca_delta **delta,
4577 unsigned *n_ivs)
4578 {
4579 unsigned i;
4580 comp_cost cost;
4581 struct iv_use *use;
4582 struct cost_pair *old_cp, *new_cp;
4583
4584 *delta = NULL;
4585 for (i = 0; i < ivs->upto; i++)
4586 {
4587 use = iv_use (data, i);
4588 old_cp = iv_ca_cand_for_use (ivs, use);
4589
4590 if (old_cp
4591 && old_cp->cand == cand)
4592 continue;
4593
4594 new_cp = get_use_iv_cost (data, use, cand);
4595 if (!new_cp)
4596 continue;
4597
4598 if (!iv_ca_has_deps (ivs, new_cp))
4599 continue;
4600
4601 if (!cheaper_cost_pair (new_cp, old_cp))
4602 continue;
4603
4604 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4605 }
4606
4607 iv_ca_delta_commit (data, ivs, *delta, true);
4608 cost = iv_ca_cost (ivs);
4609 if (n_ivs)
4610 *n_ivs = iv_ca_n_cands (ivs);
4611 iv_ca_delta_commit (data, ivs, *delta, false);
4612
4613 return cost;
4614 }
4615
4616 /* Try narrowing set IVS by removing CAND. Return the cost of
4617 the new set and store the differences in DELTA. */
4618
4619 static comp_cost
4620 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4621 struct iv_cand *cand, struct iv_ca_delta **delta)
4622 {
4623 unsigned i, ci;
4624 struct iv_use *use;
4625 struct cost_pair *old_cp, *new_cp, *cp;
4626 bitmap_iterator bi;
4627 struct iv_cand *cnd;
4628 comp_cost cost;
4629
4630 *delta = NULL;
4631 for (i = 0; i < n_iv_uses (data); i++)
4632 {
4633 use = iv_use (data, i);
4634
4635 old_cp = iv_ca_cand_for_use (ivs, use);
4636 if (old_cp->cand != cand)
4637 continue;
4638
4639 new_cp = NULL;
4640
4641 if (data->consider_all_candidates)
4642 {
4643 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4644 {
4645 if (ci == cand->id)
4646 continue;
4647
4648 cnd = iv_cand (data, ci);
4649
4650 cp = get_use_iv_cost (data, use, cnd);
4651 if (!cp)
4652 continue;
4653 if (!iv_ca_has_deps (ivs, cp))
4654 continue;
4655
4656 if (!cheaper_cost_pair (cp, new_cp))
4657 continue;
4658
4659 new_cp = cp;
4660 }
4661 }
4662 else
4663 {
4664 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4665 {
4666 if (ci == cand->id)
4667 continue;
4668
4669 cnd = iv_cand (data, ci);
4670
4671 cp = get_use_iv_cost (data, use, cnd);
4672 if (!cp)
4673 continue;
4674 if (!iv_ca_has_deps (ivs, cp))
4675 continue;
4676
4677 if (!cheaper_cost_pair (cp, new_cp))
4678 continue;
4679
4680 new_cp = cp;
4681 }
4682 }
4683
4684 if (!new_cp)
4685 {
4686 iv_ca_delta_free (delta);
4687 return infinite_cost;
4688 }
4689
4690 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4691 }
4692
4693 iv_ca_delta_commit (data, ivs, *delta, true);
4694 cost = iv_ca_cost (ivs);
4695 iv_ca_delta_commit (data, ivs, *delta, false);
4696
4697 return cost;
4698 }
4699
4700 /* Try optimizing the set of candidates IVS by removing candidates other
4701 than EXCEPT_CAND from it. Return the cost of the new set, and store
4702 the differences in DELTA. */
4703
4704 static comp_cost
4705 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4706 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4707 {
4708 bitmap_iterator bi;
4709 struct iv_ca_delta *act_delta, *best_delta;
4710 unsigned i;
4711 comp_cost best_cost, acost;
4712 struct iv_cand *cand;
4713
4714 best_delta = NULL;
4715 best_cost = iv_ca_cost (ivs);
4716
4717 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4718 {
4719 cand = iv_cand (data, i);
4720
4721 if (cand == except_cand)
4722 continue;
4723
4724 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4725
4726 if (compare_costs (acost, best_cost) < 0)
4727 {
4728 best_cost = acost;
4729 iv_ca_delta_free (&best_delta);
4730 best_delta = act_delta;
4731 }
4732 else
4733 iv_ca_delta_free (&act_delta);
4734 }
4735
4736 if (!best_delta)
4737 {
4738 *delta = NULL;
4739 return best_cost;
4740 }
4741
4742 /* Recurse to possibly remove other unnecessary ivs. */
4743 iv_ca_delta_commit (data, ivs, best_delta, true);
4744 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4745 iv_ca_delta_commit (data, ivs, best_delta, false);
4746 *delta = iv_ca_delta_join (best_delta, *delta);
4747 return best_cost;
4748 }
4749
4750 /* Tries to extend the set IVS in the best possible way in order
4751 to express USE. */
4752
4753 static bool
4754 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4755 struct iv_use *use)
4756 {
4757 comp_cost best_cost, act_cost;
4758 unsigned i;
4759 bitmap_iterator bi;
4760 struct iv_cand *cand;
4761 struct iv_ca_delta *best_delta = NULL, *act_delta;
4762 struct cost_pair *cp;
4763
4764 iv_ca_add_use (data, ivs, use);
4765 best_cost = iv_ca_cost (ivs);
4766
4767 cp = iv_ca_cand_for_use (ivs, use);
4768 if (cp)
4769 {
4770 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4771 iv_ca_set_no_cp (data, ivs, use);
4772 }
4773
4774 /* First try important candidates not based on any memory object. Only if
4775 this fails, try the specific ones. The rationale -- in loops with many
4776 variables the best choice often is to use just one generic biv. If we
4777 added many ivs specific to the uses here, the optimization algorithm later
4778 would be likely to get stuck in a local minimum, thus causing us to create
4779 too many ivs. The approach of going from few ivs to more seems more likely
4780 to be successful -- starting from few ivs, replacing an expensive use by a
4781 specific iv should always be a win. */
4782 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4783 {
4784 cand = iv_cand (data, i);
4785
4786 if (cand->iv->base_object != NULL_TREE)
4787 continue;
4788
4789 if (iv_ca_cand_used_p (ivs, cand))
4790 continue;
4791
4792 cp = get_use_iv_cost (data, use, cand);
4793 if (!cp)
4794 continue;
4795
4796 iv_ca_set_cp (data, ivs, use, cp);
4797 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4798 iv_ca_set_no_cp (data, ivs, use);
4799 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4800
4801 if (compare_costs (act_cost, best_cost) < 0)
4802 {
4803 best_cost = act_cost;
4804
4805 iv_ca_delta_free (&best_delta);
4806 best_delta = act_delta;
4807 }
4808 else
4809 iv_ca_delta_free (&act_delta);
4810 }
4811
4812 if (infinite_cost_p (best_cost))
4813 {
4814 for (i = 0; i < use->n_map_members; i++)
4815 {
4816 cp = use->cost_map + i;
4817 cand = cp->cand;
4818 if (!cand)
4819 continue;
4820
4821 /* Already tried this. */
4822 if (cand->important && cand->iv->base_object == NULL_TREE)
4823 continue;
4824
4825 if (iv_ca_cand_used_p (ivs, cand))
4826 continue;
4827
4828 act_delta = NULL;
4829 iv_ca_set_cp (data, ivs, use, cp);
4830 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4831 iv_ca_set_no_cp (data, ivs, use);
4832 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4833 cp, act_delta);
4834
4835 if (compare_costs (act_cost, best_cost) < 0)
4836 {
4837 best_cost = act_cost;
4838
4839 if (best_delta)
4840 iv_ca_delta_free (&best_delta);
4841 best_delta = act_delta;
4842 }
4843 else
4844 iv_ca_delta_free (&act_delta);
4845 }
4846 }
4847
4848 iv_ca_delta_commit (data, ivs, best_delta, true);
4849 iv_ca_delta_free (&best_delta);
4850
4851 return !infinite_cost_p (best_cost);
4852 }
4853
4854 /* Finds an initial assignment of candidates to uses. */
4855
4856 static struct iv_ca *
4857 get_initial_solution (struct ivopts_data *data)
4858 {
4859 struct iv_ca *ivs = iv_ca_new (data);
4860 unsigned i;
4861
4862 for (i = 0; i < n_iv_uses (data); i++)
4863 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4864 {
4865 iv_ca_free (&ivs);
4866 return NULL;
4867 }
4868
4869 return ivs;
4870 }
4871
4872 /* Tries to improve set of induction variables IVS. */
4873
4874 static bool
4875 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4876 {
4877 unsigned i, n_ivs;
4878 comp_cost acost, best_cost = iv_ca_cost (ivs);
4879 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4880 struct iv_cand *cand;
4881
4882 /* Try extending the set of induction variables by one. */
4883 for (i = 0; i < n_iv_cands (data); i++)
4884 {
4885 cand = iv_cand (data, i);
4886
4887 if (iv_ca_cand_used_p (ivs, cand))
4888 continue;
4889
4890 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4891 if (!act_delta)
4892 continue;
4893
4894 /* If we successfully added the candidate and the set is small enough,
4895 try optimizing it by removing other candidates. */
4896 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4897 {
4898 iv_ca_delta_commit (data, ivs, act_delta, true);
4899 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4900 iv_ca_delta_commit (data, ivs, act_delta, false);
4901 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4902 }
4903
4904 if (compare_costs (acost, best_cost) < 0)
4905 {
4906 best_cost = acost;
4907 iv_ca_delta_free (&best_delta);
4908 best_delta = act_delta;
4909 }
4910 else
4911 iv_ca_delta_free (&act_delta);
4912 }
4913
4914 if (!best_delta)
4915 {
4916 /* Try removing the candidates from the set instead. */
4917 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4918
4919 /* Nothing more we can do. */
4920 if (!best_delta)
4921 return false;
4922 }
4923
4924 iv_ca_delta_commit (data, ivs, best_delta, true);
4925 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4926 iv_ca_delta_free (&best_delta);
4927 return true;
4928 }
4929
4930 /* Attempts to find the optimal set of induction variables. We use a simple
4931 greedy heuristic -- we try to replace at most one candidate in the selected
4932 solution and remove the unused ivs while this improves the cost. */
4933
4934 static struct iv_ca *
4935 find_optimal_iv_set (struct ivopts_data *data)
4936 {
4937 unsigned i;
4938 struct iv_ca *set;
4939 struct iv_use *use;
4940
4941 /* Get the initial solution. */
4942 set = get_initial_solution (data);
4943 if (!set)
4944 {
4945 if (dump_file && (dump_flags & TDF_DETAILS))
4946 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4947 return NULL;
4948 }
4949
4950 if (dump_file && (dump_flags & TDF_DETAILS))
4951 {
4952 fprintf (dump_file, "Initial set of candidates:\n");
4953 iv_ca_dump (data, dump_file, set);
4954 }
4955
4956 while (try_improve_iv_set (data, set))
4957 {
4958 if (dump_file && (dump_flags & TDF_DETAILS))
4959 {
4960 fprintf (dump_file, "Improved to:\n");
4961 iv_ca_dump (data, dump_file, set);
4962 }
4963 }
4964
4965 if (dump_file && (dump_flags & TDF_DETAILS))
4966 {
4967 comp_cost cost = iv_ca_cost (set);
4968 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4969 }
4970
4971 for (i = 0; i < n_iv_uses (data); i++)
4972 {
4973 use = iv_use (data, i);
4974 use->selected = iv_ca_cand_for_use (set, use)->cand;
4975 }
4976
4977 return set;
4978 }
4979
4980 /* Creates a new induction variable corresponding to CAND. */
4981
4982 static void
4983 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4984 {
4985 gimple_stmt_iterator incr_pos;
4986 tree base;
4987 bool after = false;
4988
4989 if (!cand->iv)
4990 return;
4991
4992 switch (cand->pos)
4993 {
4994 case IP_NORMAL:
4995 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
4996 break;
4997
4998 case IP_END:
4999 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5000 after = true;
5001 break;
5002
5003 case IP_ORIGINAL:
5004 /* Mark that the iv is preserved. */
5005 name_info (data, cand->var_before)->preserve_biv = true;
5006 name_info (data, cand->var_after)->preserve_biv = true;
5007
5008 /* Rewrite the increment so that it uses var_before directly. */
5009 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5010
5011 return;
5012 }
5013
5014 gimple_add_tmp_var (cand->var_before);
5015 add_referenced_var (cand->var_before);
5016
5017 base = unshare_expr (cand->iv->base);
5018
5019 create_iv (base, unshare_expr (cand->iv->step),
5020 cand->var_before, data->current_loop,
5021 &incr_pos, after, &cand->var_before, &cand->var_after);
5022 }
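
/* For illustration, with base 0, step 4 and position IP_NORMAL,
   create_iv emits roughly (with invented SSA names):

     preheader:
       ivtmp.1 = 0;
     header:
       ivtmp.2 = PHI <ivtmp.1 (preheader), ivtmp.3 (latch)>
     just before the exit test:
       ivtmp.3 = ivtmp.2 + 4;

   with cand->var_before set to ivtmp.2 and cand->var_after to
   ivtmp.3.  */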
5023
5024 /* Creates new induction variables described in SET. */
5025
5026 static void
5027 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5028 {
5029 unsigned i;
5030 struct iv_cand *cand;
5031 bitmap_iterator bi;
5032
5033 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5034 {
5035 cand = iv_cand (data, i);
5036 create_new_iv (data, cand);
5037 }
5038 }
5039
5040 /* Returns the phi-node in BB with result RESULT. */
5041
5042 static gimple
5043 get_phi_with_result (basic_block bb, tree result)
5044 {
5045 gimple_stmt_iterator i = gsi_start_phis (bb);
5046
5047 for (; !gsi_end_p (i); gsi_next (&i))
5048 if (gimple_phi_result (gsi_stmt (i)) == result)
5049 return gsi_stmt (i);
5050
5051 gcc_unreachable ();
5052 return NULL;
5053 }
5054
5055
5056 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5057 is true, also remove the ssa name defined by the statement. */
5058
5059 static void
5060 remove_statement (gimple stmt, bool including_defined_name)
5061 {
5062 if (gimple_code (stmt) == GIMPLE_PHI)
5063 {
5064 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5065 gimple_phi_result (stmt));
5066 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5067 remove_phi_node (&bsi, including_defined_name);
5068 }
5069 else
5070 {
5071 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5072 gsi_remove (&bsi, true);
5073 release_defs (stmt);
5074 }
5075 }
5076
5077 /* Rewrites USE (definition of iv used in a nonlinear expression)
5078 using candidate CAND. */
5079
5080 static void
5081 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5082 struct iv_use *use, struct iv_cand *cand)
5083 {
5084 tree comp;
5085 tree op, tgt;
5086 gimple ass;
5087 gimple_stmt_iterator bsi;
5088
5089 /* An important special case -- if we are asked to express the value of
5090 the original iv by itself, just exit; there is no need to
5091 introduce a new computation (that might also need casting the
5092 variable to unsigned and back). */
5093 if (cand->pos == IP_ORIGINAL
5094 && cand->incremented_at == use->stmt)
5095 {
5096 tree step, ctype, utype;
5097 enum tree_code incr_code = PLUS_EXPR, old_code;
5098
5099 gcc_assert (is_gimple_assign (use->stmt));
5100 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5101
5102 step = cand->iv->step;
5103 ctype = TREE_TYPE (step);
5104 utype = TREE_TYPE (cand->var_after);
5105 if (TREE_CODE (step) == NEGATE_EXPR)
5106 {
5107 incr_code = MINUS_EXPR;
5108 step = TREE_OPERAND (step, 0);
5109 }
5110
5111 /* Check whether we may leave the computation unchanged.
5112 This is the case only if it does not rely on other
5113 computations in the loop -- otherwise, the computation
5114 we rely upon may be removed in remove_unused_ivs,
5115 thus leading to an ICE. */
5116 old_code = gimple_assign_rhs_code (use->stmt);
5117 if (old_code == PLUS_EXPR
5118 || old_code == MINUS_EXPR
5119 || old_code == POINTER_PLUS_EXPR)
5120 {
5121 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5122 op = gimple_assign_rhs2 (use->stmt);
5123 else if (old_code != MINUS_EXPR
5124 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5125 op = gimple_assign_rhs1 (use->stmt);
5126 else
5127 op = NULL_TREE;
5128 }
5129 else
5130 op = NULL_TREE;
5131
5132 if (op
5133 && (TREE_CODE (op) == INTEGER_CST
5134 || operand_equal_p (op, step, 0)))
5135 return;
5136
5137 /* Otherwise, add the necessary computations to express
5138 the iv. */
5139 op = fold_convert (ctype, cand->var_before);
5140 comp = fold_convert (utype,
5141 build2 (incr_code, ctype, op,
5142 unshare_expr (step)));
5143 }
5144 else
5145 {
5146 comp = get_computation (data->current_loop, use, cand);
5147 gcc_assert (comp != NULL_TREE);
5148 }
5149
5150 switch (gimple_code (use->stmt))
5151 {
5152 case GIMPLE_PHI:
5153 tgt = PHI_RESULT (use->stmt);
5154
5155 /* If we should keep the biv, do not replace it. */
5156 if (name_info (data, tgt)->preserve_biv)
5157 return;
5158
5159 bsi = gsi_after_labels (gimple_bb (use->stmt));
5160 break;
5161
5162 case GIMPLE_ASSIGN:
5163 tgt = gimple_assign_lhs (use->stmt);
5164 bsi = gsi_for_stmt (use->stmt);
5165 break;
5166
5167 default:
5168 gcc_unreachable ();
5169 }
5170
5171 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5172 true, GSI_SAME_STMT);
5173
5174 if (gimple_code (use->stmt) == GIMPLE_PHI)
5175 {
5176 ass = gimple_build_assign (tgt, op);
5177 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5178 remove_statement (use->stmt, false);
5179 }
5180 else
5181 {
5182 gimple_assign_set_rhs_from_tree (&bsi, op);
5183 use->stmt = gsi_stmt (bsi);
5184 }
5185 }
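
/* So, in the special case above, a statement like (an invented example)

     i.next = i + 1

   where the candidate is the original iv incremented at this very
   statement is left untouched (provided the form of the increment is
   safe, as checked above), while other nonlinear uses are replaced by a
   freshly computed value of the candidate.  */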
5186
5187 /* Replaces the ssa name in index IDX by its basic variable. Callback for
5188 for_each_index. */
5189
5190 static bool
5191 idx_remove_ssa_names (tree base, tree *idx,
5192 void *data ATTRIBUTE_UNUSED)
5193 {
5194 tree *op;
5195
5196 if (TREE_CODE (*idx) == SSA_NAME)
5197 *idx = SSA_NAME_VAR (*idx);
5198
5199 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5200 {
5201 op = &TREE_OPERAND (base, 2);
5202 if (*op
5203 && TREE_CODE (*op) == SSA_NAME)
5204 *op = SSA_NAME_VAR (*op);
5205 op = &TREE_OPERAND (base, 3);
5206 if (*op
5207 && TREE_CODE (*op) == SSA_NAME)
5208 *op = SSA_NAME_VAR (*op);
5209 }
5210
5211 return true;
5212 }
5213
5214 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5215
5216 static tree
5217 unshare_and_remove_ssa_names (tree ref)
5218 {
5219 ref = unshare_expr (ref);
5220 for_each_index (&ref, idx_remove_ssa_names, NULL);
5221
5222 return ref;
5223 }
5224
5225 /* Copies the reference information from OLD_REF to NEW_REF. */
5226
5227 static void
5228 copy_ref_info (tree new_ref, tree old_ref)
5229 {
5230 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5231 copy_mem_ref_info (new_ref, old_ref);
5232 else
5233 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5234 }
5235
5236 /* Rewrites USE (address that is an iv) using candidate CAND. */
5237
5238 static void
5239 rewrite_use_address (struct ivopts_data *data,
5240 struct iv_use *use, struct iv_cand *cand)
5241 {
5242 aff_tree aff;
5243 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5244 tree ref;
5245 bool ok;
5246
5247 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5248 gcc_assert (ok);
5249 unshare_aff_combination (&aff);
5250
5251 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5252 copy_ref_info (ref, *use->op_p);
5253 *use->op_p = ref;
5254 }
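
/* As an invented example, a use of the address &a[i] may be rewritten
   here into a TARGET_MEM_REF of roughly the form

     MEM[base: &a, index: ivtmp.5, step: 4]

   or, for a pointer candidate that already strides through the array,
   simply MEM[ivtmp.5], whichever create_mem_ref finds cheapest for the
   target's addressing modes.  */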
5255
5256 /* Rewrites USE (the condition in which one of the arguments is an iv)
5257 using candidate CAND. */
5258
5259 static void
5260 rewrite_use_compare (struct ivopts_data *data,
5261 struct iv_use *use, struct iv_cand *cand)
5262 {
5263 tree comp, *var_p, op, bound;
5264 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5265 enum tree_code compare;
5266 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5267 bool ok;
5268
5269 bound = cp->value;
5270 if (bound)
5271 {
5272 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5273 tree var_type = TREE_TYPE (var);
5274 gimple_seq stmts;
5275
5276 compare = iv_elimination_compare (data, use);
5277 bound = unshare_expr (fold_convert (var_type, bound));
5278 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5279 if (stmts)
5280 gsi_insert_seq_on_edge_immediate (
5281 loop_preheader_edge (data->current_loop),
5282 stmts);
5283
5284 gimple_cond_set_lhs (use->stmt, var);
5285 gimple_cond_set_code (use->stmt, compare);
5286 gimple_cond_set_rhs (use->stmt, op);
5287 return;
5288 }
5289
5290 /* The induction variable elimination failed; just express the original
5291 giv. */
5292 comp = get_computation (data->current_loop, use, cand);
5293 gcc_assert (comp != NULL_TREE);
5294
5295 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5296 gcc_assert (ok);
5297
5298 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5299 true, GSI_SAME_STMT);
5300 }
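
/* Illustrative example: given the eliminable exit test

     if (i < 100)

   and a pointer candidate ivtmp advancing by 4 through an array a, the
   code above rewrites the condition to something like

     if (ivtmp != &a[100])

   computing the bound in the loop preheader, so the original counter i
   may become unused and be removed later.  */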
5301
5302 /* Rewrites USE using candidate CAND. */
5303
5304 static void
5305 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5306 {
5307 switch (use->type)
5308 {
5309 case USE_NONLINEAR_EXPR:
5310 rewrite_use_nonlinear_expr (data, use, cand);
5311 break;
5312
5313 case USE_ADDRESS:
5314 rewrite_use_address (data, use, cand);
5315 break;
5316
5317 case USE_COMPARE:
5318 rewrite_use_compare (data, use, cand);
5319 break;
5320
5321 default:
5322 gcc_unreachable ();
5323 }
5324
5325 update_stmt (use->stmt);
5326 }
5327
5328 /* Rewrite the uses using the selected induction variables. */
5329
5330 static void
5331 rewrite_uses (struct ivopts_data *data)
5332 {
5333 unsigned i;
5334 struct iv_cand *cand;
5335 struct iv_use *use;
5336
5337 for (i = 0; i < n_iv_uses (data); i++)
5338 {
5339 use = iv_use (data, i);
5340 cand = use->selected;
5341 gcc_assert (cand);
5342
5343 rewrite_use (data, use, cand);
5344 }
5345 }
5346
5347 /* Removes the ivs that are not used after rewriting. */
5348
5349 static void
5350 remove_unused_ivs (struct ivopts_data *data)
5351 {
5352 unsigned j;
5353 bitmap_iterator bi;
5354
5355 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5356 {
5357 struct version_info *info;
5358
5359 info = ver_info (data, j);
5360 if (info->iv
5361 && !integer_zerop (info->iv->step)
5362 && !info->inv_id
5363 && !info->iv->have_use_for
5364 && !info->preserve_biv)
5365 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5366 }
5367 }
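
/* For example (with invented names), if every use of the counter i was
   rewritten in terms of a pointer iv, the now-dead definitions

     i_1 = PHI <0, i_2>
     i_2 = i_1 + 1

   define an iv with no remaining uses and are removed here, unless
   preserve_biv was set for it in create_new_iv (the IP_ORIGINAL
   case).  */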
5368
5369 /* Frees data allocated by the optimization of a single loop. */
5370
5371 static void
5372 free_loop_data (struct ivopts_data *data)
5373 {
5374 unsigned i, j;
5375 bitmap_iterator bi;
5376 tree obj;
5377
5378 if (data->niters)
5379 {
5380 pointer_map_destroy (data->niters);
5381 data->niters = NULL;
5382 }
5383
5384 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5385 {
5386 struct version_info *info;
5387
5388 info = ver_info (data, i);
5389 if (info->iv)
5390 free (info->iv);
5391 info->iv = NULL;
5392 info->has_nonlin_use = false;
5393 info->preserve_biv = false;
5394 info->inv_id = 0;
5395 }
5396 bitmap_clear (data->relevant);
5397 bitmap_clear (data->important_candidates);
5398
5399 for (i = 0; i < n_iv_uses (data); i++)
5400 {
5401 struct iv_use *use = iv_use (data, i);
5402
5403 free (use->iv);
5404 BITMAP_FREE (use->related_cands);
5405 for (j = 0; j < use->n_map_members; j++)
5406 if (use->cost_map[j].depends_on)
5407 BITMAP_FREE (use->cost_map[j].depends_on);
5408 free (use->cost_map);
5409 free (use);
5410 }
5411 VEC_truncate (iv_use_p, data->iv_uses, 0);
5412
5413 for (i = 0; i < n_iv_cands (data); i++)
5414 {
5415 struct iv_cand *cand = iv_cand (data, i);
5416
5417 if (cand->iv)
5418 free (cand->iv);
5419 if (cand->depends_on)
5420 BITMAP_FREE (cand->depends_on);
5421 free (cand);
5422 }
5423 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5424
5425 if (data->version_info_size < num_ssa_names)
5426 {
5427 data->version_info_size = 2 * num_ssa_names;
5428 free (data->version_info);
5429 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5430 }
5431
5432 data->max_inv_id = 0;
5433
5434 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5435 SET_DECL_RTL (obj, NULL_RTX);
5436
5437 VEC_truncate (tree, decl_rtl_to_reset, 0);
5438 }
5439
5440 /* Finalizes the data structures used by the iv optimization pass,
5441 stored in DATA. */
5442
5443 static void
5444 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5445 {
5446 free_loop_data (data);
5447 free (data->version_info);
5448 BITMAP_FREE (data->relevant);
5449 BITMAP_FREE (data->important_candidates);
5450
5451 VEC_free (tree, heap, decl_rtl_to_reset);
5452 VEC_free (iv_use_p, heap, data->iv_uses);
5453 VEC_free (iv_cand_p, heap, data->iv_candidates);
5454 }
5455
5456 /* Optimizes the LOOP. Returns true if anything changed. */
5457
5458 static bool
5459 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5460 {
5461 bool changed = false;
5462 struct iv_ca *iv_ca;
5463 edge exit;
5464
5465 gcc_assert (!data->niters);
5466 data->current_loop = loop;
5467 data->speed = optimize_loop_for_speed_p (loop);
5468
5469 if (dump_file && (dump_flags & TDF_DETAILS))
5470 {
5471 fprintf (dump_file, "Processing loop %d\n", loop->num);
5472
5473 exit = single_dom_exit (loop);
5474 if (exit)
5475 {
5476 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5477 exit->src->index, exit->dest->index);
5478 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5479 fprintf (dump_file, "\n");
5480 }
5481
5482 fprintf (dump_file, "\n");
5483 }
5484
5485 /* For each ssa name, determine whether it behaves as an induction variable
5486 in this loop. */
5487 if (!find_induction_variables (data))
5488 goto finish;
5489
5490 /* Finds interesting uses (item 1). */
5491 find_interesting_uses (data);
5492 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5493 goto finish;
5494
5495 /* Finds candidates for the induction variables (item 2). */
5496 find_iv_candidates (data);
5497
5498 /* Calculates the costs (item 3, part 1). */
5499 determine_use_iv_costs (data);
5500 determine_iv_costs (data);
5501 determine_set_costs (data);
5502
5503 /* Find the optimal set of induction variables (item 3, part 2). */
5504 iv_ca = find_optimal_iv_set (data);
5505 if (!iv_ca)
5506 goto finish;
5507 changed = true;
5508
5509 /* Create the new induction variables (item 4, part 1). */
5510 create_new_ivs (data, iv_ca);
5511 iv_ca_free (&iv_ca);
5512
5513 /* Rewrite the uses (item 4, part 2). */
5514 rewrite_uses (data);
5515
5516 /* Remove the ivs that are unused after rewriting. */
5517 remove_unused_ivs (data);
5518
5519 /* We have changed the structure of induction variables; it might happen
5520 that definitions in the scev database refer to some of them that were
5521 eliminated. */
5522 scev_reset ();
5523
5524 finish:
5525 free_loop_data (data);
5526
5527 return changed;
5528 }
5529
5530 /* Main entry point. Optimizes induction variables in loops. */
5531
5532 void
5533 tree_ssa_iv_optimize (void)
5534 {
5535 struct loop *loop;
5536 struct ivopts_data data;
5537 loop_iterator li;
5538
5539 tree_ssa_iv_optimize_init (&data);
5540
5541 /* Optimize the loops starting with the innermost ones. */
5542 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5543 {
5544 if (dump_file && (dump_flags & TDF_DETAILS))
5545 flow_loop_dump (loop, dump_file, NULL, 1);
5546
5547 tree_ssa_iv_optimize_loop (&data, loop);
5548 }
5549
5550 tree_ssa_iv_optimize_finalize (&data);
5551 }
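
/* Taken together, the pass transforms a loop like (an illustrative
   source-level view, not an actual dump)

     for (i = 0; i < 100; i++)
       a[i] = 0;

   into the equivalent of

     for (p = &a[0]; p != &a[100]; p++)
       *p = 0;

   strength-reducing the array address and eliminating the counter.  */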