1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 	 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60    possible; it might give better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
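/* Purely as an illustrative sketch (not part of the pass itself): for a
   loop such as

       for (i = 0; i < n; i++)
	 a[i] = b[i] + 1;

   the address uses of the counter i can be rewritten in terms of pointer
   induction variables, so that after strength reduction the loop behaves
   roughly like

       for (p = a, q = b; p != a + n; p++, q++)
	 *p = *q + 1;

   and, if i has no remaining uses, the original counter is eliminated.  */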
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "rtl.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "cfghooks.h"
72 #include "tree-pass.h"
73 #include "tm_p.h"
74 #include "ssa.h"
75 #include "expmed.h"
76 #include "insn-config.h"
77 #include "emit-rtl.h"
78 #include "recog.h"
79 #include "cgraph.h"
80 #include "gimple-pretty-print.h"
81 #include "alias.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
84 #include "tree-eh.h"
85 #include "gimplify.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
88 #include "tree-cfg.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
93 #include "explow.h"
94 #include "expr.h"
95 #include "tree-dfa.h"
96 #include "tree-ssa.h"
97 #include "cfgloop.h"
98 #include "tree-scalar-evolution.h"
99 #include "params.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
105
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
109
110 /* The infinite cost. */
111 #define INFTY 10000000
112
113 #define AVG_LOOP_NITER(LOOP) 5
114
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
117 exists. */
118
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop *loop)
121 {
122 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
123 if (niter == -1)
124 return AVG_LOOP_NITER (loop);
125
126 return niter;
127 }
128
129 /* Representation of the induction variable. */
130 struct iv
131 {
132 tree base; /* Initial value of the iv. */
133   tree base_object;	/* A memory object to which the induction variable points. */
134 tree step; /* Step of the iv (constant only). */
135 tree ssa_name; /* The ssa name with the value. */
136   unsigned use_id;	/* The identifier of the use, if there is one. */
137 bool biv_p; /* Is it a biv? */
138 bool have_use_for; /* Do we already have a use for it? */
139 bool no_overflow; /* True if the iv doesn't overflow. */
140   bool have_address_use;/* For a biv, indicates whether it is used in any address
141 type use. */
142 };
143
144 /* Per-ssa version information (induction variable descriptions, etc.). */
145 struct version_info
146 {
147 tree name; /* The ssa name. */
148 struct iv *iv; /* Induction variable description. */
149 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv; /* For the original biv, whether to preserve it. */
152 unsigned inv_id; /* Id of an invariant. */
153 };
154
155 /* Types of uses. */
156 enum use_type
157 {
158 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
159 USE_ADDRESS, /* Use in an address. */
160 USE_COMPARE /* Use is a compare. */
161 };
162
163 /* Cost of a computation. */
164 struct comp_cost
165 {
166 int cost; /* The runtime cost. */
167 unsigned complexity; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
171 };
172
173 static const comp_cost no_cost = {0, 0};
174 static const comp_cost infinite_cost = {INFTY, INFTY};
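/* A purely illustrative reading of the two fields above: an addressing mode
   such as [base] might be assigned {cost = 1, complexity = 0}, while
   [base + index * 4 + 8] might be assigned {cost = 1, complexity = 3}; the
   runtime cost can be identical even though the second form is considered
   more complex.  The concrete numbers are target dependent.  */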
175
176 /* The candidate - cost pair. */
177 struct cost_pair
178 {
179 struct iv_cand *cand; /* The candidate. */
180 comp_cost cost; /* The cost. */
181 bitmap depends_on; /* The list of invariants that have to be
182 preserved. */
183 tree value; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp; /* For iv elimination, the comparison. */
187 int inv_expr_id; /* Loop invariant expression id. */
188 };
189
190 /* Use. */
191 struct iv_use
192 {
193 unsigned id; /* The id of the use. */
194 unsigned sub_id; /* The id of the sub use. */
195 enum use_type type; /* Type of the use. */
196 struct iv *iv; /* The induction variable it is based on. */
197   gimple *stmt;		/* Statement in which it occurs. */
198 tree *op_p; /* The place where it occurs. */
199 bitmap related_cands; /* The set of "related" iv candidates, plus the common
200 important ones. */
201
202 unsigned n_map_members; /* Number of candidates in the cost_map list. */
203 struct cost_pair *cost_map;
204 			/* The costs w.r.t. the iv candidates. */
205
206 struct iv_cand *selected;
207 /* The selected candidate. */
208
209 struct iv_use *next; /* The next sub use. */
210 tree addr_base; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset;
212 /* Const offset stripped from base address. */
213 };
214
215 /* The position where the iv is computed. */
216 enum iv_position
217 {
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
223 };
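/* Illustrative sketch (hypothetical loop shape) of where the positions above
   place the increment of a candidate; only one of the increments below would
   exist for any given candidate:

     loop:
       ...
       use (iv);			IP_BEFORE_USE / IP_AFTER_USE bracket
       ...				a specific use
       iv += step;			IP_NORMAL: just before the exit test
       if (!cond) goto exit;
     latch:
       iv += step;			IP_END: at the end of the latch block
       goto loop;

   IP_ORIGINAL means the increment of the original biv is reused as is.  */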
224
225 /* The induction variable candidate. */
226 struct iv_cand
227 {
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple *incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244   bitmap depends_on;	/* The list of invariants that are used in the step of the
245 biv. */
246   struct iv *orig_iv;	/* The original iv if this candidate is derived from a
247 			   biv with a smaller type.  */
248 };
249
250 /* Loop invariant expression hashtable entry. */
251 struct iv_inv_expr_ent
252 {
253 tree expr;
254 int id;
255 hashval_t hash;
256 };
257
258 /* The data used by the induction variable optimizations. */
259
260 /* Hashtable helpers. */
261
262 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
263 {
264 static inline hashval_t hash (const iv_inv_expr_ent *);
265 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
266 };
267
268 /* Hash function for loop invariant expressions. */
269
270 inline hashval_t
271 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
272 {
273 return expr->hash;
274 }
275
276 /* Hash table equality function for expressions. */
277
278 inline bool
279 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
280 const iv_inv_expr_ent *expr2)
281 {
282 return expr1->hash == expr2->hash
283 && operand_equal_p (expr1->expr, expr2->expr, 0);
284 }
285
286 struct ivopts_data
287 {
288 /* The currently optimized loop. */
289 struct loop *current_loop;
290 source_location loop_loc;
291
292 /* Numbers of iterations for all exits of the current loop. */
293 hash_map<edge, tree_niter_desc *> *niters;
294
295 /* Number of registers used in it. */
296 unsigned regs_used;
297
298 /* The size of version_info array allocated. */
299 unsigned version_info_size;
300
301 /* The array of information for the ssa names. */
302 struct version_info *version_info;
303
304 /* The hashtable of loop invariant expressions created
305 by ivopt. */
306 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
307
308 /* Loop invariant expression id. */
309 int inv_expr_id;
310
311 /* The bitmap of indices in version_info whose value was changed. */
312 bitmap relevant;
313
314 /* The uses of induction variables. */
315 vec<iv_use *> iv_uses;
316
317 /* The candidates. */
318 vec<iv_cand *> iv_candidates;
319
320 /* A bitmap of important candidates. */
321 bitmap important_candidates;
322
323 /* Cache used by tree_to_aff_combination_expand. */
324 hash_map<tree, name_expansion *> *name_expansion_cache;
325
326 /* The maximum invariant id. */
327 unsigned max_inv_id;
328
329   /* Number of no_overflow BIVs which are not used in a memory address.  */
330 unsigned bivs_not_used_in_addr;
331
332 /* Obstack for iv structure. */
333 struct obstack iv_obstack;
334
335 /* Whether to consider just related and important candidates when replacing a
336 use. */
337 bool consider_all_candidates;
338
339 /* Are we optimizing for speed? */
340 bool speed;
341
342 /* Whether the loop body includes any function calls. */
343 bool body_includes_call;
344
345   /* Whether the loop body can only be exited via a single exit. */
346 bool loop_single_exit_p;
347 };
348
349 /* An assignment of iv candidates to uses. */
350
351 struct iv_ca
352 {
353 /* The number of uses covered by the assignment. */
354 unsigned upto;
355
356 /* Number of uses that cannot be expressed by the candidates in the set. */
357 unsigned bad_uses;
358
359 /* Candidate assigned to a use, together with the related costs. */
360 struct cost_pair **cand_for_use;
361
362 /* Number of times each candidate is used. */
363 unsigned *n_cand_uses;
364
365 /* The candidates used. */
366 bitmap cands;
367
368 /* The number of candidates in the set. */
369 unsigned n_cands;
370
371 /* Total number of registers needed. */
372 unsigned n_regs;
373
374 /* Total cost of expressing uses. */
375 comp_cost cand_use_cost;
376
377 /* Total cost of candidates. */
378 unsigned cand_cost;
379
380 /* Number of times each invariant is used. */
381 unsigned *n_invariant_uses;
382
383 /* The array holding the number of uses of each loop
384      invariant expression created by ivopts.  */
385 unsigned *used_inv_expr;
386
387 /* The number of created loop invariants. */
388 unsigned num_used_inv_expr;
389
390 /* Total cost of the assignment. */
391 comp_cost cost;
392 };
393
394 /* Difference of two iv candidate assignments. */
395
396 struct iv_ca_delta
397 {
398 /* Changed use. */
399 struct iv_use *use;
400
401 /* An old assignment (for rollback purposes). */
402 struct cost_pair *old_cp;
403
404 /* A new assignment. */
405 struct cost_pair *new_cp;
406
407 /* Next change in the list. */
408 struct iv_ca_delta *next_change;
409 };
410
411 /* Bound on the number of candidates below which all candidates are considered. */
412
413 #define CONSIDER_ALL_CANDIDATES_BOUND \
414 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
415
416 /* If there are more iv occurrences, we just give up (it is quite unlikely that
417 optimizing such a loop would help, and it would take ages). */
418
419 #define MAX_CONSIDERED_USES \
420 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
421
422 /* If there are at most this number of ivs in the set, always try removing
423    unnecessary ivs from the set. */
424
425 #define ALWAYS_PRUNE_CAND_SET_BOUND \
426 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
427
428 /* The list of trees for which the decl_rtl field must be reset is stored
429 here. */
430
431 static vec<tree> decl_rtl_to_reset;
432
433 static comp_cost force_expr_to_var_cost (tree, bool);
434
435 /* Number of uses recorded in DATA. */
436
437 static inline unsigned
438 n_iv_uses (struct ivopts_data *data)
439 {
440 return data->iv_uses.length ();
441 }
442
443 /* Ith use recorded in DATA. */
444
445 static inline struct iv_use *
446 iv_use (struct ivopts_data *data, unsigned i)
447 {
448 return data->iv_uses[i];
449 }
450
451 /* Number of candidates recorded in DATA. */
452
453 static inline unsigned
454 n_iv_cands (struct ivopts_data *data)
455 {
456 return data->iv_candidates.length ();
457 }
458
459 /* Ith candidate recorded in DATA. */
460
461 static inline struct iv_cand *
462 iv_cand (struct ivopts_data *data, unsigned i)
463 {
464 return data->iv_candidates[i];
465 }
466
467 /* The single loop exit if it dominates the latch, NULL otherwise. */
468
469 edge
470 single_dom_exit (struct loop *loop)
471 {
472 edge exit = single_exit (loop);
473
474 if (!exit)
475 return NULL;
476
477 if (!just_once_each_iteration_p (loop, exit->src))
478 return NULL;
479
480 return exit;
481 }
482
483 /* Dumps information about the induction variable IV to FILE. */
484
485 void
486 dump_iv (FILE *file, struct iv *iv, bool dump_name)
487 {
488 if (iv->ssa_name && dump_name)
489 {
490 fprintf (file, "ssa name ");
491 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
492 fprintf (file, "\n");
493 }
494
495 fprintf (file, " type ");
496 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
497 fprintf (file, "\n");
498
499 if (iv->step)
500 {
501 fprintf (file, " base ");
502 print_generic_expr (file, iv->base, TDF_SLIM);
503 fprintf (file, "\n");
504
505 fprintf (file, " step ");
506 print_generic_expr (file, iv->step, TDF_SLIM);
507 fprintf (file, "\n");
508 }
509 else
510 {
511 fprintf (file, " invariant ");
512 print_generic_expr (file, iv->base, TDF_SLIM);
513 fprintf (file, "\n");
514 }
515
516 if (iv->base_object)
517 {
518 fprintf (file, " base object ");
519 print_generic_expr (file, iv->base_object, TDF_SLIM);
520 fprintf (file, "\n");
521 }
522
523 if (iv->biv_p)
524 fprintf (file, " is a biv\n");
525
526 if (iv->no_overflow)
527 fprintf (file, " iv doesn't overflow wrto loop niter\n");
528 }
529
530 /* Dumps information about the USE to FILE. */
531
532 void
533 dump_use (FILE *file, struct iv_use *use)
534 {
535 fprintf (file, "use %d", use->id);
536 if (use->sub_id)
537 fprintf (file, ".%d", use->sub_id);
538
539 fprintf (file, "\n");
540
541 switch (use->type)
542 {
543 case USE_NONLINEAR_EXPR:
544 fprintf (file, " generic\n");
545 break;
546
547 case USE_ADDRESS:
548 fprintf (file, " address\n");
549 break;
550
551 case USE_COMPARE:
552 fprintf (file, " compare\n");
553 break;
554
555 default:
556 gcc_unreachable ();
557 }
558
559 fprintf (file, " in statement ");
560 print_gimple_stmt (file, use->stmt, 0, 0);
561 fprintf (file, "\n");
562
563 fprintf (file, " at position ");
564 if (use->op_p)
565 print_generic_expr (file, *use->op_p, TDF_SLIM);
566 fprintf (file, "\n");
567
568 dump_iv (file, use->iv, false);
569
570 if (use->related_cands)
571 {
572 fprintf (file, " related candidates ");
573 dump_bitmap (file, use->related_cands);
574 }
575 }
576
577 /* Dumps information about the uses to FILE. */
578
579 void
580 dump_uses (FILE *file, struct ivopts_data *data)
581 {
582 unsigned i;
583 struct iv_use *use;
584
585 for (i = 0; i < n_iv_uses (data); i++)
586 {
587 use = iv_use (data, i);
588 do
589 {
590 dump_use (file, use);
591 use = use->next;
592 }
593 while (use);
594 fprintf (file, "\n");
595 }
596 }
597
598 /* Dumps information about induction variable candidate CAND to FILE. */
599
600 void
601 dump_cand (FILE *file, struct iv_cand *cand)
602 {
603 struct iv *iv = cand->iv;
604
605 fprintf (file, "candidate %d%s\n",
606 cand->id, cand->important ? " (important)" : "");
607
608 if (cand->depends_on)
609 {
610 fprintf (file, " depends on ");
611 dump_bitmap (file, cand->depends_on);
612 }
613
614 if (!iv)
615 {
616 fprintf (file, " final value replacement\n");
617 return;
618 }
619
620 if (cand->var_before)
621 {
622 fprintf (file, " var_before ");
623 print_generic_expr (file, cand->var_before, TDF_SLIM);
624 fprintf (file, "\n");
625 }
626 if (cand->var_after)
627 {
628 fprintf (file, " var_after ");
629 print_generic_expr (file, cand->var_after, TDF_SLIM);
630 fprintf (file, "\n");
631 }
632
633 switch (cand->pos)
634 {
635 case IP_NORMAL:
636 fprintf (file, " incremented before exit test\n");
637 break;
638
639 case IP_BEFORE_USE:
640 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
641 break;
642
643 case IP_AFTER_USE:
644 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
645 break;
646
647 case IP_END:
648 fprintf (file, " incremented at end\n");
649 break;
650
651 case IP_ORIGINAL:
652 fprintf (file, " original biv\n");
653 break;
654 }
655
656 dump_iv (file, iv, false);
657 }
658
659 /* Returns the info for ssa version VER. */
660
661 static inline struct version_info *
662 ver_info (struct ivopts_data *data, unsigned ver)
663 {
664 return data->version_info + ver;
665 }
666
667 /* Returns the info for ssa name NAME. */
668
669 static inline struct version_info *
670 name_info (struct ivopts_data *data, tree name)
671 {
672 return ver_info (data, SSA_NAME_VERSION (name));
673 }
674
675 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
676 emitted in LOOP. */
677
678 static bool
679 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
680 {
681 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
682
683 gcc_assert (bb);
684
685 if (sbb == loop->latch)
686 return true;
687
688 if (sbb != bb)
689 return false;
690
691 return stmt == last_stmt (bb);
692 }
693
694 /* Returns true if STMT is after the place where the original induction
695 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
696 if the positions are identical. */
697
698 static bool
699 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
700 {
701 basic_block cand_bb = gimple_bb (cand->incremented_at);
702 basic_block stmt_bb = gimple_bb (stmt);
703
704 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
705 return false;
706
707 if (stmt_bb != cand_bb)
708 return true;
709
710 if (true_if_equal
711 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
712 return true;
713 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
714 }
715
716 /* Returns true if STMT is after the place where the induction variable
717 CAND is incremented in LOOP. */
718
719 static bool
720 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
721 {
722 switch (cand->pos)
723 {
724 case IP_END:
725 return false;
726
727 case IP_NORMAL:
728 return stmt_after_ip_normal_pos (loop, stmt);
729
730 case IP_ORIGINAL:
731 case IP_AFTER_USE:
732 return stmt_after_inc_pos (cand, stmt, false);
733
734 case IP_BEFORE_USE:
735 return stmt_after_inc_pos (cand, stmt, true);
736
737 default:
738 gcc_unreachable ();
739 }
740 }
741
742 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
743
744 static bool
745 abnormal_ssa_name_p (tree exp)
746 {
747 if (!exp)
748 return false;
749
750 if (TREE_CODE (exp) != SSA_NAME)
751 return false;
752
753 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
754 }
755
756 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
757 abnormal phi node. Callback for for_each_index. */
758
759 static bool
760 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
761 void *data ATTRIBUTE_UNUSED)
762 {
763 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
764 {
765 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
766 return false;
767 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
768 return false;
769 }
770
771 return !abnormal_ssa_name_p (*index);
772 }
773
774 /* Returns true if EXPR contains a ssa name that occurs in an
775 abnormal phi node. */
776
777 bool
778 contains_abnormal_ssa_name_p (tree expr)
779 {
780 enum tree_code code;
781 enum tree_code_class codeclass;
782
783 if (!expr)
784 return false;
785
786 code = TREE_CODE (expr);
787 codeclass = TREE_CODE_CLASS (code);
788
789 if (code == SSA_NAME)
790 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
791
792 if (code == INTEGER_CST
793 || is_gimple_min_invariant (expr))
794 return false;
795
796 if (code == ADDR_EXPR)
797 return !for_each_index (&TREE_OPERAND (expr, 0),
798 idx_contains_abnormal_ssa_name_p,
799 NULL);
800
801 if (code == COND_EXPR)
802 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
803 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
804 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
805
806 switch (codeclass)
807 {
808 case tcc_binary:
809 case tcc_comparison:
810 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
811 return true;
812
813 /* Fallthru. */
814 case tcc_unary:
815 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
816 return true;
817
818 break;
819
820 default:
821 gcc_unreachable ();
822 }
823
824 return false;
825 }
826
827 /* Returns the structure describing number of iterations determined from
828 EXIT of DATA->current_loop, or NULL if something goes wrong. */
829
830 static struct tree_niter_desc *
831 niter_for_exit (struct ivopts_data *data, edge exit)
832 {
833 struct tree_niter_desc *desc;
834 tree_niter_desc **slot;
835
836 if (!data->niters)
837 {
838 data->niters = new hash_map<edge, tree_niter_desc *>;
839 slot = NULL;
840 }
841 else
842 slot = data->niters->get (exit);
843
844 if (!slot)
845 {
846 /* Try to determine number of iterations. We cannot safely work with ssa
847 names that appear in phi nodes on abnormal edges, so that we do not
848 create overlapping life ranges for them (PR 27283). */
849 desc = XNEW (struct tree_niter_desc);
850 if (!number_of_iterations_exit (data->current_loop,
851 exit, desc, true)
852 || contains_abnormal_ssa_name_p (desc->niter))
853 {
854 XDELETE (desc);
855 desc = NULL;
856 }
857 data->niters->put (exit, desc);
858 }
859 else
860 desc = *slot;
861
862 return desc;
863 }
864
865 /* Returns the structure describing number of iterations determined from
866 single dominating exit of DATA->current_loop, or NULL if something
867 goes wrong. */
868
869 static struct tree_niter_desc *
870 niter_for_single_dom_exit (struct ivopts_data *data)
871 {
872 edge exit = single_dom_exit (data->current_loop);
873
874 if (!exit)
875 return NULL;
876
877 return niter_for_exit (data, exit);
878 }
879
880 /* Initializes data structures used by the iv optimization pass, stored
881 in DATA. */
882
883 static void
884 tree_ssa_iv_optimize_init (struct ivopts_data *data)
885 {
886 data->version_info_size = 2 * num_ssa_names;
887 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
888 data->relevant = BITMAP_ALLOC (NULL);
889 data->important_candidates = BITMAP_ALLOC (NULL);
890 data->max_inv_id = 0;
891 data->niters = NULL;
892 data->iv_uses.create (20);
893 data->iv_candidates.create (20);
894 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
895 data->inv_expr_id = 0;
896 data->name_expansion_cache = NULL;
897 decl_rtl_to_reset.create (20);
898 gcc_obstack_init (&data->iv_obstack);
899 }
900
901 /* Returns a memory object to which EXPR points.  If we are able to
902 determine that it does not point to any such object, NULL is returned. */
903
904 static tree
905 determine_base_object (tree expr)
906 {
907 enum tree_code code = TREE_CODE (expr);
908 tree base, obj;
909
910 /* If this is a pointer casted to any type, we need to determine
911 the base object for the pointer; so handle conversions before
912 throwing away non-pointer expressions. */
913 if (CONVERT_EXPR_P (expr))
914 return determine_base_object (TREE_OPERAND (expr, 0));
915
916 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
917 return NULL_TREE;
918
919 switch (code)
920 {
921 case INTEGER_CST:
922 return NULL_TREE;
923
924 case ADDR_EXPR:
925 obj = TREE_OPERAND (expr, 0);
926 base = get_base_address (obj);
927
928 if (!base)
929 return expr;
930
931 if (TREE_CODE (base) == MEM_REF)
932 return determine_base_object (TREE_OPERAND (base, 0));
933
934 return fold_convert (ptr_type_node,
935 build_fold_addr_expr (base));
936
937 case POINTER_PLUS_EXPR:
938 return determine_base_object (TREE_OPERAND (expr, 0));
939
940 case PLUS_EXPR:
941 case MINUS_EXPR:
942 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
943 gcc_unreachable ();
944
945 default:
946 return fold_convert (ptr_type_node, expr);
947 }
948 }
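/* A few illustrative cases for the function above: for EXPR == &a[i] it
   returns &a converted to ptr_type_node, since get_base_address yields the
   declaration a; for a pointer-typed INTEGER_CST it returns NULL_TREE; and
   for a POINTER_PLUS_EXPR such as p + 4 it recurses into p.  */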
949
950 /* Return true if an address expression with a non-DECL_P operand appears
951 in EXPR. */
952
953 static bool
954 contain_complex_addr_expr (tree expr)
955 {
956 bool res = false;
957
958 STRIP_NOPS (expr);
959 switch (TREE_CODE (expr))
960 {
961 case POINTER_PLUS_EXPR:
962 case PLUS_EXPR:
963 case MINUS_EXPR:
964 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
965 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
966 break;
967
968 case ADDR_EXPR:
969 return (!DECL_P (TREE_OPERAND (expr, 0)));
970
971 default:
972 return false;
973 }
974
975 return res;
976 }
977
978 /* Allocates an induction variable with given initial value BASE and step STEP
979 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
980
981 static struct iv *
982 alloc_iv (struct ivopts_data *data, tree base, tree step,
983 bool no_overflow = false)
984 {
985 tree expr = base;
986 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
987 sizeof (struct iv));
988 gcc_assert (step != NULL_TREE);
989
990   /* Lower address expressions in base, except ones with DECL_P as the operand.
991 By doing this:
992 1) More accurate cost can be computed for address expressions;
993 2) Duplicate candidates won't be created for bases in different
994 forms, like &a[0] and &a. */
995 STRIP_NOPS (expr);
996 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
997 || contain_complex_addr_expr (expr))
998 {
999 aff_tree comb;
1000 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1001 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1002 }
1003
1004 iv->base = base;
1005 iv->base_object = determine_base_object (base);
1006 iv->step = step;
1007 iv->biv_p = false;
1008 iv->have_use_for = false;
1009 iv->use_id = 0;
1010 iv->ssa_name = NULL_TREE;
1011 iv->no_overflow = no_overflow;
1012 iv->have_address_use = false;
1013
1014 return iv;
1015 }
1016
1017 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1018 doesn't overflow. */
1019
1020 static void
1021 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1022 bool no_overflow)
1023 {
1024 struct version_info *info = name_info (data, iv);
1025
1026 gcc_assert (!info->iv);
1027
1028 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1029 info->iv = alloc_iv (data, base, step, no_overflow);
1030 info->iv->ssa_name = iv;
1031 }
1032
1033 /* Finds induction variable declaration for VAR. */
1034
1035 static struct iv *
1036 get_iv (struct ivopts_data *data, tree var)
1037 {
1038 basic_block bb;
1039 tree type = TREE_TYPE (var);
1040
1041 if (!POINTER_TYPE_P (type)
1042 && !INTEGRAL_TYPE_P (type))
1043 return NULL;
1044
1045 if (!name_info (data, var)->iv)
1046 {
1047 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1048
1049 if (!bb
1050 || !flow_bb_inside_loop_p (data->current_loop, bb))
1051 set_iv (data, var, var, build_int_cst (type, 0), true);
1052 }
1053
1054 return name_info (data, var)->iv;
1055 }
1056
1057 /* Return the first non-invariant ssa var found in EXPR. */
1058
1059 static tree
1060 extract_single_var_from_expr (tree expr)
1061 {
1062 int i, n;
1063 tree tmp;
1064 enum tree_code code;
1065
1066 if (!expr || is_gimple_min_invariant (expr))
1067 return NULL;
1068
1069 code = TREE_CODE (expr);
1070 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1071 {
1072 n = TREE_OPERAND_LENGTH (expr);
1073 for (i = 0; i < n; i++)
1074 {
1075 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1076
1077 if (tmp)
1078 return tmp;
1079 }
1080 }
1081 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1082 }
1083
1084 /* Finds basic ivs. */
1085
1086 static bool
1087 find_bivs (struct ivopts_data *data)
1088 {
1089 gphi *phi;
1090 affine_iv iv;
1091 tree step, type, base, stop;
1092 bool found = false;
1093 struct loop *loop = data->current_loop;
1094 gphi_iterator psi;
1095
1096 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1097 {
1098 phi = psi.phi ();
1099
1100 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1101 continue;
1102
1103 if (virtual_operand_p (PHI_RESULT (phi)))
1104 continue;
1105
1106 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1107 continue;
1108
1109 if (integer_zerop (iv.step))
1110 continue;
1111
1112 step = iv.step;
1113 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1114       /* Stop expanding iv base at the first ssa var referred to by the iv step.
1115 	 Ideally we would stop at every such ssa var, but since that is expensive
1116 	 and such cases are unusual, we just do it for the first one.
1117
1118 See PR64705 for the rationale. */
1119 stop = extract_single_var_from_expr (step);
1120 base = expand_simple_operations (base, stop);
1121 if (contains_abnormal_ssa_name_p (base)
1122 || contains_abnormal_ssa_name_p (step))
1123 continue;
1124
1125 type = TREE_TYPE (PHI_RESULT (phi));
1126 base = fold_convert (type, base);
1127 if (step)
1128 {
1129 if (POINTER_TYPE_P (type))
1130 step = convert_to_ptrofftype (step);
1131 else
1132 step = fold_convert (type, step);
1133 }
1134
1135 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1136 found = true;
1137 }
1138
1139 return found;
1140 }
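/* For illustration (hypothetical SSA names), a basic induction variable as
   found above typically forms a cycle in GIMPLE like

     # i_1 = PHI <0(preheader), i_5(latch)>
     ...
     i_5 = i_1 + 1;

   in which case i_1 is recorded with base 0 and step 1.  */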
1141
1142 /* Marks basic ivs. */
1143
1144 static void
1145 mark_bivs (struct ivopts_data *data)
1146 {
1147 gphi *phi;
1148 gimple *def;
1149 tree var;
1150 struct iv *iv, *incr_iv;
1151 struct loop *loop = data->current_loop;
1152 basic_block incr_bb;
1153 gphi_iterator psi;
1154
1155 data->bivs_not_used_in_addr = 0;
1156 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1157 {
1158 phi = psi.phi ();
1159
1160 iv = get_iv (data, PHI_RESULT (phi));
1161 if (!iv)
1162 continue;
1163
1164 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1165 def = SSA_NAME_DEF_STMT (var);
1166       /* Don't mark an iv peeled from another one as a biv. */
1167 if (def
1168 && gimple_code (def) == GIMPLE_PHI
1169 && gimple_bb (def) == loop->header)
1170 continue;
1171
1172 incr_iv = get_iv (data, var);
1173 if (!incr_iv)
1174 continue;
1175
1176 /* If the increment is in the subloop, ignore it. */
1177 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1178 if (incr_bb->loop_father != data->current_loop
1179 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1180 continue;
1181
1182 iv->biv_p = true;
1183 incr_iv->biv_p = true;
1184 if (iv->no_overflow)
1185 data->bivs_not_used_in_addr++;
1186 if (incr_iv->no_overflow)
1187 data->bivs_not_used_in_addr++;
1188 }
1189 }
1190
1191 /* Checks whether STMT defines a linear induction variable and stores its
1192 parameters to IV. */
1193
1194 static bool
1195 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1196 {
1197 tree lhs, stop;
1198 struct loop *loop = data->current_loop;
1199
1200 iv->base = NULL_TREE;
1201 iv->step = NULL_TREE;
1202
1203 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1204 return false;
1205
1206 lhs = gimple_assign_lhs (stmt);
1207 if (TREE_CODE (lhs) != SSA_NAME)
1208 return false;
1209
1210 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1211 return false;
1212
1213   /* Stop expanding iv base at the first ssa var referred to by the iv step.
1214      Ideally we would stop at every such ssa var, but since that is expensive
1215      and such cases are unusual, we just do it for the first one.
1216
1217 See PR64705 for the rationale. */
1218 stop = extract_single_var_from_expr (iv->step);
1219 iv->base = expand_simple_operations (iv->base, stop);
1220 if (contains_abnormal_ssa_name_p (iv->base)
1221 || contains_abnormal_ssa_name_p (iv->step))
1222 return false;
1223
1224 /* If STMT could throw, then do not consider STMT as defining a GIV.
1225      While this will suppress optimizations, we cannot safely delete this
1226 GIV and associated statements, even if it appears it is not used. */
1227 if (stmt_could_throw_p (stmt))
1228 return false;
1229
1230 return true;
1231 }
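/* Hypothetical example of a general iv recognized by the function above:
   given a biv i_1 with base 0 and step 1, a statement such as

     off_3 = i_1 * 4;

   defines off_3 as an affine iv with base 0 and step 4, which simple_iv
   reports and which find_givs_in_stmt below then records.  */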
1232
1233 /* Finds general ivs in statement STMT. */
1234
1235 static void
1236 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1237 {
1238 affine_iv iv;
1239
1240 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1241 return;
1242
1243 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1244 }
1245
1246 /* Finds general ivs in basic block BB. */
1247
1248 static void
1249 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1250 {
1251 gimple_stmt_iterator bsi;
1252
1253 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1254 find_givs_in_stmt (data, gsi_stmt (bsi));
1255 }
1256
1257 /* Finds general ivs. */
1258
1259 static void
1260 find_givs (struct ivopts_data *data)
1261 {
1262 struct loop *loop = data->current_loop;
1263 basic_block *body = get_loop_body_in_dom_order (loop);
1264 unsigned i;
1265
1266 for (i = 0; i < loop->num_nodes; i++)
1267 find_givs_in_bb (data, body[i]);
1268 free (body);
1269 }
1270
1271 /* For each ssa name defined in LOOP, determine whether it is an induction
1272 variable and if so, its initial value and step. */
1273
1274 static bool
1275 find_induction_variables (struct ivopts_data *data)
1276 {
1277 unsigned i;
1278 bitmap_iterator bi;
1279
1280 if (!find_bivs (data))
1281 return false;
1282
1283 find_givs (data);
1284 mark_bivs (data);
1285
1286 if (dump_file && (dump_flags & TDF_DETAILS))
1287 {
1288 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1289
1290 if (niter)
1291 {
1292 fprintf (dump_file, " number of iterations ");
1293 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1294 if (!integer_zerop (niter->may_be_zero))
1295 {
1296 fprintf (dump_file, "; zero if ");
1297 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1298 }
1299 fprintf (dump_file, "\n\n");
1300 };
1301
1302 fprintf (dump_file, "Induction variables:\n\n");
1303
1304 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1305 {
1306 if (ver_info (data, i)->iv)
1307 dump_iv (dump_file, ver_info (data, i)->iv, true);
1308 }
1309 }
1310
1311 return true;
1312 }
1313
1314 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1315 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1316 is the const offset stripped from IV base. For uses of other types,
1317 ADDR_BASE and ADDR_OFFSET are zero by default. */
1318
1319 static struct iv_use *
1320 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1321 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1322 unsigned HOST_WIDE_INT addr_offset = 0)
1323 {
1324 struct iv_use *use = XCNEW (struct iv_use);
1325
1326 use->id = n_iv_uses (data);
1327 use->sub_id = 0;
1328 use->type = use_type;
1329 use->iv = iv;
1330 use->stmt = stmt;
1331 use->op_p = use_p;
1332 use->related_cands = BITMAP_ALLOC (NULL);
1333 use->next = NULL;
1334 use->addr_base = addr_base;
1335 use->addr_offset = addr_offset;
1336
1337 data->iv_uses.safe_push (use);
1338
1339 return use;
1340 }
1341
1342 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1343 The sub use is recorded under the one whose use id is ID_GROUP. */
1344
1345 static struct iv_use *
1346 record_sub_use (struct ivopts_data *data, tree *use_p,
1347 struct iv *iv, gimple *stmt, enum use_type use_type,
1348 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1349 unsigned int id_group)
1350 {
1351 struct iv_use *use = XCNEW (struct iv_use);
1352 struct iv_use *group = iv_use (data, id_group);
1353
1354 use->id = group->id;
1355 use->sub_id = 0;
1356 use->type = use_type;
1357 use->iv = iv;
1358 use->stmt = stmt;
1359 use->op_p = use_p;
1360 use->related_cands = NULL;
1361 use->addr_base = addr_base;
1362 use->addr_offset = addr_offset;
1363
1364 /* Sub use list is maintained in offset ascending order. */
1365 if (addr_offset <= group->addr_offset)
1366 {
1367 use->related_cands = group->related_cands;
1368 group->related_cands = NULL;
1369 use->next = group;
1370 data->iv_uses[id_group] = use;
1371 }
1372 else
1373 {
1374 struct iv_use *pre;
1375 do
1376 {
1377 pre = group;
1378 group = group->next;
1379 }
1380 while (group && addr_offset > group->addr_offset);
1381 use->next = pre->next;
1382 pre->next = use;
1383 }
1384
1385 return use;
1386 }
1387
1388 /* Checks whether OP is a loop-level invariant and if so, records it.
1389 NONLINEAR_USE is true if the invariant is used in a way we do not
1390 handle specially. */
1391
1392 static void
1393 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1394 {
1395 basic_block bb;
1396 struct version_info *info;
1397
1398 if (TREE_CODE (op) != SSA_NAME
1399 || virtual_operand_p (op))
1400 return;
1401
1402 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1403 if (bb
1404 && flow_bb_inside_loop_p (data->current_loop, bb))
1405 return;
1406
1407 info = name_info (data, op);
1408 info->name = op;
1409 info->has_nonlin_use |= nonlinear_use;
1410 if (!info->inv_id)
1411 info->inv_id = ++data->max_inv_id;
1412 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1413 }
1414
1415 /* Checks whether the use OP is interesting and if so, records it. */
1416
1417 static struct iv_use *
1418 find_interesting_uses_op (struct ivopts_data *data, tree op)
1419 {
1420 struct iv *iv;
1421 gimple *stmt;
1422 struct iv_use *use;
1423
1424 if (TREE_CODE (op) != SSA_NAME)
1425 return NULL;
1426
1427 iv = get_iv (data, op);
1428 if (!iv)
1429 return NULL;
1430
1431 if (iv->have_use_for)
1432 {
1433 use = iv_use (data, iv->use_id);
1434
1435 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1436 return use;
1437 }
1438
1439 if (integer_zerop (iv->step))
1440 {
1441 record_invariant (data, op, true);
1442 return NULL;
1443 }
1444 iv->have_use_for = true;
1445
1446 stmt = SSA_NAME_DEF_STMT (op);
1447 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1448 || is_gimple_assign (stmt));
1449
1450 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1451 iv->use_id = use->id;
1452
1453 return use;
1454 }
1455
1456 /* Given a condition in statement STMT, checks whether it is a compare
1457 of an induction variable and an invariant. If this is the case,
1458 CONTROL_VAR is set to location of the iv, BOUND to the location of
1459 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1460 induction variable descriptions, and true is returned. If this is not
1461 the case, CONTROL_VAR and BOUND are set to the arguments of the
1462 condition and false is returned. */
1463
1464 static bool
1465 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1466 tree **control_var, tree **bound,
1467 struct iv **iv_var, struct iv **iv_bound)
1468 {
1469 /* The objects returned when COND has constant operands. */
1470 static struct iv const_iv;
1471 static tree zero;
1472 tree *op0 = &zero, *op1 = &zero;
1473 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1474 bool ret = false;
1475
1476 if (gimple_code (stmt) == GIMPLE_COND)
1477 {
1478 gcond *cond_stmt = as_a <gcond *> (stmt);
1479 op0 = gimple_cond_lhs_ptr (cond_stmt);
1480 op1 = gimple_cond_rhs_ptr (cond_stmt);
1481 }
1482 else
1483 {
1484 op0 = gimple_assign_rhs1_ptr (stmt);
1485 op1 = gimple_assign_rhs2_ptr (stmt);
1486 }
1487
1488 zero = integer_zero_node;
1489 const_iv.step = integer_zero_node;
1490
1491 if (TREE_CODE (*op0) == SSA_NAME)
1492 iv0 = get_iv (data, *op0);
1493 if (TREE_CODE (*op1) == SSA_NAME)
1494 iv1 = get_iv (data, *op1);
1495
1496 /* Exactly one of the compared values must be an iv, and the other one must
1497 be an invariant. */
1498 if (!iv0 || !iv1)
1499 goto end;
1500
1501 if (integer_zerop (iv0->step))
1502 {
1503 /* Control variable may be on the other side. */
1504 std::swap (op0, op1);
1505 std::swap (iv0, iv1);
1506 }
1507 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1508
1509 end:
1510 if (control_var)
1511 *control_var = op0;
1512 if (iv_var)
1513 *iv_var = iv0;
1514 if (bound)
1515 *bound = op1;
1516 if (iv_bound)
1517 *iv_bound = iv1;
1518
1519 return ret;
1520 }
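/* For instance (hypothetical names): for a condition i_1 < n_2 where i_1 is
   an iv with nonzero step and n_2 is a loop invariant, the function above
   points *CONTROL_VAR at i_1 and *BOUND at n_2 and returns true; if the
   comparison is not between exactly one iv with nonzero step and an
   invariant, it returns false.  */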
1521
1522 /* Checks whether the condition in STMT is interesting and if so,
1523 records it. */
1524
1525 static void
1526 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1527 {
1528 tree *var_p, *bound_p;
1529 struct iv *var_iv;
1530
1531 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1532 {
1533 find_interesting_uses_op (data, *var_p);
1534 find_interesting_uses_op (data, *bound_p);
1535 return;
1536 }
1537
1538 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1539 }
1540
1541 /* Returns the outermost loop EXPR is obviously invariant in
1542 relative to the loop LOOP, i.e. if all its operands are defined
1543 outside of the returned loop. Returns NULL if EXPR is not
1544 even obviously invariant in LOOP. */
1545
1546 struct loop *
1547 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1548 {
1549 basic_block def_bb;
1550 unsigned i, len;
1551
1552 if (is_gimple_min_invariant (expr))
1553 return current_loops->tree_root;
1554
1555 if (TREE_CODE (expr) == SSA_NAME)
1556 {
1557 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1558 if (def_bb)
1559 {
1560 if (flow_bb_inside_loop_p (loop, def_bb))
1561 return NULL;
1562 return superloop_at_depth (loop,
1563 loop_depth (def_bb->loop_father) + 1);
1564 }
1565
1566 return current_loops->tree_root;
1567 }
1568
1569 if (!EXPR_P (expr))
1570 return NULL;
1571
1572 unsigned maxdepth = 0;
1573 len = TREE_OPERAND_LENGTH (expr);
1574 for (i = 0; i < len; i++)
1575 {
1576 struct loop *ivloop;
1577 if (!TREE_OPERAND (expr, i))
1578 continue;
1579
1580 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1581 if (!ivloop)
1582 return NULL;
1583 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1584 }
1585
1586 return superloop_at_depth (loop, maxdepth);
1587 }
1588
1589 /* Returns true if expression EXPR is obviously invariant in LOOP,
1590 i.e. if all its operands are defined outside of the LOOP. LOOP
1591 should not be the function body. */
1592
1593 bool
1594 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1595 {
1596 basic_block def_bb;
1597 unsigned i, len;
1598
1599 gcc_assert (loop_depth (loop) > 0);
1600
1601 if (is_gimple_min_invariant (expr))
1602 return true;
1603
1604 if (TREE_CODE (expr) == SSA_NAME)
1605 {
1606 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1607 if (def_bb
1608 && flow_bb_inside_loop_p (loop, def_bb))
1609 return false;
1610
1611 return true;
1612 }
1613
1614 if (!EXPR_P (expr))
1615 return false;
1616
1617 len = TREE_OPERAND_LENGTH (expr);
1618 for (i = 0; i < len; i++)
1619 if (TREE_OPERAND (expr, i)
1620 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1621 return false;
1622
1623 return true;
1624 }
1625
1626 /* Given expression EXPR which computes inductive values with respect
1627    to the loop recorded in DATA, this function returns the biv from which
1628    EXPR is derived, by tracing the definition chains of ssa variables in EXPR.  */
1629
1630 static struct iv*
1631 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1632 {
1633 struct iv *iv;
1634 unsigned i, n;
1635 tree e2, e1;
1636 enum tree_code code;
1637 gimple *stmt;
1638
1639 if (expr == NULL_TREE)
1640 return NULL;
1641
1642 if (is_gimple_min_invariant (expr))
1643 return NULL;
1644
1645 code = TREE_CODE (expr);
1646 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1647 {
1648 n = TREE_OPERAND_LENGTH (expr);
1649 for (i = 0; i < n; i++)
1650 {
1651 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1652 if (iv)
1653 return iv;
1654 }
1655 }
1656
1657 /* Stop if it's not ssa name. */
1658 if (code != SSA_NAME)
1659 return NULL;
1660
1661 iv = get_iv (data, expr);
1662 if (!iv || integer_zerop (iv->step))
1663 return NULL;
1664 else if (iv->biv_p)
1665 return iv;
1666
1667 stmt = SSA_NAME_DEF_STMT (expr);
1668 if (gphi *phi = dyn_cast <gphi *> (stmt))
1669 {
1670 ssa_op_iter iter;
1671 use_operand_p use_p;
1672
1673 if (virtual_operand_p (gimple_phi_result (phi)))
1674 return NULL;
1675
1676 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1677 {
1678 tree use = USE_FROM_PTR (use_p);
1679 iv = find_deriving_biv_for_expr (data, use);
1680 if (iv)
1681 return iv;
1682 }
1683 return NULL;
1684 }
1685 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1686 return NULL;
1687
1688 e1 = gimple_assign_rhs1 (stmt);
1689 code = gimple_assign_rhs_code (stmt);
1690 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1691 return find_deriving_biv_for_expr (data, e1);
1692
1693 switch (code)
1694 {
1695 case MULT_EXPR:
1696 case PLUS_EXPR:
1697 case MINUS_EXPR:
1698 case POINTER_PLUS_EXPR:
1699 /* Increments, decrements and multiplications by a constant
1700 are simple. */
1701 e2 = gimple_assign_rhs2 (stmt);
1702 iv = find_deriving_biv_for_expr (data, e2);
1703 if (iv)
1704 return iv;
1705
1706 /* Fallthru. */
1707 CASE_CONVERT:
1708 /* Casts are simple. */
1709 return find_deriving_biv_for_expr (data, e1);
1710
1711 default:
1712 break;
1713 }
1714
1715 return NULL;
1716 }
1717
1718 /* Record that BIV, as well as its predecessor and successor bivs, are
1719    used in address type uses.  */
1720
1721 static void
1722 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1723 {
1724 unsigned i;
1725 tree type, base_1, base_2;
1726 bitmap_iterator bi;
1727
1728 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1729 || biv->have_address_use || !biv->no_overflow)
1730 return;
1731
1732 type = TREE_TYPE (biv->base);
1733 if (!INTEGRAL_TYPE_P (type))
1734 return;
1735
1736 biv->have_address_use = true;
1737 data->bivs_not_used_in_addr--;
1738 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1739 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1740 {
1741 struct iv *iv = ver_info (data, i)->iv;
1742
1743 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1744 || iv->have_address_use || !iv->no_overflow)
1745 continue;
1746
1747 if (type != TREE_TYPE (iv->base)
1748 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1749 continue;
1750
1751 if (!operand_equal_p (biv->step, iv->step, 0))
1752 continue;
1753
1754 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1755 if (operand_equal_p (base_1, iv->base, 0)
1756 || operand_equal_p (base_2, biv->base, 0))
1757 {
1758 iv->have_address_use = true;
1759 data->bivs_not_used_in_addr--;
1760 }
1761 }
1762 }
1763
1764 /* Accumulates the steps of indices into DATA and replaces their values with the
1765 initial ones. Returns false when the value of the index cannot be determined.
1766 Callback for for_each_index. */
1767
1768 struct ifs_ivopts_data
1769 {
1770 struct ivopts_data *ivopts_data;
1771 gimple *stmt;
1772 tree step;
1773 };
1774
1775 static bool
1776 idx_find_step (tree base, tree *idx, void *data)
1777 {
1778 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1779 struct iv *iv;
1780 bool use_overflow_semantics = false;
1781 tree step, iv_base, iv_step, lbound, off;
1782 struct loop *loop = dta->ivopts_data->current_loop;
1783
1784 /* If base is a component ref, require that the offset of the reference
1785 be invariant. */
1786 if (TREE_CODE (base) == COMPONENT_REF)
1787 {
1788 off = component_ref_field_offset (base);
1789 return expr_invariant_in_loop_p (loop, off);
1790 }
1791
1792 /* If base is array, first check whether we will be able to move the
1793 reference out of the loop (in order to take its address in strength
1794 reduction). In order for this to work we need both lower bound
1795 and step to be loop invariants. */
1796 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1797 {
1798 /* Moreover, for a range, the size needs to be invariant as well. */
1799 if (TREE_CODE (base) == ARRAY_RANGE_REF
1800 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1801 return false;
1802
1803 step = array_ref_element_size (base);
1804 lbound = array_ref_low_bound (base);
1805
1806 if (!expr_invariant_in_loop_p (loop, step)
1807 || !expr_invariant_in_loop_p (loop, lbound))
1808 return false;
1809 }
1810
1811 if (TREE_CODE (*idx) != SSA_NAME)
1812 return true;
1813
1814 iv = get_iv (dta->ivopts_data, *idx);
1815 if (!iv)
1816 return false;
1817
1818 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1819 *&x[0], which is not folded and does not trigger the
1820 ARRAY_REF path below. */
1821 *idx = iv->base;
1822
1823 if (integer_zerop (iv->step))
1824 return true;
1825
1826 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1827 {
1828 step = array_ref_element_size (base);
1829
1830 /* We only handle addresses whose step is an integer constant. */
1831 if (TREE_CODE (step) != INTEGER_CST)
1832 return false;
1833 }
1834 else
1835     /* The step for pointer arithmetic is already 1 byte. */
1836 step = size_one_node;
1837
1838 iv_base = iv->base;
1839 iv_step = iv->step;
1840 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1841 use_overflow_semantics = true;
1842
1843 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1844 sizetype, &iv_base, &iv_step, dta->stmt,
1845 use_overflow_semantics))
1846 {
1847 /* The index might wrap. */
1848 return false;
1849 }
1850
1851 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1852 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1853
1854 if (dta->ivopts_data->bivs_not_used_in_addr)
1855 {
1856 if (!iv->biv_p)
1857 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1858
1859 record_biv_for_address_use (dta->ivopts_data, iv);
1860 }
1861 return true;
1862 }
1863
1864 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1865 object is passed to it in DATA. */
1866
1867 static bool
1868 idx_record_use (tree base, tree *idx,
1869 void *vdata)
1870 {
1871 struct ivopts_data *data = (struct ivopts_data *) vdata;
1872 find_interesting_uses_op (data, *idx);
1873 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1874 {
1875 find_interesting_uses_op (data, array_ref_element_size (base));
1876 find_interesting_uses_op (data, array_ref_low_bound (base));
1877 }
1878 return true;
1879 }
1880
1881 /* If we can prove that TOP = cst * BOT for some constant cst,
1882 store cst to MUL and return true. Otherwise return false.
1883 The returned value is always sign-extended, regardless of the
1884 signedness of TOP and BOT. */
1885
1886 static bool
1887 constant_multiple_of (tree top, tree bot, widest_int *mul)
1888 {
1889 tree mby;
1890 enum tree_code code;
1891 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1892 widest_int res, p0, p1;
1893
1894 STRIP_NOPS (top);
1895 STRIP_NOPS (bot);
1896
1897 if (operand_equal_p (top, bot, 0))
1898 {
1899 *mul = 1;
1900 return true;
1901 }
1902
1903 code = TREE_CODE (top);
1904 switch (code)
1905 {
1906 case MULT_EXPR:
1907 mby = TREE_OPERAND (top, 1);
1908 if (TREE_CODE (mby) != INTEGER_CST)
1909 return false;
1910
1911 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1912 return false;
1913
1914 *mul = wi::sext (res * wi::to_widest (mby), precision);
1915 return true;
1916
1917 case PLUS_EXPR:
1918 case MINUS_EXPR:
1919 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1920 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1921 return false;
1922
1923 if (code == MINUS_EXPR)
1924 p1 = -p1;
1925 *mul = wi::sext (p0 + p1, precision);
1926 return true;
1927
1928 case INTEGER_CST:
1929 if (TREE_CODE (bot) != INTEGER_CST)
1930 return false;
1931
1932 p0 = widest_int::from (top, SIGNED);
1933 p1 = widest_int::from (bot, SIGNED);
1934 if (p1 == 0)
1935 return false;
1936 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1937 return res == 0;
1938
1939 default:
1940 return false;
1941 }
1942 }
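/* A couple of illustrative cases for the function above: with TOP == n * 4
   and BOT == n it stores 4 to *MUL and returns true; with TOP == BOT it
   stores 1; and with TOP == 12, BOT == 8 it returns false, because the
   truncating division leaves a remainder.  */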
1943
1944 /* Return true if memory reference REF with step STEP may be unaligned. */
1945
1946 static bool
1947 may_be_unaligned_p (tree ref, tree step)
1948 {
1949 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1950 thus they are not misaligned. */
1951 if (TREE_CODE (ref) == TARGET_MEM_REF)
1952 return false;
1953
1954 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1955 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1956 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1957
1958 unsigned HOST_WIDE_INT bitpos;
1959 unsigned int ref_align;
1960 get_object_alignment_1 (ref, &ref_align, &bitpos);
1961 if (ref_align < align
1962 || (bitpos % align) != 0
1963 || (bitpos % BITS_PER_UNIT) != 0)
1964 return true;
1965
1966 unsigned int trailing_zeros = tree_ctz (step);
1967 if (trailing_zeros < HOST_BITS_PER_INT
1968 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1969 return true;
1970
1971 return false;
1972 }
1973
1974 /* Return true if EXPR may be non-addressable. */
1975
1976 bool
1977 may_be_nonaddressable_p (tree expr)
1978 {
1979 switch (TREE_CODE (expr))
1980 {
1981 case TARGET_MEM_REF:
1982 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1983 target, thus they are always addressable. */
1984 return false;
1985
1986 case MEM_REF:
1987 /* Likewise for MEM_REFs, modulo the storage order. */
1988 return REF_REVERSE_STORAGE_ORDER (expr);
1989
1990 case BIT_FIELD_REF:
1991 if (REF_REVERSE_STORAGE_ORDER (expr))
1992 return true;
1993 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1994
1995 case COMPONENT_REF:
1996 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
1997 return true;
1998 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1999 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2000
2001 case ARRAY_REF:
2002 case ARRAY_RANGE_REF:
2003 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2004 return true;
2005 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2006
2007 case VIEW_CONVERT_EXPR:
2008 /* This kind of view-conversions may wrap non-addressable objects
2009 and make them look addressable. After some processing the
2010 non-addressability may be uncovered again, causing ADDR_EXPRs
2011 of inappropriate objects to be built. */
2012 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2013 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2014 return true;
2015 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2016
2017 CASE_CONVERT:
2018 return true;
2019
2020 default:
2021 break;
2022 }
2023
2024 return false;
2025 }
2026
2027 static tree
2028 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2029
2030 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2031 If there is an existing use which has the same stripped iv base and step,
2032 this function records this one as a sub-use of that one; otherwise it
2033 records it as a normal use. */
2034
2035 static struct iv_use *
2036 record_group_use (struct ivopts_data *data, tree *use_p,
2037 struct iv *iv, gimple *stmt, enum use_type use_type)
2038 {
2039 unsigned int i;
2040 struct iv_use *use;
2041 tree addr_base;
2042 unsigned HOST_WIDE_INT addr_offset;
2043
2044 /* Only support sub-uses for address type uses, that is, those with a
2045 base object. */
2046 if (!iv->base_object)
2047 return record_use (data, use_p, iv, stmt, use_type);
2048
2049 addr_base = strip_offset (iv->base, &addr_offset);
2050 for (i = 0; i < n_iv_uses (data); i++)
2051 {
2052 use = iv_use (data, i);
2053 if (use->type != USE_ADDRESS || !use->iv->base_object)
2054 continue;
2055
2056 /* Check if it has the same stripped base and step. */
2057 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2058 && operand_equal_p (iv->step, use->iv->step, 0)
2059 && operand_equal_p (addr_base, use->addr_base, 0))
2060 break;
2061 }
2062
2063 if (i == n_iv_uses (data))
2064 return record_use (data, use_p, iv, stmt,
2065 use_type, addr_base, addr_offset);
2066 else
2067 return record_sub_use (data, use_p, iv, stmt,
2068 use_type, addr_base, addr_offset, i);
2069 }
2070
2071 /* Finds addresses in *OP_P inside STMT. */
2072
2073 static void
2074 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2075 tree *op_p)
2076 {
2077 tree base = *op_p, step = size_zero_node;
2078 struct iv *civ;
2079 struct ifs_ivopts_data ifs_ivopts_data;
2080
2081 /* Do not play with volatile memory references. A bit too conservative,
2082 perhaps, but safe. */
2083 if (gimple_has_volatile_ops (stmt))
2084 goto fail;
2085
2086 /* Ignore bitfields for now. Not really something terribly complicated
2087 to handle. TODO. */
2088 if (TREE_CODE (base) == BIT_FIELD_REF)
2089 goto fail;
2090
2091 base = unshare_expr (base);
2092
2093 if (TREE_CODE (base) == TARGET_MEM_REF)
2094 {
2095 tree type = build_pointer_type (TREE_TYPE (base));
2096 tree astep;
2097
2098 if (TMR_BASE (base)
2099 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2100 {
2101 civ = get_iv (data, TMR_BASE (base));
2102 if (!civ)
2103 goto fail;
2104
2105 TMR_BASE (base) = civ->base;
2106 step = civ->step;
2107 }
2108 if (TMR_INDEX2 (base)
2109 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2110 {
2111 civ = get_iv (data, TMR_INDEX2 (base));
2112 if (!civ)
2113 goto fail;
2114
2115 TMR_INDEX2 (base) = civ->base;
2116 step = civ->step;
2117 }
2118 if (TMR_INDEX (base)
2119 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2120 {
2121 civ = get_iv (data, TMR_INDEX (base));
2122 if (!civ)
2123 goto fail;
2124
2125 TMR_INDEX (base) = civ->base;
2126 astep = civ->step;
2127
2128 if (astep)
2129 {
2130 if (TMR_STEP (base))
2131 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2132
2133 step = fold_build2 (PLUS_EXPR, type, step, astep);
2134 }
2135 }
2136
2137 if (integer_zerop (step))
2138 goto fail;
2139 base = tree_mem_ref_addr (type, base);
2140 }
2141 else
2142 {
2143 ifs_ivopts_data.ivopts_data = data;
2144 ifs_ivopts_data.stmt = stmt;
2145 ifs_ivopts_data.step = size_zero_node;
2146 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2147 || integer_zerop (ifs_ivopts_data.step))
2148 goto fail;
2149 step = ifs_ivopts_data.step;
2150
2151 /* Check that the base expression is addressable. This needs
2152 to be done after substituting bases of IVs into it. */
2153 if (may_be_nonaddressable_p (base))
2154 goto fail;
2155
2156 /* Moreover, on strict alignment platforms, check that it is
2157 sufficiently aligned. */
2158 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2159 goto fail;
2160
2161 base = build_fold_addr_expr (base);
2162
2163 /* Substituting bases of IVs into the base expression might
2164 have caused folding opportunities. */
2165 if (TREE_CODE (base) == ADDR_EXPR)
2166 {
2167 tree *ref = &TREE_OPERAND (base, 0);
2168 while (handled_component_p (*ref))
2169 ref = &TREE_OPERAND (*ref, 0);
2170 if (TREE_CODE (*ref) == MEM_REF)
2171 {
2172 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2173 TREE_OPERAND (*ref, 0),
2174 TREE_OPERAND (*ref, 1));
2175 if (tem)
2176 *ref = tem;
2177 }
2178 }
2179 }
2180
2181 civ = alloc_iv (data, base, step);
2182 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2183 return;
2184
2185 fail:
2186 for_each_index (op_p, idx_record_use, data);
2187 }
2188
2189 /* Finds and records invariants used in STMT. */
2190
2191 static void
2192 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2193 {
2194 ssa_op_iter iter;
2195 use_operand_p use_p;
2196 tree op;
2197
2198 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2199 {
2200 op = USE_FROM_PTR (use_p);
2201 record_invariant (data, op, false);
2202 }
2203 }
2204
2205 /* Finds interesting uses of induction variables in the statement STMT. */
2206
2207 static void
2208 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2209 {
2210 struct iv *iv;
2211 tree op, *lhs, *rhs;
2212 ssa_op_iter iter;
2213 use_operand_p use_p;
2214 enum tree_code code;
2215
2216 find_invariants_stmt (data, stmt);
2217
2218 if (gimple_code (stmt) == GIMPLE_COND)
2219 {
2220 find_interesting_uses_cond (data, stmt);
2221 return;
2222 }
2223
2224 if (is_gimple_assign (stmt))
2225 {
2226 lhs = gimple_assign_lhs_ptr (stmt);
2227 rhs = gimple_assign_rhs1_ptr (stmt);
2228
2229 if (TREE_CODE (*lhs) == SSA_NAME)
2230 {
2231 /* If the statement defines an induction variable, the uses are not
2232 interesting by themselves. */
2233
2234 iv = get_iv (data, *lhs);
2235
2236 if (iv && !integer_zerop (iv->step))
2237 return;
2238 }
2239
2240 code = gimple_assign_rhs_code (stmt);
2241 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2242 && (REFERENCE_CLASS_P (*rhs)
2243 || is_gimple_val (*rhs)))
2244 {
2245 if (REFERENCE_CLASS_P (*rhs))
2246 find_interesting_uses_address (data, stmt, rhs);
2247 else
2248 find_interesting_uses_op (data, *rhs);
2249
2250 if (REFERENCE_CLASS_P (*lhs))
2251 find_interesting_uses_address (data, stmt, lhs);
2252 return;
2253 }
2254 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2255 {
2256 find_interesting_uses_cond (data, stmt);
2257 return;
2258 }
2259
2260 /* TODO -- we should also handle address uses of type
2261
2262 memory = call (whatever);
2263
2264 and
2265
2266 call (memory). */
2267 }
2268
2269 if (gimple_code (stmt) == GIMPLE_PHI
2270 && gimple_bb (stmt) == data->current_loop->header)
2271 {
2272 iv = get_iv (data, PHI_RESULT (stmt));
2273
2274 if (iv && !integer_zerop (iv->step))
2275 return;
2276 }
2277
2278 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2279 {
2280 op = USE_FROM_PTR (use_p);
2281
2282 if (TREE_CODE (op) != SSA_NAME)
2283 continue;
2284
2285 iv = get_iv (data, op);
2286 if (!iv)
2287 continue;
2288
2289 find_interesting_uses_op (data, op);
2290 }
2291 }
2292
2293 /* Finds interesting uses of induction variables outside of loops
2294 on loop exit edge EXIT. */
2295
2296 static void
2297 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2298 {
2299 gphi *phi;
2300 gphi_iterator psi;
2301 tree def;
2302
2303 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2304 {
2305 phi = psi.phi ();
2306 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2307 if (!virtual_operand_p (def))
2308 find_interesting_uses_op (data, def);
2309 }
2310 }
2311
2312 /* Finds uses of the induction variables that are interesting. */
2313
2314 static void
2315 find_interesting_uses (struct ivopts_data *data)
2316 {
2317 basic_block bb;
2318 gimple_stmt_iterator bsi;
2319 basic_block *body = get_loop_body (data->current_loop);
2320 unsigned i;
2321 struct version_info *info;
2322 edge e;
2323
2324 if (dump_file && (dump_flags & TDF_DETAILS))
2325 fprintf (dump_file, "Uses:\n\n");
2326
2327 for (i = 0; i < data->current_loop->num_nodes; i++)
2328 {
2329 edge_iterator ei;
2330 bb = body[i];
2331
2332 FOR_EACH_EDGE (e, ei, bb->succs)
2333 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2334 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2335 find_interesting_uses_outside (data, e);
2336
2337 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2338 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2339 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2340 if (!is_gimple_debug (gsi_stmt (bsi)))
2341 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2342 }
2343
2344 if (dump_file && (dump_flags & TDF_DETAILS))
2345 {
2346 bitmap_iterator bi;
2347
2348 fprintf (dump_file, "\n");
2349
2350 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2351 {
2352 info = ver_info (data, i);
2353 if (info->inv_id)
2354 {
2355 fprintf (dump_file, " ");
2356 print_generic_expr (dump_file, info->name, TDF_SLIM);
2357 fprintf (dump_file, " is invariant (%d)%s\n",
2358 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2359 }
2360 }
2361
2362 fprintf (dump_file, "\n");
2363 }
2364
2365 free (body);
2366 }
2367
2368 /* Compute the maximum offset of the [base + offset] addressing mode
2369 for the memory reference represented by USE. */
2370
2371 static HOST_WIDE_INT
2372 compute_max_addr_offset (struct iv_use *use)
2373 {
2374 int width;
2375 rtx reg, addr;
2376 HOST_WIDE_INT i, off;
2377 unsigned list_index, num;
2378 addr_space_t as;
2379 machine_mode mem_mode, addr_mode;
2380 static vec<HOST_WIDE_INT> max_offset_list;
2381
2382 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2383 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2384
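/* The maximum offset is cached per (address space, memory mode) pair;
-1 marks an entry that has not been computed yet. */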
2385 num = max_offset_list.length ();
2386 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2387 if (list_index >= num)
2388 {
2389 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2390 for (; num < max_offset_list.length (); num++)
2391 max_offset_list[num] = -1;
2392 }
2393
2394 off = max_offset_list[list_index];
2395 if (off != -1)
2396 return off;
2397
2398 addr_mode = targetm.addr_space.address_mode (as);
2399 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2400 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2401
2402 width = GET_MODE_BITSIZE (addr_mode) - 1;
2403 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2404 width = HOST_BITS_PER_WIDE_INT - 1;
2405
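/* Probe offsets of the form 2^i - 1 (or the nearest naturally aligned
value) from the widest downwards, and take the first one that the
target accepts. */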
2406 for (i = width; i > 0; i--)
2407 {
2408 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2409 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2410 if (memory_address_addr_space_p (mem_mode, addr, as))
2411 break;
2412
2413 /* For some strict-alignment targets, the offset must be naturally
2414 aligned. Try an aligned offset if mem_mode is not QImode. */
2415 off = ((unsigned HOST_WIDE_INT) 1 << i);
2416 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2417 {
2418 off -= GET_MODE_SIZE (mem_mode);
2419 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2420 if (memory_address_addr_space_p (mem_mode, addr, as))
2421 break;
2422 }
2423 }
2424 if (i == 0)
2425 off = 0;
2426
2427 max_offset_list[list_index] = off;
2428 return off;
2429 }
2430
2431 /* Check if all small groups should be split. Return true if and
2432 only if:
2433
2434 1) At least one group contains two uses with different offsets.
2435 2) No group contains more than two uses with different offsets.
2436
2437 Return false otherwise. We want to split such groups because:
2438
2439 1) Small groups don't bring much benefit and may interfere with
2440 general candidate selection.
2441 2) The problem size with only small groups is usually small and the
2442 general algorithm can handle it well.
2443
2444 TODO -- The above claim may not hold when auto increment is supported. */
2445
2446 static bool
2447 split_all_small_groups (struct ivopts_data *data)
2448 {
2449 bool split_p = false;
2450 unsigned int i, n, distinct;
2451 struct iv_use *pre, *use;
2452
2453 n = n_iv_uses (data);
2454 for (i = 0; i < n; i++)
2455 {
2456 use = iv_use (data, i);
2457 if (!use->next)
2458 continue;
2459
2460 distinct = 1;
2461 gcc_assert (use->type == USE_ADDRESS);
2462 for (pre = use, use = use->next; use; pre = use, use = use->next)
2463 {
2464 if (pre->addr_offset != use->addr_offset)
2465 distinct++;
2466
2467 if (distinct > 2)
2468 return false;
2469 }
2470 if (distinct == 2)
2471 split_p = true;
2472 }
2473
2474 return split_p;
2475 }
2476
2477 /* For each group of address type uses, this function further groups
2478 these uses according to the maximum offset supported by target's
2479 [base + offset] addressing mode. */
2480
2481 static void
2482 group_address_uses (struct ivopts_data *data)
2483 {
2484 HOST_WIDE_INT max_offset = -1;
2485 unsigned int i, n, sub_id;
2486 struct iv_use *pre, *use;
2487 unsigned HOST_WIDE_INT addr_offset_first;
2488
2489 /* Reset max offset to split all small groups. */
2490 if (split_all_small_groups (data))
2491 max_offset = 0;
2492
2493 n = n_iv_uses (data);
2494 for (i = 0; i < n; i++)
2495 {
2496 use = iv_use (data, i);
2497 if (!use->next)
2498 continue;
2499
2500 gcc_assert (use->type == USE_ADDRESS);
2501 if (max_offset != 0)
2502 max_offset = compute_max_addr_offset (use);
2503
2504 while (use)
2505 {
2506 sub_id = 0;
2507 addr_offset_first = use->addr_offset;
2508 /* Only uses whose offset relative to the first use fits in the
2509 offset part of the addressing mode can be grouped together. */
2510 for (pre = use, use = use->next;
2511 use && (use->addr_offset - addr_offset_first
2512 <= (unsigned HOST_WIDE_INT) max_offset);
2513 pre = use, use = use->next)
2514 {
2515 use->id = pre->id;
2516 use->sub_id = ++sub_id;
2517 }
2518
2519 /* Break the list and create a new group. */
2520 if (use)
2521 {
2522 pre->next = NULL;
2523 use->id = n_iv_uses (data);
2524 use->related_cands = BITMAP_ALLOC (NULL);
2525 data->iv_uses.safe_push (use);
2526 }
2527 }
2528 }
2529
2530 if (dump_file && (dump_flags & TDF_DETAILS))
2531 dump_uses (dump_file, data);
2532 }
2533
2534 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2535 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2536 we are at the top-level of the processed address. */
2537
2538 static tree
2539 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2540 HOST_WIDE_INT *offset)
2541 {
2542 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2543 enum tree_code code;
2544 tree type, orig_type = TREE_TYPE (expr);
2545 HOST_WIDE_INT off0, off1, st;
2546 tree orig_expr = expr;
2547
2548 STRIP_NOPS (expr);
2549
2550 type = TREE_TYPE (expr);
2551 code = TREE_CODE (expr);
2552 *offset = 0;
2553
2554 switch (code)
2555 {
2556 case INTEGER_CST:
2557 if (!cst_and_fits_in_hwi (expr)
2558 || integer_zerop (expr))
2559 return orig_expr;
2560
2561 *offset = int_cst_value (expr);
2562 return build_int_cst (orig_type, 0);
2563
2564 case POINTER_PLUS_EXPR:
2565 case PLUS_EXPR:
2566 case MINUS_EXPR:
2567 op0 = TREE_OPERAND (expr, 0);
2568 op1 = TREE_OPERAND (expr, 1);
2569
2570 op0 = strip_offset_1 (op0, false, false, &off0);
2571 op1 = strip_offset_1 (op1, false, false, &off1);
2572
2573 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2574 if (op0 == TREE_OPERAND (expr, 0)
2575 && op1 == TREE_OPERAND (expr, 1))
2576 return orig_expr;
2577
2578 if (integer_zerop (op1))
2579 expr = op0;
2580 else if (integer_zerop (op0))
2581 {
2582 if (code == MINUS_EXPR)
2583 expr = fold_build1 (NEGATE_EXPR, type, op1);
2584 else
2585 expr = op1;
2586 }
2587 else
2588 expr = fold_build2 (code, type, op0, op1);
2589
2590 return fold_convert (orig_type, expr);
2591
2592 case MULT_EXPR:
2593 op1 = TREE_OPERAND (expr, 1);
2594 if (!cst_and_fits_in_hwi (op1))
2595 return orig_expr;
2596
2597 op0 = TREE_OPERAND (expr, 0);
2598 op0 = strip_offset_1 (op0, false, false, &off0);
2599 if (op0 == TREE_OPERAND (expr, 0))
2600 return orig_expr;
2601
2602 *offset = off0 * int_cst_value (op1);
2603 if (integer_zerop (op0))
2604 expr = op0;
2605 else
2606 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2607
2608 return fold_convert (orig_type, expr);
2609
2610 case ARRAY_REF:
2611 case ARRAY_RANGE_REF:
2612 if (!inside_addr)
2613 return orig_expr;
2614
2615 step = array_ref_element_size (expr);
2616 if (!cst_and_fits_in_hwi (step))
2617 break;
2618
2619 st = int_cst_value (step);
2620 op1 = TREE_OPERAND (expr, 1);
2621 op1 = strip_offset_1 (op1, false, false, &off1);
2622 *offset = off1 * st;
2623
2624 if (top_compref
2625 && integer_zerop (op1))
2626 {
2627 /* Strip the component reference completely. */
2628 op0 = TREE_OPERAND (expr, 0);
2629 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2630 *offset += off0;
2631 return op0;
2632 }
2633 break;
2634
2635 case COMPONENT_REF:
2636 {
2637 tree field;
2638
2639 if (!inside_addr)
2640 return orig_expr;
2641
2642 tmp = component_ref_field_offset (expr);
2643 field = TREE_OPERAND (expr, 1);
2644 if (top_compref
2645 && cst_and_fits_in_hwi (tmp)
2646 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2647 {
2648 HOST_WIDE_INT boffset, abs_off;
2649
2650 /* Strip the component reference completely. */
2651 op0 = TREE_OPERAND (expr, 0);
2652 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2653 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2654 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2655 if (boffset < 0)
2656 abs_off = -abs_off;
2657
2658 *offset = off0 + int_cst_value (tmp) + abs_off;
2659 return op0;
2660 }
2661 }
2662 break;
2663
2664 case ADDR_EXPR:
2665 op0 = TREE_OPERAND (expr, 0);
2666 op0 = strip_offset_1 (op0, true, true, &off0);
2667 *offset += off0;
2668
2669 if (op0 == TREE_OPERAND (expr, 0))
2670 return orig_expr;
2671
2672 expr = build_fold_addr_expr (op0);
2673 return fold_convert (orig_type, expr);
2674
2675 case MEM_REF:
2676 /* ??? Offset operand? */
2677 inside_addr = false;
2678 break;
2679
2680 default:
2681 return orig_expr;
2682 }
2683
2684 /* Default handling of expressions for which we want to recurse into
2685 the first operand. */
2686 op0 = TREE_OPERAND (expr, 0);
2687 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2688 *offset += off0;
2689
2690 if (op0 == TREE_OPERAND (expr, 0)
2691 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2692 return orig_expr;
2693
2694 expr = copy_node (expr);
2695 TREE_OPERAND (expr, 0) = op0;
2696 if (op1)
2697 TREE_OPERAND (expr, 1) = op1;
2698
2699 /* Inside an address, we might strip the top-level component references,
2700 thus changing the type of the expression. Handling of ADDR_EXPR
2701 will fix that. */
2702 expr = fold_convert (orig_type, expr);
2703
2704 return expr;
2705 }
2706
2707 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2708
2709 static tree
2710 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2711 {
2712 HOST_WIDE_INT off;
2713 tree core = strip_offset_1 (expr, false, false, &off);
2714 *offset = off;
2715 return core;
2716 }
2717
2718 /* Returns a variant of TYPE that can be used as a base for different uses.
2719 We return an unsigned type with the same precision, which avoids problems
2720 with overflows. */
2721
2722 static tree
2723 generic_type_for (tree type)
2724 {
2725 if (POINTER_TYPE_P (type))
2726 return unsigned_type_for (type);
2727
2728 if (TYPE_UNSIGNED (type))
2729 return type;
2730
2731 return unsigned_type_for (type);
2732 }
2733
2734 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2735 the bitmap in which we should store them. */
2736
2737 static struct ivopts_data *fd_ivopts_data;
2738 static tree
2739 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2740 {
2741 bitmap *depends_on = (bitmap *) data;
2742 struct version_info *info;
2743
2744 if (TREE_CODE (*expr_p) != SSA_NAME)
2745 return NULL_TREE;
2746 info = name_info (fd_ivopts_data, *expr_p);
2747
2748 if (!info->inv_id || info->has_nonlin_use)
2749 return NULL_TREE;
2750
2751 if (!*depends_on)
2752 *depends_on = BITMAP_ALLOC (NULL);
2753 bitmap_set_bit (*depends_on, info->inv_id);
2754
2755 return NULL_TREE;
2756 }
2757
2758 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2759 position to POS. If USE is not NULL, the candidate is set as related to
2760 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2761 replacement of the final value of the iv by a direct computation. */
2762
2763 static struct iv_cand *
2764 add_candidate_1 (struct ivopts_data *data,
2765 tree base, tree step, bool important, enum iv_position pos,
2766 struct iv_use *use, gimple *incremented_at,
2767 struct iv *orig_iv = NULL)
2768 {
2769 unsigned i;
2770 struct iv_cand *cand = NULL;
2771 tree type, orig_type;
2772
2773 /* For non-original variables, make sure their values are computed in a type
2774 that does not invoke undefined behavior on overflows (since in general,
2775 we cannot prove that these induction variables are non-wrapping). */
2776 if (pos != IP_ORIGINAL)
2777 {
2778 orig_type = TREE_TYPE (base);
2779 type = generic_type_for (orig_type);
2780 if (type != orig_type)
2781 {
2782 base = fold_convert (type, base);
2783 step = fold_convert (type, step);
2784 }
2785 }
2786
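/* Reuse an existing candidate with the same position, increment
statement, base, step and precision, if there is one. */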
2787 for (i = 0; i < n_iv_cands (data); i++)
2788 {
2789 cand = iv_cand (data, i);
2790
2791 if (cand->pos != pos)
2792 continue;
2793
2794 if (cand->incremented_at != incremented_at
2795 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2796 && cand->ainc_use != use))
2797 continue;
2798
2799 if (!cand->iv)
2800 {
2801 if (!base && !step)
2802 break;
2803
2804 continue;
2805 }
2806
2807 if (!base && !step)
2808 continue;
2809
2810 if (operand_equal_p (base, cand->iv->base, 0)
2811 && operand_equal_p (step, cand->iv->step, 0)
2812 && (TYPE_PRECISION (TREE_TYPE (base))
2813 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2814 break;
2815 }
2816
2817 if (i == n_iv_cands (data))
2818 {
2819 cand = XCNEW (struct iv_cand);
2820 cand->id = i;
2821
2822 if (!base && !step)
2823 cand->iv = NULL;
2824 else
2825 cand->iv = alloc_iv (data, base, step);
2826
2827 cand->pos = pos;
2828 if (pos != IP_ORIGINAL && cand->iv)
2829 {
2830 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2831 cand->var_after = cand->var_before;
2832 }
2833 cand->important = important;
2834 cand->incremented_at = incremented_at;
2835 data->iv_candidates.safe_push (cand);
2836
2837 if (step
2838 && TREE_CODE (step) != INTEGER_CST)
2839 {
2840 fd_ivopts_data = data;
2841 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2842 }
2843
2844 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2845 cand->ainc_use = use;
2846 else
2847 cand->ainc_use = NULL;
2848
2849 cand->orig_iv = orig_iv;
2850 if (dump_file && (dump_flags & TDF_DETAILS))
2851 dump_cand (dump_file, cand);
2852 }
2853
2854 if (important && !cand->important)
2855 {
2856 cand->important = true;
2857 if (dump_file && (dump_flags & TDF_DETAILS))
2858 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2859 }
2860
2861 if (use)
2862 {
2863 bitmap_set_bit (use->related_cands, i);
2864 if (dump_file && (dump_flags & TDF_DETAILS))
2865 fprintf (dump_file, "Candidate %d is related to use %d\n",
2866 cand->id, use->id);
2867 }
2868
2869 return cand;
2870 }
2871
2872 /* Returns true if incrementing the induction variable at the end of the LOOP
2873 is allowed.
2874
2875 The purpose is to avoid splitting the latch edge with a biv increment, thus
2876 creating a jump, possibly confusing other optimization passes and leaving
2877 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2878 is not available (so we do not have a better alternative), or if the latch
2879 edge is already nonempty. */
2880
2881 static bool
2882 allow_ip_end_pos_p (struct loop *loop)
2883 {
2884 if (!ip_normal_pos (loop))
2885 return true;
2886
2887 if (!empty_block_p (ip_end_pos (loop)))
2888 return true;
2889
2890 return false;
2891 }
2892
2893 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2894 Important field is set to IMPORTANT. */
2895
2896 static void
2897 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2898 bool important, struct iv_use *use)
2899 {
2900 basic_block use_bb = gimple_bb (use->stmt);
2901 machine_mode mem_mode;
2902 unsigned HOST_WIDE_INT cstepi;
2903
2904 /* If we insert the increment in any position other than the standard
2905 ones, we must ensure that it is incremented once per iteration.
2906 It must not be in an inner nested loop, or one side of an if
2907 statement. */
2908 if (use_bb->loop_father != data->current_loop
2909 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2910 || stmt_could_throw_p (use->stmt)
2911 || !cst_and_fits_in_hwi (step))
2912 return;
2913
2914 cstepi = int_cst_value (step);
2915
2916 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
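/* For pre-increment/decrement the candidate starts at BASE - STEP and is
incremented just before the use (IP_BEFORE_USE), so that its value at
the use equals BASE. */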
2917 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2918 || USE_STORE_PRE_INCREMENT (mem_mode))
2919 && GET_MODE_SIZE (mem_mode) == cstepi)
2920 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2921 || USE_STORE_PRE_DECREMENT (mem_mode))
2922 && GET_MODE_SIZE (mem_mode) == -cstepi))
2923 {
2924 enum tree_code code = MINUS_EXPR;
2925 tree new_base;
2926 tree new_step = step;
2927
2928 if (POINTER_TYPE_P (TREE_TYPE (base)))
2929 {
2930 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2931 code = POINTER_PLUS_EXPR;
2932 }
2933 else
2934 new_step = fold_convert (TREE_TYPE (base), new_step);
2935 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2936 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2937 use->stmt);
2938 }
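/* For post-increment/decrement the candidate keeps the original BASE and
is incremented just after the use (IP_AFTER_USE). */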
2939 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2940 || USE_STORE_POST_INCREMENT (mem_mode))
2941 && GET_MODE_SIZE (mem_mode) == cstepi)
2942 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2943 || USE_STORE_POST_DECREMENT (mem_mode))
2944 && GET_MODE_SIZE (mem_mode) == -cstepi))
2945 {
2946 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2947 use->stmt);
2948 }
2949 }
2950
2951 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2952 position to POS. If USE is not NULL, the candidate is set as related to
2953 it. The candidate computation is scheduled before exit condition and at
2954 the end of loop. */
2955
2956 static void
2957 add_candidate (struct ivopts_data *data,
2958 tree base, tree step, bool important, struct iv_use *use,
2959 struct iv *orig_iv = NULL)
2960 {
2961 gcc_assert (use == NULL || use->sub_id == 0);
2962
2963 if (ip_normal_pos (data->current_loop))
2964 add_candidate_1 (data, base, step, important,
2965 IP_NORMAL, use, NULL, orig_iv);
2966 if (ip_end_pos (data->current_loop)
2967 && allow_ip_end_pos_p (data->current_loop))
2968 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2969 }
2970
2971 /* Adds standard iv candidates. */
2972
2973 static void
2974 add_standard_iv_candidates (struct ivopts_data *data)
2975 {
2976 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2977
2978 /* The same for a double-integer type if it is still fast enough. */
2979 if (TYPE_PRECISION
2980 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2981 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2982 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2983 build_int_cst (long_integer_type_node, 1), true, NULL);
2984
2985 /* The same for a double-integer type if it is still fast enough. */
2986 if (TYPE_PRECISION
2987 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2988 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2989 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2990 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2991 }
2992
2993
2994 /* Adds candidates based on the old induction variable IV. */
2995
2996 static void
2997 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2998 {
2999 gimple *phi;
3000 tree def;
3001 struct iv_cand *cand;
3002
3003 /* Check if this biv is used in an address type use. */
3004 if (iv->no_overflow && iv->have_address_use
3005 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3006 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3007 {
3008 tree base = fold_convert (sizetype, iv->base);
3009 tree step = fold_convert (sizetype, iv->step);
3010
3011 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3012 add_candidate (data, base, step, true, NULL, iv);
3013 /* Add iv cand of the original type only if it has nonlinear use. */
3014 if (iv->have_use_for)
3015 add_candidate (data, iv->base, iv->step, true, NULL);
3016 }
3017 else
3018 add_candidate (data, iv->base, iv->step, true, NULL);
3019
3020 /* The same, but with initial value zero. */
3021 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3022 add_candidate (data, size_int (0), iv->step, true, NULL);
3023 else
3024 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3025 iv->step, true, NULL);
3026
3027 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3028 if (gimple_code (phi) == GIMPLE_PHI)
3029 {
3030 /* Additionally record the possibility of leaving the original iv
3031 untouched. */
3032 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3033 /* Don't add candidate if it's from another PHI node because
3034 it's an affine iv appearing in the form of PEELED_CHREC. */
3035 phi = SSA_NAME_DEF_STMT (def);
3036 if (gimple_code (phi) != GIMPLE_PHI)
3037 {
3038 cand = add_candidate_1 (data,
3039 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3040 SSA_NAME_DEF_STMT (def));
3041 cand->var_before = iv->ssa_name;
3042 cand->var_after = def;
3043 }
3044 else
3045 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3046 }
3047 }
3048
3049 /* Adds candidates based on the old induction variables. */
3050
3051 static void
3052 add_iv_candidate_for_bivs (struct ivopts_data *data)
3053 {
3054 unsigned i;
3055 struct iv *iv;
3056 bitmap_iterator bi;
3057
3058 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3059 {
3060 iv = ver_info (data, i)->iv;
3061 if (iv && iv->biv_p && !integer_zerop (iv->step))
3062 add_iv_candidate_for_biv (data, iv);
3063 }
3064 }
3065
3066 /* Adds candidates based on the value of USE's iv. */
3067
3068 static void
3069 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3070 {
3071 unsigned HOST_WIDE_INT offset;
3072 tree base;
3073 tree basetype;
3074 struct iv *iv = use->iv;
3075
3076 add_candidate (data, iv->base, iv->step, false, use);
3077
3078 /* The same, but with initial value zero. Make such a variable important,
3079 since it is generic enough that possibly many uses may be based
3080 on it. */
3081 basetype = TREE_TYPE (iv->base);
3082 if (POINTER_TYPE_P (basetype))
3083 basetype = sizetype;
3084 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3085
3086 /* Third, try removing the constant offset. Make sure to even
3087 add a candidate for &a[0] vs. (T *)&a. */
3088 base = strip_offset (iv->base, &offset);
3089 if (offset || base != iv->base)
3090 add_candidate (data, base, iv->step, false, use);
3091
3092 /* Finally, add auto-increment candidates. Make such variables
3093 important, since other iv uses with the same base object may be
3094 based on them. */
3095 if (use != NULL && use->type == USE_ADDRESS)
3096 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3097 }
3098
3099 /* Adds candidates based on the uses. */
3100
3101 static void
3102 add_iv_candidate_for_uses (struct ivopts_data *data)
3103 {
3104 unsigned i;
3105
3106 for (i = 0; i < n_iv_uses (data); i++)
3107 {
3108 struct iv_use *use = iv_use (data, i);
3109
3110 if (!use)
3111 continue;
3112
3113 switch (use->type)
3114 {
3115 case USE_NONLINEAR_EXPR:
3116 case USE_COMPARE:
3117 case USE_ADDRESS:
3118 /* Just add the ivs based on the value of the iv used here. */
3119 add_iv_candidate_for_use (data, use);
3120 break;
3121
3122 default:
3123 gcc_unreachable ();
3124 }
3125 }
3126 }
3127
3128 /* Record important candidates and add them to related_cands bitmaps
3129 if needed. */
3130
3131 static void
3132 record_important_candidates (struct ivopts_data *data)
3133 {
3134 unsigned i;
3135 struct iv_use *use;
3136
3137 for (i = 0; i < n_iv_cands (data); i++)
3138 {
3139 struct iv_cand *cand = iv_cand (data, i);
3140
3141 if (cand->important)
3142 bitmap_set_bit (data->important_candidates, i);
3143 }
3144
3145 data->consider_all_candidates = (n_iv_cands (data)
3146 <= CONSIDER_ALL_CANDIDATES_BOUND);
3147
3148 if (data->consider_all_candidates)
3149 {
3150 /* We will not need "related_cands" bitmaps in this case,
3151 so release them to decrease peak memory consumption. */
3152 for (i = 0; i < n_iv_uses (data); i++)
3153 {
3154 use = iv_use (data, i);
3155 BITMAP_FREE (use->related_cands);
3156 }
3157 }
3158 else
3159 {
3160 /* Add important candidates to the related_cands bitmaps. */
3161 for (i = 0; i < n_iv_uses (data); i++)
3162 bitmap_ior_into (iv_use (data, i)->related_cands,
3163 data->important_candidates);
3164 }
3165 }
3166
3167 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3168 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3169 we allocate a simple list for every use. */
3170
3171 static void
3172 alloc_use_cost_map (struct ivopts_data *data)
3173 {
3174 unsigned i, size, s;
3175
3176 for (i = 0; i < n_iv_uses (data); i++)
3177 {
3178 struct iv_use *use = iv_use (data, i);
3179
3180 if (data->consider_all_candidates)
3181 size = n_iv_cands (data);
3182 else
3183 {
3184 s = bitmap_count_bits (use->related_cands);
3185
3186 /* Round up to a power of two, so that taking the modulus by it is fast. */
3187 size = s ? (1 << ceil_log2 (s)) : 1;
3188 }
3189
3190 use->n_map_members = size;
3191 use->cost_map = XCNEWVEC (struct cost_pair, size);
3192 }
3193 }
3194
3195 /* Returns a description of the computation cost of an expression whose
3196 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3197
3198 static comp_cost
3199 new_cost (unsigned runtime, unsigned complexity)
3200 {
3201 comp_cost cost;
3202
3203 cost.cost = runtime;
3204 cost.complexity = complexity;
3205
3206 return cost;
3207 }
3208
3209 /* Returns true if COST is infinite. */
3210
3211 static bool
3212 infinite_cost_p (comp_cost cost)
3213 {
3214 return cost.cost == INFTY;
3215 }
3216
3217 /* Adds costs COST1 and COST2. */
3218
3219 static comp_cost
3220 add_costs (comp_cost cost1, comp_cost cost2)
3221 {
3222 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3223 return infinite_cost;
3224
3225 cost1.cost += cost2.cost;
3226 cost1.complexity += cost2.complexity;
3227
3228 return cost1;
3229 }

3230 /* Subtracts costs COST1 and COST2. */
3231
3232 static comp_cost
3233 sub_costs (comp_cost cost1, comp_cost cost2)
3234 {
3235 cost1.cost -= cost2.cost;
3236 cost1.complexity -= cost2.complexity;
3237
3238 return cost1;
3239 }
3240
3241 /* Returns a negative number if COST1 < COST2, a positive number if
3242 COST1 > COST2, and 0 if COST1 = COST2. */
3243
3244 static int
3245 compare_costs (comp_cost cost1, comp_cost cost2)
3246 {
3247 if (cost1.cost == cost2.cost)
3248 return cost1.complexity - cost2.complexity;
3249
3250 return cost1.cost - cost2.cost;
3251 }
3252
3253 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3254 depends on invariants DEPENDS_ON, that the value used in expressing it
3255 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3256
3257 static void
3258 set_use_iv_cost (struct ivopts_data *data,
3259 struct iv_use *use, struct iv_cand *cand,
3260 comp_cost cost, bitmap depends_on, tree value,
3261 enum tree_code comp, int inv_expr_id)
3262 {
3263 unsigned i, s;
3264
3265 if (infinite_cost_p (cost))
3266 {
3267 BITMAP_FREE (depends_on);
3268 return;
3269 }
3270
3271 if (data->consider_all_candidates)
3272 {
3273 use->cost_map[cand->id].cand = cand;
3274 use->cost_map[cand->id].cost = cost;
3275 use->cost_map[cand->id].depends_on = depends_on;
3276 use->cost_map[cand->id].value = value;
3277 use->cost_map[cand->id].comp = comp;
3278 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3279 return;
3280 }
3281
3282 /* n_map_members is a power of two, so this computes modulo. */
3283 s = cand->id & (use->n_map_members - 1);
3284 for (i = s; i < use->n_map_members; i++)
3285 if (!use->cost_map[i].cand)
3286 goto found;
3287 for (i = 0; i < s; i++)
3288 if (!use->cost_map[i].cand)
3289 goto found;
3290
3291 gcc_unreachable ();
3292
3293 found:
3294 use->cost_map[i].cand = cand;
3295 use->cost_map[i].cost = cost;
3296 use->cost_map[i].depends_on = depends_on;
3297 use->cost_map[i].value = value;
3298 use->cost_map[i].comp = comp;
3299 use->cost_map[i].inv_expr_id = inv_expr_id;
3300 }
3301
3302 /* Gets cost of (USE, CANDIDATE) pair. */
3303
3304 static struct cost_pair *
3305 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3306 struct iv_cand *cand)
3307 {
3308 unsigned i, s;
3309 struct cost_pair *ret;
3310
3311 if (!cand)
3312 return NULL;
3313
3314 if (data->consider_all_candidates)
3315 {
3316 ret = use->cost_map + cand->id;
3317 if (!ret->cand)
3318 return NULL;
3319
3320 return ret;
3321 }
3322
3323 /* n_map_members is a power of two, so this computes modulo. */
3324 s = cand->id & (use->n_map_members - 1);
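/* Probe the slots in the same order as set_use_iv_cost does; hitting an
empty slot means the pair was never recorded. */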
3325 for (i = s; i < use->n_map_members; i++)
3326 if (use->cost_map[i].cand == cand)
3327 return use->cost_map + i;
3328 else if (use->cost_map[i].cand == NULL)
3329 return NULL;
3330 for (i = 0; i < s; i++)
3331 if (use->cost_map[i].cand == cand)
3332 return use->cost_map + i;
3333 else if (use->cost_map[i].cand == NULL)
3334 return NULL;
3335
3336 return NULL;
3337 }
3338
3339 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3340 static rtx
3341 produce_memory_decl_rtl (tree obj, int *regno)
3342 {
3343 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3344 machine_mode address_mode = targetm.addr_space.address_mode (as);
3345 rtx x;
3346
3347 gcc_assert (obj);
3348 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3349 {
3350 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3351 x = gen_rtx_SYMBOL_REF (address_mode, name);
3352 SET_SYMBOL_REF_DECL (x, obj);
3353 x = gen_rtx_MEM (DECL_MODE (obj), x);
3354 set_mem_addr_space (x, as);
3355 targetm.encode_section_info (obj, x, true);
3356 }
3357 else
3358 {
3359 x = gen_raw_REG (address_mode, (*regno)++);
3360 x = gen_rtx_MEM (DECL_MODE (obj), x);
3361 set_mem_addr_space (x, as);
3362 }
3363
3364 return x;
3365 }
3366
3367 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3368 walk_tree. DATA contains the actual fake register number. */
3369
3370 static tree
3371 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3372 {
3373 tree obj = NULL_TREE;
3374 rtx x = NULL_RTX;
3375 int *regno = (int *) data;
3376
3377 switch (TREE_CODE (*expr_p))
3378 {
3379 case ADDR_EXPR:
3380 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3381 handled_component_p (*expr_p);
3382 expr_p = &TREE_OPERAND (*expr_p, 0))
3383 continue;
3384 obj = *expr_p;
3385 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3386 x = produce_memory_decl_rtl (obj, regno);
3387 break;
3388
3389 case SSA_NAME:
3390 *ws = 0;
3391 obj = SSA_NAME_VAR (*expr_p);
3392 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3393 if (!obj)
3394 return NULL_TREE;
3395 if (!DECL_RTL_SET_P (obj))
3396 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3397 break;
3398
3399 case VAR_DECL:
3400 case PARM_DECL:
3401 case RESULT_DECL:
3402 *ws = 0;
3403 obj = *expr_p;
3404
3405 if (DECL_RTL_SET_P (obj))
3406 break;
3407
3408 if (DECL_MODE (obj) == BLKmode)
3409 x = produce_memory_decl_rtl (obj, regno);
3410 else
3411 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3412
3413 break;
3414
3415 default:
3416 break;
3417 }
3418
3419 if (x)
3420 {
3421 decl_rtl_to_reset.safe_push (obj);
3422 SET_DECL_RTL (obj, x);
3423 }
3424
3425 return NULL_TREE;
3426 }
3427
3428 /* Determines cost of the computation of EXPR. */
3429
3430 static unsigned
3431 computation_cost (tree expr, bool speed)
3432 {
3433 rtx_insn *seq;
3434 rtx rslt;
3435 tree type = TREE_TYPE (expr);
3436 unsigned cost;
3437 /* Avoid using hard regs in ways which may be unsupported. */
3438 int regno = LAST_VIRTUAL_REGISTER + 1;
3439 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3440 enum node_frequency real_frequency = node->frequency;
3441
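/* Temporarily pretend the function has normal frequency, so that the cost
computation below honours the SPEED flag rather than the real profile. */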
3442 node->frequency = NODE_FREQUENCY_NORMAL;
3443 crtl->maybe_hot_insn_p = speed;
3444 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3445 start_sequence ();
3446 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3447 seq = get_insns ();
3448 end_sequence ();
3449 default_rtl_profile ();
3450 node->frequency = real_frequency;
3451
3452 cost = seq_cost (seq, speed);
3453 if (MEM_P (rslt))
3454 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3455 TYPE_ADDR_SPACE (type), speed);
3456 else if (!REG_P (rslt))
3457 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3458
3459 return cost;
3460 }
3461
3462 /* Returns variable containing the value of candidate CAND at statement AT. */
3463
3464 static tree
3465 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3466 {
3467 if (stmt_after_increment (loop, cand, stmt))
3468 return cand->var_after;
3469 else
3470 return cand->var_before;
3471 }
3472
3473 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3474 same precision, which is at least as wide as the precision of TYPE, stores
3475 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3476 type of A and B. */
3477
3478 static tree
3479 determine_common_wider_type (tree *a, tree *b)
3480 {
3481 tree wider_type = NULL;
3482 tree suba, subb;
3483 tree atype = TREE_TYPE (*a);
3484
3485 if (CONVERT_EXPR_P (*a))
3486 {
3487 suba = TREE_OPERAND (*a, 0);
3488 wider_type = TREE_TYPE (suba);
3489 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3490 return atype;
3491 }
3492 else
3493 return atype;
3494
3495 if (CONVERT_EXPR_P (*b))
3496 {
3497 subb = TREE_OPERAND (*b, 0);
3498 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3499 return atype;
3500 }
3501 else
3502 return atype;
3503
3504 *a = suba;
3505 *b = subb;
3506 return wider_type;
3507 }
3508
3509 /* Determines the expression by which USE is expressed from induction variable
3510 CAND at statement AT in LOOP. The expression is stored in a decomposed
3511 form into AFF. Returns false if USE cannot be expressed using CAND. */
3512
3513 static bool
3514 get_computation_aff (struct loop *loop,
3515 struct iv_use *use, struct iv_cand *cand, gimple *at,
3516 struct aff_tree *aff)
3517 {
3518 tree ubase = use->iv->base;
3519 tree ustep = use->iv->step;
3520 tree cbase = cand->iv->base;
3521 tree cstep = cand->iv->step, cstep_common;
3522 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3523 tree common_type, var;
3524 tree uutype;
3525 aff_tree cbase_aff, var_aff;
3526 widest_int rat;
3527
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3529 {
3530 /* We do not have enough precision to express the values of the use. */
3531 return false;
3532 }
3533
3534 var = var_at_stmt (loop, cand, at);
3535 uutype = unsigned_type_for (utype);
3536
3537 /* If the conversion is not a noop, perform it. */
3538 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3539 {
3540 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3541 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3542 {
3543 tree inner_base, inner_step, inner_type;
3544 inner_base = TREE_OPERAND (cbase, 0);
3545 if (CONVERT_EXPR_P (cstep))
3546 inner_step = TREE_OPERAND (cstep, 0);
3547 else
3548 inner_step = cstep;
3549
3550 inner_type = TREE_TYPE (inner_base);
3551 /* If the candidate is added from a biv whose type is smaller than
3552 ctype, we know both the candidate and the biv won't overflow.
3553 In this case, it's safe to skip the conversion in the candidate.
3554 As an example, (unsigned short)((unsigned long)A) equals
3555 (unsigned short)A, if A has a type no larger than short. */
3556 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3557 {
3558 cbase = inner_base;
3559 cstep = inner_step;
3560 }
3561 }
3562 cstep = fold_convert (uutype, cstep);
3563 cbase = fold_convert (uutype, cbase);
3564 var = fold_convert (uutype, var);
3565 }
3566
3567 if (!constant_multiple_of (ustep, cstep, &rat))
3568 return false;
3569
3570 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3571 type, we achieve better folding by computing their difference in this
3572 wider type, and casting the result to UUTYPE. We do not need to worry about
3573 overflows, as all the arithmetic will in the end be performed in UUTYPE
3574 anyway. */
3575 common_type = determine_common_wider_type (&ubase, &cbase);
3576
3577 /* use = ubase - ratio * cbase + ratio * var. */
3578 tree_to_aff_combination (ubase, common_type, aff);
3579 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3580 tree_to_aff_combination (var, uutype, &var_aff);
3581
3582 /* We need to shift the value if we are after the increment. */
3583 if (stmt_after_increment (loop, cand, at))
3584 {
3585 aff_tree cstep_aff;
3586
3587 if (common_type != uutype)
3588 cstep_common = fold_convert (common_type, cstep);
3589 else
3590 cstep_common = cstep;
3591
3592 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3593 aff_combination_add (&cbase_aff, &cstep_aff);
3594 }
3595
3596 aff_combination_scale (&cbase_aff, -rat);
3597 aff_combination_add (aff, &cbase_aff);
3598 if (common_type != uutype)
3599 aff_combination_convert (aff, uutype);
3600
3601 aff_combination_scale (&var_aff, rat);
3602 aff_combination_add (aff, &var_aff);
3603
3604 return true;
3605 }
3606
3607 /* Return the type of USE. */
3608
3609 static tree
3610 get_use_type (struct iv_use *use)
3611 {
3612 tree base_type = TREE_TYPE (use->iv->base);
3613 tree type;
3614
3615 if (use->type == USE_ADDRESS)
3616 {
3617 /* The base_type may be a void pointer. Create a pointer type based on
3618 the mem_ref instead. */
3619 type = build_pointer_type (TREE_TYPE (*use->op_p));
3620 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3621 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3622 }
3623 else
3624 type = base_type;
3625
3626 return type;
3627 }
3628
3629 /* Determines the expression by which USE is expressed from induction variable
3630 CAND at statement AT in LOOP. The computation is unshared. */
3631
3632 static tree
3633 get_computation_at (struct loop *loop,
3634 struct iv_use *use, struct iv_cand *cand, gimple *at)
3635 {
3636 aff_tree aff;
3637 tree type = get_use_type (use);
3638
3639 if (!get_computation_aff (loop, use, cand, at, &aff))
3640 return NULL_TREE;
3641 unshare_aff_combination (&aff);
3642 return fold_convert (type, aff_combination_to_tree (&aff));
3643 }
3644
3645 /* Determines the expression by which USE is expressed from induction variable
3646 CAND in LOOP. The computation is unshared. */
3647
3648 static tree
3649 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3650 {
3651 return get_computation_at (loop, use, cand, use->stmt);
3652 }
3653
3654 /* Adjust the cost COST for being in loop setup rather than loop body.
3655 If we're optimizing for space, the loop setup overhead is constant;
3656 if we're optimizing for speed, amortize it over the per-iteration cost. */
3657 static unsigned
3658 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3659 {
3660 if (cost == INFTY)
3661 return cost;
3662 else if (optimize_loop_for_speed_p (data->current_loop))
3663 return cost / avg_loop_niter (data->current_loop);
3664 else
3665 return cost;
3666 }
3667
3668 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3669 validity for a memory reference accessing memory of mode MODE in
3670 address space AS. */
3671
3672
3673 bool
3674 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3675 addr_space_t as)
3676 {
3677 #define MAX_RATIO 128
3678 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3679 static vec<sbitmap> valid_mult_list;
3680 sbitmap valid_mult;
3681
3682 if (data_index >= valid_mult_list.length ())
3683 valid_mult_list.safe_grow_cleared (data_index + 1);
3684
3685 valid_mult = valid_mult_list[data_index];
3686 if (!valid_mult)
3687 {
3688 machine_mode address_mode = targetm.addr_space.address_mode (as);
3689 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3690 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3691 rtx addr, scaled;
3692 HOST_WIDE_INT i;
3693
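/* Compute the set of valid multipliers once per (mode, address space)
pair, probing every ratio in [-MAX_RATIO, MAX_RATIO] both with and
without an added base register, and cache the result. */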
3694 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3695 bitmap_clear (valid_mult);
3696 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3697 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3698 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3699 {
3700 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3701 if (memory_address_addr_space_p (mode, addr, as)
3702 || memory_address_addr_space_p (mode, scaled, as))
3703 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3704 }
3705
3706 if (dump_file && (dump_flags & TDF_DETAILS))
3707 {
3708 fprintf (dump_file, " allowed multipliers:");
3709 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3710 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3711 fprintf (dump_file, " %d", (int) i);
3712 fprintf (dump_file, "\n");
3713 fprintf (dump_file, "\n");
3714 }
3715
3716 valid_mult_list[data_index] = valid_mult;
3717 }
3718
3719 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3720 return false;
3721
3722 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3723 }
3724
3725 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3726 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3727 variable is omitted. Compute the cost for a memory reference that accesses
3728 a memory location of mode MEM_MODE in address space AS.
3729
3730 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3731 size of MEM_MODE / RATIO) is available. To make this determination, we
3732 look at the size of the increment to be made, which is given in CSTEP.
3733 CSTEP may be zero if the step is unknown.
3734 STMT_AFTER_INC is true iff the statement we're looking at is after the
3735 increment of the original biv.
3736
3737 TODO -- there must be some better way. This all is quite crude. */
3738
3739 enum ainc_type
3740 {
3741 AINC_PRE_INC, /* Pre increment. */
3742 AINC_PRE_DEC, /* Pre decrement. */
3743 AINC_POST_INC, /* Post increment. */
3744 AINC_POST_DEC, /* Post decrement. */
3745 AINC_NONE /* Also the number of auto increment types. */
3746 };
3747
3748 struct address_cost_data
3749 {
3750 HOST_WIDE_INT min_offset, max_offset;
3751 unsigned costs[2][2][2][2];
3752 unsigned ainc_costs[AINC_NONE];
3753 };
3754
3755
3756 static comp_cost
3757 get_address_cost (bool symbol_present, bool var_present,
3758 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3759 HOST_WIDE_INT cstep, machine_mode mem_mode,
3760 addr_space_t as, bool speed,
3761 bool stmt_after_inc, bool *may_autoinc)
3762 {
3763 machine_mode address_mode = targetm.addr_space.address_mode (as);
3764 static vec<address_cost_data *> address_cost_data_list;
3765 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3766 address_cost_data *data;
3767 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3768 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3769 unsigned cost, acost, complexity;
3770 enum ainc_type autoinc_type;
3771 bool offset_p, ratio_p, autoinc;
3772 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3773 unsigned HOST_WIDE_INT mask;
3774 unsigned bits;
3775
3776 if (data_index >= address_cost_data_list.length ())
3777 address_cost_data_list.safe_grow_cleared (data_index + 1);
3778
3779 data = address_cost_data_list[data_index];
3780 if (!data)
3781 {
3782 HOST_WIDE_INT i;
3783 HOST_WIDE_INT rat, off = 0;
3784 int old_cse_not_expected, width;
3785 unsigned sym_p, var_p, off_p, rat_p, add_c;
3786 rtx_insn *seq;
3787 rtx addr, base;
3788 rtx reg0, reg1;
3789
3790 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3791
3792 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3793
3794 width = GET_MODE_BITSIZE (address_mode) - 1;
3795 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3796 width = HOST_BITS_PER_WIDE_INT - 1;
3797 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3798
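/* Determine the minimum (negative) and maximum offsets accepted in a
[reg + offset] address by probing power-of-two based values from the
widest downwards. */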
3799 for (i = width; i >= 0; i--)
3800 {
3801 off = -((unsigned HOST_WIDE_INT) 1 << i);
3802 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3803 if (memory_address_addr_space_p (mem_mode, addr, as))
3804 break;
3805 }
3806 data->min_offset = (i == -1? 0 : off);
3807
3808 for (i = width; i >= 0; i--)
3809 {
3810 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3811 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3812 if (memory_address_addr_space_p (mem_mode, addr, as))
3813 break;
3814 /* For some strict-alignment targets, the offset must be naturally
3815 aligned. Try an aligned offset if mem_mode is not QImode. */
3816 off = mem_mode != QImode
3817 ? ((unsigned HOST_WIDE_INT) 1 << i)
3818 - GET_MODE_SIZE (mem_mode)
3819 : 0;
3820 if (off > 0)
3821 {
3822 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3823 if (memory_address_addr_space_p (mem_mode, addr, as))
3824 break;
3825 }
3826 }
3827 if (i == -1)
3828 off = 0;
3829 data->max_offset = off;
3830
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3832 {
3833 fprintf (dump_file, "get_address_cost:\n");
3834 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3835 GET_MODE_NAME (mem_mode),
3836 data->min_offset);
3837 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3838 GET_MODE_NAME (mem_mode),
3839 data->max_offset);
3840 }
3841
3842 rat = 1;
3843 for (i = 2; i <= MAX_RATIO; i++)
3844 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3845 {
3846 rat = i;
3847 break;
3848 }
3849
3850 /* Compute the cost of various addressing modes. */
3851 acost = 0;
3852 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3853 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3854
3855 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3856 || USE_STORE_PRE_DECREMENT (mem_mode))
3857 {
3858 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3859 has_predec[mem_mode]
3860 = memory_address_addr_space_p (mem_mode, addr, as);
3861
3862 if (has_predec[mem_mode])
3863 data->ainc_costs[AINC_PRE_DEC]
3864 = address_cost (addr, mem_mode, as, speed);
3865 }
3866 if (USE_LOAD_POST_DECREMENT (mem_mode)
3867 || USE_STORE_POST_DECREMENT (mem_mode))
3868 {
3869 addr = gen_rtx_POST_DEC (address_mode, reg0);
3870 has_postdec[mem_mode]
3871 = memory_address_addr_space_p (mem_mode, addr, as);
3872
3873 if (has_postdec[mem_mode])
3874 data->ainc_costs[AINC_POST_DEC]
3875 = address_cost (addr, mem_mode, as, speed);
3876 }
3877 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3878 || USE_STORE_PRE_INCREMENT (mem_mode))
3879 {
3880 addr = gen_rtx_PRE_INC (address_mode, reg0);
3881 has_preinc[mem_mode]
3882 = memory_address_addr_space_p (mem_mode, addr, as);
3883
3884 if (has_preinc[mem_mode])
3885 data->ainc_costs[AINC_PRE_INC]
3886 = address_cost (addr, mem_mode, as, speed);
3887 }
3888 if (USE_LOAD_POST_INCREMENT (mem_mode)
3889 || USE_STORE_POST_INCREMENT (mem_mode))
3890 {
3891 addr = gen_rtx_POST_INC (address_mode, reg0);
3892 has_postinc[mem_mode]
3893 = memory_address_addr_space_p (mem_mode, addr, as);
3894
3895 if (has_postinc[mem_mode])
3896 data->ainc_costs[AINC_POST_INC]
3897 = address_cost (addr, mem_mode, as, speed);
3898 }
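/* Measure the cost of each of the 16 combinations of symbol, variable,
constant offset and scaled index appearing in an address. */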
3899 for (i = 0; i < 16; i++)
3900 {
3901 sym_p = i & 1;
3902 var_p = (i >> 1) & 1;
3903 off_p = (i >> 2) & 1;
3904 rat_p = (i >> 3) & 1;
3905
3906 addr = reg0;
3907 if (rat_p)
3908 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3909 gen_int_mode (rat, address_mode));
3910
3911 if (var_p)
3912 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3913
3914 if (sym_p)
3915 {
3916 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3917 /* ??? We can run into trouble with some backends by presenting
3918 it with symbols which haven't been properly passed through
3919 targetm.encode_section_info. By setting the local bit, we
3920 enhance the probability of things working. */
3921 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3922
3923 if (off_p)
3924 base = gen_rtx_fmt_e (CONST, address_mode,
3925 gen_rtx_fmt_ee
3926 (PLUS, address_mode, base,
3927 gen_int_mode (off, address_mode)));
3928 }
3929 else if (off_p)
3930 base = gen_int_mode (off, address_mode);
3931 else
3932 base = NULL_RTX;
3933
3934 if (base)
3935 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3936
3937 start_sequence ();
3938 /* To avoid splitting addressing modes, pretend that no cse will
3939 follow. */
3940 old_cse_not_expected = cse_not_expected;
3941 cse_not_expected = true;
3942 addr = memory_address_addr_space (mem_mode, addr, as);
3943 cse_not_expected = old_cse_not_expected;
3944 seq = get_insns ();
3945 end_sequence ();
3946
3947 acost = seq_cost (seq, speed);
3948 acost += address_cost (addr, mem_mode, as, speed);
3949
3950 if (!acost)
3951 acost = 1;
3952 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3953 }
3954
3955 /* On some targets, it is quite expensive to load a symbol into a register,
3956 which makes addresses that contain symbols look much more expensive.
3957 However, the symbol will have to be loaded in any case before the
3958 loop (and quite likely we have it in register already), so it does not
3959 make much sense to penalize them too heavily. So make some final
3960 tweaks for the SYMBOL_PRESENT modes:
3961
3962 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3963 var is cheaper, use this mode with small penalty.
3964 If VAR_PRESENT is true, try whether the mode with
3965 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3966 if this is the case, use it. */
3967 add_c = add_cost (speed, address_mode);
3968 for (i = 0; i < 8; i++)
3969 {
3970 var_p = i & 1;
3971 off_p = (i >> 1) & 1;
3972 rat_p = (i >> 2) & 1;
3973
3974 acost = data->costs[0][1][off_p][rat_p] + 1;
3975 if (var_p)
3976 acost += add_c;
3977
3978 if (acost < data->costs[1][var_p][off_p][rat_p])
3979 data->costs[1][var_p][off_p][rat_p] = acost;
3980 }
3981
3982 if (dump_file && (dump_flags & TDF_DETAILS))
3983 {
3984 fprintf (dump_file, "Address costs:\n");
3985
3986 for (i = 0; i < 16; i++)
3987 {
3988 sym_p = i & 1;
3989 var_p = (i >> 1) & 1;
3990 off_p = (i >> 2) & 1;
3991 rat_p = (i >> 3) & 1;
3992
3993 fprintf (dump_file, " ");
3994 if (sym_p)
3995 fprintf (dump_file, "sym + ");
3996 if (var_p)
3997 fprintf (dump_file, "var + ");
3998 if (off_p)
3999 fprintf (dump_file, "cst + ");
4000 if (rat_p)
4001 fprintf (dump_file, "rat * ");
4002
4003 acost = data->costs[sym_p][var_p][off_p][rat_p];
4004 fprintf (dump_file, "index costs %d\n", acost);
4005 }
4006 if (has_predec[mem_mode] || has_postdec[mem_mode]
4007 || has_preinc[mem_mode] || has_postinc[mem_mode])
4008 fprintf (dump_file, " May include autoinc/dec\n");
4009 fprintf (dump_file, "\n");
4010 }
4011
4012 address_cost_data_list[data_index] = data;
4013 }
4014
4015 bits = GET_MODE_BITSIZE (address_mode);
4016 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4017 offset &= mask;
4018 if ((offset >> (bits - 1) & 1))
4019 offset |= ~mask;
4020 s_offset = offset;
4021
4022 autoinc = false;
4023 autoinc_type = AINC_NONE;
4024 msize = GET_MODE_SIZE (mem_mode);
4025 autoinc_offset = offset;
4026 if (stmt_after_inc)
4027 autoinc_offset += ratio * cstep;
4028 if (symbol_present || var_present || ratio != 1)
4029 autoinc = false;
4030 else
4031 {
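/* A post-increment/decrement matches when no extra offset is needed and
   the candidate steps by exactly the access size; a pre-increment/decrement
   matches when the required offset equals the (signed) step.  */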
4032 if (has_postinc[mem_mode] && autoinc_offset == 0
4033 && msize == cstep)
4034 autoinc_type = AINC_POST_INC;
4035 else if (has_postdec[mem_mode] && autoinc_offset == 0
4036 && msize == -cstep)
4037 autoinc_type = AINC_POST_DEC;
4038 else if (has_preinc[mem_mode] && autoinc_offset == msize
4039 && msize == cstep)
4040 autoinc_type = AINC_PRE_INC;
4041 else if (has_predec[mem_mode] && autoinc_offset == -msize
4042 && msize == -cstep)
4043 autoinc_type = AINC_PRE_DEC;
4044
4045 if (autoinc_type != AINC_NONE)
4046 autoinc = true;
4047 }
4048
4049 cost = 0;
4050 offset_p = (s_offset != 0
4051 && data->min_offset <= s_offset
4052 && s_offset <= data->max_offset);
4053 ratio_p = (ratio != 1
4054 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4055
4056 if (ratio != 1 && !ratio_p)
4057 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4058
4059 if (s_offset && !offset_p && !symbol_present)
4060 cost += add_cost (speed, address_mode);
4061
4062 if (may_autoinc)
4063 *may_autoinc = autoinc;
4064 if (autoinc)
4065 acost = data->ainc_costs[autoinc_type];
4066 else
4067 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4068 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4069 return new_cost (cost + acost, complexity);
4070 }
4071
4072 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
4073 the EXPR operand holding the shift.  COST0 and COST1 are the costs for
4074 calculating the operands of EXPR. Returns true if successful, and returns
4075 the cost in COST. */
4076
4077 static bool
4078 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4079 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4080 {
4081 comp_cost res;
4082 tree op1 = TREE_OPERAND (expr, 1);
4083 tree cst = TREE_OPERAND (mult, 1);
4084 tree multop = TREE_OPERAND (mult, 0);
4085 int m = exact_log2 (int_cst_value (cst));
4086 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4087 int as_cost, sa_cost;
4088 bool mult_in_op1;
4089
4090 if (!(m >= 0 && m < maxm))
4091 return false;
4092
4093 STRIP_NOPS (op1);
4094 mult_in_op1 = operand_equal_p (op1, mult, 0);
4095
4096 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4097
4098 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4099 use that in preference to a shift insn followed by an add insn. */
4100 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4101 ? shiftadd_cost (speed, mode, m)
4102 : (mult_in_op1
4103 ? shiftsub1_cost (speed, mode, m)
4104 : shiftsub0_cost (speed, mode, m)));
4105
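/* For example, for EXPR = A + B * 4 we have M = 2: AS_COST is the cost of
   a separate shift (B << 2) followed by an add, while SA_COST is the cost
   of a single shift-and-add (or shift-and-sub) instruction where the
   target provides one.  Use whichever is cheaper.  */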
4106 res = new_cost (MIN (as_cost, sa_cost), 0);
4107 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4108
4109 STRIP_NOPS (multop);
4110 if (!is_gimple_val (multop))
4111 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4112
4113 *cost = res;
4114 return true;
4115 }
4116
4117 /* Estimates cost of forcing expression EXPR into a variable. */
4118
4119 static comp_cost
4120 force_expr_to_var_cost (tree expr, bool speed)
4121 {
4122 static bool costs_initialized = false;
4123 static unsigned integer_cost [2];
4124 static unsigned symbol_cost [2];
4125 static unsigned address_cost [2];
4126 tree op0, op1;
4127 comp_cost cost0, cost1, cost;
4128 machine_mode mode;
4129
4130 if (!costs_initialized)
4131 {
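/* Probe the target costs once, for both the size (0) and speed (1)
   metrics: the cost of loading an integer constant, a symbol address and
   a symbol plus offset into a register.  The results are reused for all
   loops.  */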
4132 tree type = build_pointer_type (integer_type_node);
4133 tree var, addr;
4134 rtx x;
4135 int i;
4136
4137 var = create_tmp_var_raw (integer_type_node, "test_var");
4138 TREE_STATIC (var) = 1;
4139 x = produce_memory_decl_rtl (var, NULL);
4140 SET_DECL_RTL (var, x);
4141
4142 addr = build1 (ADDR_EXPR, type, var);
4143
4144
4145 for (i = 0; i < 2; i++)
4146 {
4147 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4148 2000), i);
4149
4150 symbol_cost[i] = computation_cost (addr, i) + 1;
4151
4152 address_cost[i]
4153 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4155 {
4156 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4157 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4158 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4159 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4160 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4161 fprintf (dump_file, "\n");
4162 }
4163 }
4164
4165 costs_initialized = true;
4166 }
4167
4168 STRIP_NOPS (expr);
4169
4170 if (SSA_VAR_P (expr))
4171 return no_cost;
4172
4173 if (is_gimple_min_invariant (expr))
4174 {
4175 if (TREE_CODE (expr) == INTEGER_CST)
4176 return new_cost (integer_cost [speed], 0);
4177
4178 if (TREE_CODE (expr) == ADDR_EXPR)
4179 {
4180 tree obj = TREE_OPERAND (expr, 0);
4181
4182 if (TREE_CODE (obj) == VAR_DECL
4183 || TREE_CODE (obj) == PARM_DECL
4184 || TREE_CODE (obj) == RESULT_DECL)
4185 return new_cost (symbol_cost [speed], 0);
4186 }
4187
4188 return new_cost (address_cost [speed], 0);
4189 }
4190
4191 switch (TREE_CODE (expr))
4192 {
4193 case POINTER_PLUS_EXPR:
4194 case PLUS_EXPR:
4195 case MINUS_EXPR:
4196 case MULT_EXPR:
4197 op0 = TREE_OPERAND (expr, 0);
4198 op1 = TREE_OPERAND (expr, 1);
4199 STRIP_NOPS (op0);
4200 STRIP_NOPS (op1);
4201 break;
4202
4203 CASE_CONVERT:
4204 case NEGATE_EXPR:
4205 op0 = TREE_OPERAND (expr, 0);
4206 STRIP_NOPS (op0);
4207 op1 = NULL_TREE;
4208 break;
4209
4210 default:
4211 /* Just an arbitrary value, FIXME. */
4212 return new_cost (target_spill_cost[speed], 0);
4213 }
4214
4215 if (op0 == NULL_TREE
4216 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4217 cost0 = no_cost;
4218 else
4219 cost0 = force_expr_to_var_cost (op0, speed);
4220
4221 if (op1 == NULL_TREE
4222 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4223 cost1 = no_cost;
4224 else
4225 cost1 = force_expr_to_var_cost (op1, speed);
4226
4227 mode = TYPE_MODE (TREE_TYPE (expr));
4228 switch (TREE_CODE (expr))
4229 {
4230 case POINTER_PLUS_EXPR:
4231 case PLUS_EXPR:
4232 case MINUS_EXPR:
4233 case NEGATE_EXPR:
4234 cost = new_cost (add_cost (speed, mode), 0);
4235 if (TREE_CODE (expr) != NEGATE_EXPR)
4236 {
4237 tree mult = NULL_TREE;
4238 comp_cost sa_cost;
4239 if (TREE_CODE (op1) == MULT_EXPR)
4240 mult = op1;
4241 else if (TREE_CODE (op0) == MULT_EXPR)
4242 mult = op0;
4243
4244 if (mult != NULL_TREE
4245 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4246 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4247 speed, &sa_cost))
4248 return sa_cost;
4249 }
4250 break;
4251
4252 CASE_CONVERT:
4253 {
4254 tree inner_mode, outer_mode;
4255 outer_mode = TREE_TYPE (expr);
4256 inner_mode = TREE_TYPE (op0);
4257 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4258 TYPE_MODE (inner_mode), speed), 0);
4259 }
4260 break;
4261
4262 case MULT_EXPR:
4263 if (cst_and_fits_in_hwi (op0))
4264 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4265 mode, speed), 0);
4266 else if (cst_and_fits_in_hwi (op1))
4267 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4268 mode, speed), 0);
4269 else
4270 return new_cost (target_spill_cost [speed], 0);
4271 break;
4272
4273 default:
4274 gcc_unreachable ();
4275 }
4276
4277 cost = add_costs (cost, cost0);
4278 cost = add_costs (cost, cost1);
4279
4280 /* Bound the cost by target_spill_cost. The parts of complicated
4281 computations often are either loop invariant or at least can
4282 be shared between several iv uses, so letting this grow without
4283 limits would not give reasonable results. */
4284 if (cost.cost > (int) target_spill_cost [speed])
4285 cost.cost = target_spill_cost [speed];
4286
4287 return cost;
4288 }
4289
4290 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4291 invariants the computation depends on. */
4292
4293 static comp_cost
4294 force_var_cost (struct ivopts_data *data,
4295 tree expr, bitmap *depends_on)
4296 {
4297 if (depends_on)
4298 {
4299 fd_ivopts_data = data;
4300 walk_tree (&expr, find_depends, depends_on, NULL);
4301 }
4302
4303 return force_expr_to_var_cost (expr, data->speed);
4304 }
4305
4306 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4307 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4308 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4309 invariants the computation depends on. */
4310
4311 static comp_cost
4312 split_address_cost (struct ivopts_data *data,
4313 tree addr, bool *symbol_present, bool *var_present,
4314 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4315 {
4316 tree core;
4317 HOST_WIDE_INT bitsize;
4318 HOST_WIDE_INT bitpos;
4319 tree toffset;
4320 machine_mode mode;
4321 int unsignedp, reversep, volatilep;
4322
4323 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4324 &unsignedp, &reversep, &volatilep, false);
4325
4326 if (toffset != 0
4327 || bitpos % BITS_PER_UNIT != 0
4328 || reversep
4329 || TREE_CODE (core) != VAR_DECL)
4330 {
4331 *symbol_present = false;
4332 *var_present = true;
4333 fd_ivopts_data = data;
4334 if (depends_on)
4335 walk_tree (&addr, find_depends, depends_on, NULL);
4336
4337 return new_cost (target_spill_cost[data->speed], 0);
4338 }
4339
4340 *offset += bitpos / BITS_PER_UNIT;
4341 if (TREE_STATIC (core)
4342 || DECL_EXTERNAL (core))
4343 {
4344 *symbol_present = true;
4345 *var_present = false;
4346 return no_cost;
4347 }
4348
4349 *symbol_present = false;
4350 *var_present = true;
4351 return no_cost;
4352 }
4353
4354 /* Estimates cost of expressing difference of addresses E1 - E2 as
4355 var + symbol + offset. The value of offset is added to OFFSET,
4356 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4357 part is missing. DEPENDS_ON is a set of the invariants the computation
4358 depends on. */
4359
4360 static comp_cost
4361 ptr_difference_cost (struct ivopts_data *data,
4362 tree e1, tree e2, bool *symbol_present, bool *var_present,
4363 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4364 {
4365 HOST_WIDE_INT diff = 0;
4366 aff_tree aff_e1, aff_e2;
4367 tree type;
4368
4369 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4370
4371 if (ptr_difference_const (e1, e2, &diff))
4372 {
4373 *offset += diff;
4374 *symbol_present = false;
4375 *var_present = false;
4376 return no_cost;
4377 }
4378
4379 if (integer_zerop (e2))
4380 return split_address_cost (data, TREE_OPERAND (e1, 0),
4381 symbol_present, var_present, offset, depends_on);
4382
4383 *symbol_present = false;
4384 *var_present = true;
4385
4386 type = signed_type_for (TREE_TYPE (e1));
4387 tree_to_aff_combination (e1, type, &aff_e1);
4388 tree_to_aff_combination (e2, type, &aff_e2);
4389 aff_combination_scale (&aff_e2, -1);
4390 aff_combination_add (&aff_e1, &aff_e2);
4391
4392 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4393 }
4394
4395 /* Estimates cost of expressing difference E1 - E2 as
4396 var + symbol + offset. The value of offset is added to OFFSET,
4397 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4398 part is missing. DEPENDS_ON is a set of the invariants the computation
4399 depends on. */
4400
4401 static comp_cost
4402 difference_cost (struct ivopts_data *data,
4403 tree e1, tree e2, bool *symbol_present, bool *var_present,
4404 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4405 {
4406 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4407 unsigned HOST_WIDE_INT off1, off2;
4408 aff_tree aff_e1, aff_e2;
4409 tree type;
4410
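/* Constant offsets of E1 and E2 go directly into *OFFSET; only the
   remaining variable parts need to be costed below.  */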
4411 e1 = strip_offset (e1, &off1);
4412 e2 = strip_offset (e2, &off2);
4413 *offset += off1 - off2;
4414
4415 STRIP_NOPS (e1);
4416 STRIP_NOPS (e2);
4417
4418 if (TREE_CODE (e1) == ADDR_EXPR)
4419 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4420 offset, depends_on);
4421 *symbol_present = false;
4422
4423 if (operand_equal_p (e1, e2, 0))
4424 {
4425 *var_present = false;
4426 return no_cost;
4427 }
4428
4429 *var_present = true;
4430
4431 if (integer_zerop (e2))
4432 return force_var_cost (data, e1, depends_on);
4433
4434 if (integer_zerop (e1))
4435 {
4436 comp_cost cost = force_var_cost (data, e2, depends_on);
4437 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4438 return cost;
4439 }
4440
4441 type = signed_type_for (TREE_TYPE (e1));
4442 tree_to_aff_combination (e1, type, &aff_e1);
4443 tree_to_aff_combination (e2, type, &aff_e2);
4444 aff_combination_scale (&aff_e2, -1);
4445 aff_combination_add (&aff_e1, &aff_e2);
4446
4447 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4448 }
4449
4450 /* Returns true if AFF1 and AFF2 are identical. */
4451
4452 static bool
4453 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4454 {
4455 unsigned i;
4456
4457 if (aff1->n != aff2->n)
4458 return false;
4459
4460 for (i = 0; i < aff1->n; i++)
4461 {
4462 if (aff1->elts[i].coef != aff2->elts[i].coef)
4463 return false;
4464
4465 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4466 return false;
4467 }
4468 return true;
4469 }
4470
4471 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4472
4473 static int
4474 get_expr_id (struct ivopts_data *data, tree expr)
4475 {
4476 struct iv_inv_expr_ent ent;
4477 struct iv_inv_expr_ent **slot;
4478
4479 ent.expr = expr;
4480 ent.hash = iterative_hash_expr (expr, 0);
4481 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4482 if (*slot)
4483 return (*slot)->id;
4484
4485 *slot = XNEW (struct iv_inv_expr_ent);
4486 (*slot)->expr = expr;
4487 (*slot)->hash = ent.hash;
4488 (*slot)->id = data->inv_expr_id++;
4489 return (*slot)->id;
4490 }
4491
4492 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4493 requires a new compiler generated temporary. Returns -1 otherwise.
4494 ADDRESS_P is a flag indicating if the expression is for address
4495 computation. */
4496
4497 static int
4498 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4499 tree cbase, HOST_WIDE_INT ratio,
4500 bool address_p)
4501 {
4502 aff_tree ubase_aff, cbase_aff;
4503 tree expr, ub, cb;
4504
4505 STRIP_NOPS (ubase);
4506 STRIP_NOPS (cbase);
4507 ub = ubase;
4508 cb = cbase;
4509
4510 if ((TREE_CODE (ubase) == INTEGER_CST)
4511 && (TREE_CODE (cbase) == INTEGER_CST))
4512 return -1;
4513
4514 /* Strips the constant part. */
4515 if (TREE_CODE (ubase) == PLUS_EXPR
4516 || TREE_CODE (ubase) == MINUS_EXPR
4517 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4518 {
4519 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4520 ubase = TREE_OPERAND (ubase, 0);
4521 }
4522
4523 /* Strips the constant part. */
4524 if (TREE_CODE (cbase) == PLUS_EXPR
4525 || TREE_CODE (cbase) == MINUS_EXPR
4526 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4527 {
4528 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4529 cbase = TREE_OPERAND (cbase, 0);
4530 }
4531
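/* For address computations, if one base is an invariant address or an
   SSA name and the other is an integer constant, no compiler-generated
   temporary is needed for the difference.  */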
4532 if (address_p)
4533 {
4534 if (((TREE_CODE (ubase) == SSA_NAME)
4535 || (TREE_CODE (ubase) == ADDR_EXPR
4536 && is_gimple_min_invariant (ubase)))
4537 && (TREE_CODE (cbase) == INTEGER_CST))
4538 return -1;
4539
4540 if (((TREE_CODE (cbase) == SSA_NAME)
4541 || (TREE_CODE (cbase) == ADDR_EXPR
4542 && is_gimple_min_invariant (cbase)))
4543 && (TREE_CODE (ubase) == INTEGER_CST))
4544 return -1;
4545 }
4546
4547 if (ratio == 1)
4548 {
4549 if (operand_equal_p (ubase, cbase, 0))
4550 return -1;
4551
4552 if (TREE_CODE (ubase) == ADDR_EXPR
4553 && TREE_CODE (cbase) == ADDR_EXPR)
4554 {
4555 tree usym, csym;
4556
4557 usym = TREE_OPERAND (ubase, 0);
4558 csym = TREE_OPERAND (cbase, 0);
4559 if (TREE_CODE (usym) == ARRAY_REF)
4560 {
4561 tree ind = TREE_OPERAND (usym, 1);
4562 if (TREE_CODE (ind) == INTEGER_CST
4563 && tree_fits_shwi_p (ind)
4564 && tree_to_shwi (ind) == 0)
4565 usym = TREE_OPERAND (usym, 0);
4566 }
4567 if (TREE_CODE (csym) == ARRAY_REF)
4568 {
4569 tree ind = TREE_OPERAND (csym, 1);
4570 if (TREE_CODE (ind) == INTEGER_CST
4571 && tree_fits_shwi_p (ind)
4572 && tree_to_shwi (ind) == 0)
4573 csym = TREE_OPERAND (csym, 0);
4574 }
4575 if (operand_equal_p (usym, csym, 0))
4576 return -1;
4577 }
4578 /* Now do a more complex comparison.  */
4579 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4580 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4581 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4582 return -1;
4583 }
4584
4585 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4586 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4587
4588 aff_combination_scale (&cbase_aff, -1 * ratio);
4589 aff_combination_add (&ubase_aff, &cbase_aff);
4590 expr = aff_combination_to_tree (&ubase_aff);
4591 return get_expr_id (data, expr);
4592 }
4593
4594
4595
4596 /* Determines the cost of the computation by which USE is expressed
4597 from induction variable CAND.  If ADDRESS_P is true, we just need
4598 to create an address from it, otherwise we want to get it into a
4599 register.  A set of invariants we depend on is stored in
4600 DEPENDS_ON.  AT is the statement at which the value is computed.
4601 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4602 addressing is likely.  */
4603
4604 static comp_cost
4605 get_computation_cost_at (struct ivopts_data *data,
4606 struct iv_use *use, struct iv_cand *cand,
4607 bool address_p, bitmap *depends_on, gimple *at,
4608 bool *can_autoinc,
4609 int *inv_expr_id)
4610 {
4611 tree ubase = use->iv->base, ustep = use->iv->step;
4612 tree cbase, cstep;
4613 tree utype = TREE_TYPE (ubase), ctype;
4614 unsigned HOST_WIDE_INT cstepi, offset = 0;
4615 HOST_WIDE_INT ratio, aratio;
4616 bool var_present, symbol_present, stmt_is_after_inc;
4617 comp_cost cost;
4618 widest_int rat;
4619 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4620 machine_mode mem_mode = (address_p
4621 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4622 : VOIDmode);
4623
4624 if (depends_on)
4625 *depends_on = NULL;
4626
4627 /* Only consider real candidates. */
4628 if (!cand->iv)
4629 return infinite_cost;
4630
4631 cbase = cand->iv->base;
4632 cstep = cand->iv->step;
4633 ctype = TREE_TYPE (cbase);
4634
4635 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4636 {
4637 /* We do not have a precision to express the values of use. */
4638 return infinite_cost;
4639 }
4640
4641 if (address_p
4642 || (use->iv->base_object
4643 && cand->iv->base_object
4644 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4645 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4646 {
4647 /* Do not try to express address of an object with computation based
4648 on address of a different object. This may cause problems in rtl
4649 level alias analysis (that does not expect this to be happening,
4650 as this is illegal in C), and would be unlikely to be useful
4651 anyway. */
4652 if (use->iv->base_object
4653 && cand->iv->base_object
4654 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4655 return infinite_cost;
4656 }
4657
4658 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4659 {
4660 /* TODO -- add direct handling of this case. */
4661 goto fallback;
4662 }
4663
4664 /* CSTEPI is removed from the offset in case the statement is after the
4665 increment.  If the step is not constant, we use zero instead.
4666 This is a bit imprecise (there is the extra addition), but
4667 redundancy elimination is likely to transform the code so that
4668 it uses the value of the variable before the increment anyway,
4669 so this is not all that unrealistic.  */
4670 if (cst_and_fits_in_hwi (cstep))
4671 cstepi = int_cst_value (cstep);
4672 else
4673 cstepi = 0;
4674
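/* The use can be expressed from the candidate only if USTEP is a known
   constant multiple of CSTEP; the multiple becomes RATIO below and must
   fit in a signed HOST_WIDE_INT.  */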
4675 if (!constant_multiple_of (ustep, cstep, &rat))
4676 return infinite_cost;
4677
4678 if (wi::fits_shwi_p (rat))
4679 ratio = rat.to_shwi ();
4680 else
4681 return infinite_cost;
4682
4683 STRIP_NOPS (cbase);
4684 ctype = TREE_TYPE (cbase);
4685
4686 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4687
4688 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4689 or ratio == 1, it is better to handle this like
4690
4691 ubase - ratio * cbase + ratio * var
4692
4693 (this also holds in the case ratio == -1, TODO).  */
4694
4695 if (cst_and_fits_in_hwi (cbase))
4696 {
4697 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4698 cost = difference_cost (data,
4699 ubase, build_int_cst (utype, 0),
4700 &symbol_present, &var_present, &offset,
4701 depends_on);
4702 cost.cost /= avg_loop_niter (data->current_loop);
4703 }
4704 else if (ratio == 1)
4705 {
4706 tree real_cbase = cbase;
4707
4708 /* Check to see if any adjustment is needed. */
4709 if (cstepi == 0 && stmt_is_after_inc)
4710 {
4711 aff_tree real_cbase_aff;
4712 aff_tree cstep_aff;
4713
4714 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4715 &real_cbase_aff);
4716 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4717
4718 aff_combination_add (&real_cbase_aff, &cstep_aff);
4719 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4720 }
4721
4722 cost = difference_cost (data,
4723 ubase, real_cbase,
4724 &symbol_present, &var_present, &offset,
4725 depends_on);
4726 cost.cost /= avg_loop_niter (data->current_loop);
4727 }
4728 else if (address_p
4729 && !POINTER_TYPE_P (ctype)
4730 && multiplier_allowed_in_address_p
4731 (ratio, mem_mode,
4732 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4733 {
4734 if (cstepi == 0 && stmt_is_after_inc)
4735 {
4736 if (POINTER_TYPE_P (ctype))
4737 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4738 else
4739 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4740 }
4741 cbase
4742 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4743 cost = difference_cost (data,
4744 ubase, cbase,
4745 &symbol_present, &var_present, &offset,
4746 depends_on);
4747 cost.cost /= avg_loop_niter (data->current_loop);
4748 }
4749 else
4750 {
4751 cost = force_var_cost (data, cbase, depends_on);
4752 cost = add_costs (cost,
4753 difference_cost (data,
4754 ubase, build_int_cst (utype, 0),
4755 &symbol_present, &var_present,
4756 &offset, depends_on));
4757 cost.cost /= avg_loop_niter (data->current_loop);
4758 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4759 }
4760
4761 /* The set of invariants depended on by a sub-use has already been
4762 computed for the first use in the group.  */
4763 if (use->sub_id)
4764 {
4765 cost.cost = 0;
4766 if (depends_on && *depends_on)
4767 bitmap_clear (*depends_on);
4768 }
4769 else if (inv_expr_id)
4770 {
4771 *inv_expr_id =
4772 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4773 /* Clear depends on. */
4774 if (*inv_expr_id != -1 && depends_on && *depends_on)
4775 bitmap_clear (*depends_on);
4776 }
4777
4778 /* If we are after the increment, the value of the candidate is ahead by
4779 one step.  */
4780 if (stmt_is_after_inc)
4781 offset -= ratio * cstepi;
4782
4783 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4784 (symbol/var1/const parts may be omitted). If we are looking for an
4785 address, find the cost of addressing this. */
4786 if (address_p)
4787 return add_costs (cost,
4788 get_address_cost (symbol_present, var_present,
4789 offset, ratio, cstepi,
4790 mem_mode,
4791 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4792 speed, stmt_is_after_inc,
4793 can_autoinc));
4794
4795 /* Otherwise estimate the costs for computing the expression. */
4796 if (!symbol_present && !var_present && !offset)
4797 {
4798 if (ratio != 1)
4799 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4800 return cost;
4801 }
4802
4803 /* Symbol + offset should be compile-time computable so consider that they
4804 are added once to the variable, if present. */
4805 if (var_present && (symbol_present || offset))
4806 cost.cost += adjust_setup_cost (data,
4807 add_cost (speed, TYPE_MODE (ctype)));
4808
4809 /* Having offset does not affect runtime cost in case it is added to
4810 symbol, but it increases complexity. */
4811 if (offset)
4812 cost.complexity++;
4813
4814 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4815
4816 aratio = ratio > 0 ? ratio : -ratio;
4817 if (aratio != 1)
4818 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4819 return cost;
4820
4821 fallback:
4822 if (can_autoinc)
4823 *can_autoinc = false;
4824
4825 {
4826 /* Just get the expression, expand it and measure the cost. */
4827 tree comp = get_computation_at (data->current_loop, use, cand, at);
4828
4829 if (!comp)
4830 return infinite_cost;
4831
4832 if (address_p)
4833 comp = build_simple_mem_ref (comp);
4834
4835 return new_cost (computation_cost (comp, speed), 0);
4836 }
4837 }
4838
4839 /* Determines the cost of the computation by which USE is expressed
4840 from induction variable CAND.  If ADDRESS_P is true, we just need
4841 to create an address from it, otherwise we want to get it into a
4842 register.  A set of invariants we depend on is stored in
4843 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4844 autoinc addressing is likely. */
4845
4846 static comp_cost
4847 get_computation_cost (struct ivopts_data *data,
4848 struct iv_use *use, struct iv_cand *cand,
4849 bool address_p, bitmap *depends_on,
4850 bool *can_autoinc, int *inv_expr_id)
4851 {
4852 return get_computation_cost_at (data,
4853 use, cand, address_p, depends_on, use->stmt,
4854 can_autoinc, inv_expr_id);
4855 }
4856
4857 /* Determines cost of basing replacement of USE on CAND in a generic
4858 expression. */
4859
4860 static bool
4861 determine_use_iv_cost_generic (struct ivopts_data *data,
4862 struct iv_use *use, struct iv_cand *cand)
4863 {
4864 bitmap depends_on;
4865 comp_cost cost;
4866 int inv_expr_id = -1;
4867
4868 /* The simple case first -- if we need to express the value of the preserved
4869 original biv, the cost is 0. This also prevents us from counting the
4870 cost of increment twice -- once at this use and once in the cost of
4871 the candidate. */
4872 if (cand->pos == IP_ORIGINAL
4873 && cand->incremented_at == use->stmt)
4874 {
4875 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4876 ERROR_MARK, -1);
4877 return true;
4878 }
4879
4880 cost = get_computation_cost (data, use, cand, false, &depends_on,
4881 NULL, &inv_expr_id);
4882
4883 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4884 inv_expr_id);
4885
4886 return !infinite_cost_p (cost);
4887 }
4888
4889 /* Determines cost of basing replacement of USE on CAND in an address. */
4890
4891 static bool
4892 determine_use_iv_cost_address (struct ivopts_data *data,
4893 struct iv_use *use, struct iv_cand *cand)
4894 {
4895 bitmap depends_on;
4896 bool can_autoinc;
4897 int inv_expr_id = -1;
4898 struct iv_use *sub_use;
4899 comp_cost sub_cost;
4900 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4901 &can_autoinc, &inv_expr_id);
4902
4903 if (cand->ainc_use == use)
4904 {
4905 if (can_autoinc)
4906 cost.cost -= cand->cost_step;
4907 /* If we generated the candidate solely for exploiting autoincrement
4908 opportunities, and it turns out it can't be used, set the cost to
4909 infinity to make sure we ignore it. */
4910 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4911 cost = infinite_cost;
4912 }
4913 for (sub_use = use->next;
4914 sub_use && !infinite_cost_p (cost);
4915 sub_use = sub_use->next)
4916 {
4917 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
4918 &can_autoinc, NULL);
4919 cost = add_costs (cost, sub_cost);
4920 }
4921
4922 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4923 inv_expr_id);
4924
4925 return !infinite_cost_p (cost);
4926 }
4927
4928 /* Computes value of candidate CAND at position AT in iteration NITER, and
4929 stores it to VAL. */
4930
4931 static void
4932 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4933 aff_tree *val)
4934 {
4935 aff_tree step, delta, nit;
4936 struct iv *iv = cand->iv;
4937 tree type = TREE_TYPE (iv->base);
4938 tree steptype = unsigned_type_for (type);
4942
4943 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4944 aff_combination_convert (&step, steptype);
4945 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4946 aff_combination_convert (&nit, steptype);
4947 aff_combination_mult (&nit, &step, &delta);
4948 if (stmt_after_increment (loop, cand, at))
4949 aff_combination_add (&delta, &step);
4950
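/* DELTA is now STEP * NITER, plus one extra STEP if the value is taken
   after the increment; the candidate value is BASE + DELTA.  */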
4951 tree_to_aff_combination (iv->base, type, val);
4952 if (!POINTER_TYPE_P (type))
4953 aff_combination_convert (val, steptype);
4954 aff_combination_add (val, &delta);
4955 }
4956
4957 /* Returns period of induction variable iv. */
4958
4959 static tree
4960 iv_period (struct iv *iv)
4961 {
4962 tree step = iv->step, period, type;
4963 tree pow2div;
4964
4965 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4966
4967 type = unsigned_type_for (TREE_TYPE (step));
4968 /* The period of the iv is lcm (step, type_range)/step - 1,
4969 i.e., N*type_range/step - 1.  Since the type range is a power
4970 of two, N == step >> num_of_ending_zeros_binary (step),
4971 so the final result is
4972
4973 (type_range >> num_of_ending_zeros_binary (step)) - 1
4974
4975 */
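/* For instance, for a 32-bit unsigned type and step 12 (= 3 * 2^2),
   the period is (2^32 >> 2) - 1 = 0x3fffffff.  */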
4976 pow2div = num_ending_zeros (step);
4977
4978 period = build_low_bits_mask (type,
4979 (TYPE_PRECISION (type)
4980 - tree_to_uhwi (pow2div)));
4981
4982 return period;
4983 }
4984
4985 /* Returns the comparison operator used when eliminating the iv USE. */
4986
4987 static enum tree_code
4988 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4989 {
4990 struct loop *loop = data->current_loop;
4991 basic_block ex_bb;
4992 edge exit;
4993
4994 ex_bb = gimple_bb (use->stmt);
4995 exit = EDGE_SUCC (ex_bb, 0);
4996 if (flow_bb_inside_loop_p (loop, exit->dest))
4997 exit = EDGE_SUCC (ex_bb, 1);
4998
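/* If the loop is exited on the true edge of the condition, the rewritten
   condition must be true exactly when the candidate reaches the exit
   value, hence EQ_EXPR; if it is exited on the false edge, the condition
   must become false at that point, hence NE_EXPR.  */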
4999 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5000 }
5001
5002 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5003 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5004 calculation is performed in non-wrapping type.
5005
5006 TODO: More generally, we could test for the situation that
5007 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5008 This would require knowing the sign of OFFSET. */
5009
5010 static bool
5011 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5012 {
5013 enum tree_code code;
5014 tree e1, e2;
5015 aff_tree aff_e1, aff_e2, aff_offset;
5016
5017 if (!nowrap_type_p (TREE_TYPE (base)))
5018 return false;
5019
5020 base = expand_simple_operations (base);
5021
5022 if (TREE_CODE (base) == SSA_NAME)
5023 {
5024 gimple *stmt = SSA_NAME_DEF_STMT (base);
5025
5026 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5027 return false;
5028
5029 code = gimple_assign_rhs_code (stmt);
5030 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5031 return false;
5032
5033 e1 = gimple_assign_rhs1 (stmt);
5034 e2 = gimple_assign_rhs2 (stmt);
5035 }
5036 else
5037 {
5038 code = TREE_CODE (base);
5039 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5040 return false;
5041 e1 = TREE_OPERAND (base, 0);
5042 e2 = TREE_OPERAND (base, 1);
5043 }
5044
5045 /* Use affine expansion as deeper inspection to prove the equality. */
5046 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5047 &aff_e2, &data->name_expansion_cache);
5048 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5049 &aff_offset, &data->name_expansion_cache);
5050 aff_combination_scale (&aff_offset, -1);
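/* AFF_OFFSET now represents -OFFSET.  BASE - OFFSET cannot overflow if
   one of the addends of BASE is exactly OFFSET, i.e. if adding AFF_OFFSET
   to it cancels out to zero.  */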
5051 switch (code)
5052 {
5053 case PLUS_EXPR:
5054 aff_combination_add (&aff_e2, &aff_offset);
5055 if (aff_combination_zero_p (&aff_e2))
5056 return true;
5057
5058 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5059 &aff_e1, &data->name_expansion_cache);
5060 aff_combination_add (&aff_e1, &aff_offset);
5061 return aff_combination_zero_p (&aff_e1);
5062
5063 case POINTER_PLUS_EXPR:
5064 aff_combination_add (&aff_e2, &aff_offset);
5065 return aff_combination_zero_p (&aff_e2);
5066
5067 default:
5068 return false;
5069 }
5070 }
5071
5072 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5073 comparison with CAND. NITER describes the number of iterations of
5074 the loops. If successful, the comparison in COMP_P is altered accordingly.
5075
5076 We aim to handle the following situation:
5077
5078 sometype *base, *p;
5079 int a, b, i;
5080
5081 i = a;
5082 p = p_0 = base + a;
5083
5084 do
5085 {
5086 bla (*p);
5087 p++;
5088 i++;
5089 }
5090 while (i < b);
5091
5092 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5093 We aim to optimize this to
5094
5095 p = p_0 = base + a;
5096 do
5097 {
5098 bla (*p);
5099 p++;
5100 }
5101 while (p < p_0 - a + b);
5102
5103 This preserves correctness, since the pointer arithmetic does not
5104 overflow.  More precisely:
5105
5106 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5107 overflow in computing it or the values of p.
5108 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5109 overflow. To prove this, we use the fact that p_0 = base + a. */
5110
5111 static bool
5112 iv_elimination_compare_lt (struct ivopts_data *data,
5113 struct iv_cand *cand, enum tree_code *comp_p,
5114 struct tree_niter_desc *niter)
5115 {
5116 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5117 struct aff_tree nit, tmpa, tmpb;
5118 enum tree_code comp;
5119 HOST_WIDE_INT step;
5120
5121 /* We need to know that the candidate induction variable does not overflow.
5122 While more complex analysis may be used to prove this, for now just
5123 check that the variable appears in the original program and that it
5124 is computed in a type that guarantees no overflows. */
5125 cand_type = TREE_TYPE (cand->iv->base);
5126 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5127 return false;
5128
5129 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5130 the calculation of the BOUND could overflow, making the comparison
5131 invalid. */
5132 if (!data->loop_single_exit_p)
5133 return false;
5134
5135 /* We need to be able to decide whether candidate is increasing or decreasing
5136 in order to choose the right comparison operator. */
5137 if (!cst_and_fits_in_hwi (cand->iv->step))
5138 return false;
5139 step = int_cst_value (cand->iv->step);
5140
5141 /* Check that the number of iterations matches the expected pattern:
5142 a + 1 > b ? 0 : b - a - 1. */
5143 mbz = niter->may_be_zero;
5144 if (TREE_CODE (mbz) == GT_EXPR)
5145 {
5146 /* Handle a + 1 > b. */
5147 tree op0 = TREE_OPERAND (mbz, 0);
5148 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5149 {
5150 a = TREE_OPERAND (op0, 0);
5151 b = TREE_OPERAND (mbz, 1);
5152 }
5153 else
5154 return false;
5155 }
5156 else if (TREE_CODE (mbz) == LT_EXPR)
5157 {
5158 tree op1 = TREE_OPERAND (mbz, 1);
5159
5160 /* Handle b < a + 1. */
5161 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5162 {
5163 a = TREE_OPERAND (op1, 0);
5164 b = TREE_OPERAND (mbz, 0);
5165 }
5166 else
5167 return false;
5168 }
5169 else
5170 return false;
5171
5172 /* Expected number of iterations is B - A - 1. Check that it matches
5173 the actual number, i.e., that B - A - NITER = 1. */
5174 tree_to_aff_combination (niter->niter, nit_type, &nit);
5175 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5176 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5177 aff_combination_scale (&nit, -1);
5178 aff_combination_scale (&tmpa, -1);
5179 aff_combination_add (&tmpb, &tmpa);
5180 aff_combination_add (&tmpb, &nit);
5181 if (tmpb.n != 0 || tmpb.offset != 1)
5182 return false;
5183
5184 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5185 overflow. */
5186 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5187 cand->iv->step,
5188 fold_convert (TREE_TYPE (cand->iv->step), a));
5189 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5190 return false;
5191
5192 /* Determine the new comparison operator. */
5193 comp = step < 0 ? GT_EXPR : LT_EXPR;
5194 if (*comp_p == NE_EXPR)
5195 *comp_p = comp;
5196 else if (*comp_p == EQ_EXPR)
5197 *comp_p = invert_tree_comparison (comp, false);
5198 else
5199 gcc_unreachable ();
5200
5201 return true;
5202 }
5203
5204 /* Check whether it is possible to express the condition in USE by comparison
5205 of candidate CAND. If so, store the value compared with to BOUND, and the
5206 comparison operator to COMP. */
5207
5208 static bool
5209 may_eliminate_iv (struct ivopts_data *data,
5210 struct iv_use *use, struct iv_cand *cand, tree *bound,
5211 enum tree_code *comp)
5212 {
5213 basic_block ex_bb;
5214 edge exit;
5215 tree period;
5216 struct loop *loop = data->current_loop;
5217 aff_tree bnd;
5218 struct tree_niter_desc *desc = NULL;
5219
5220 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5221 return false;
5222
5223 /* For now this works only for exits that dominate the loop latch.
5224 TODO: extend to other conditions inside the loop body.  */
5225 ex_bb = gimple_bb (use->stmt);
5226 if (use->stmt != last_stmt (ex_bb)
5227 || gimple_code (use->stmt) != GIMPLE_COND
5228 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5229 return false;
5230
5231 exit = EDGE_SUCC (ex_bb, 0);
5232 if (flow_bb_inside_loop_p (loop, exit->dest))
5233 exit = EDGE_SUCC (ex_bb, 1);
5234 if (flow_bb_inside_loop_p (loop, exit->dest))
5235 return false;
5236
5237 desc = niter_for_exit (data, exit);
5238 if (!desc)
5239 return false;
5240
5241 /* Determine whether we can use the variable to test the exit condition.
5242 This is the case iff the period of the induction variable is greater
5243 than the number of iterations for which the exit condition is true. */
5244 period = iv_period (cand->iv);
5245
5246 /* If the number of iterations is constant, compare against it directly. */
5247 if (TREE_CODE (desc->niter) == INTEGER_CST)
5248 {
5249 /* See cand_value_at. */
5250 if (stmt_after_increment (loop, cand, use->stmt))
5251 {
5252 if (!tree_int_cst_lt (desc->niter, period))
5253 return false;
5254 }
5255 else
5256 {
5257 if (tree_int_cst_lt (period, desc->niter))
5258 return false;
5259 }
5260 }
5261
5262 /* If not, and if this is the only possible exit of the loop, see whether
5263 we can get a conservative estimate on the number of iterations of the
5264 entire loop and compare against that instead. */
5265 else
5266 {
5267 widest_int period_value, max_niter;
5268
5269 max_niter = desc->max;
5270 if (stmt_after_increment (loop, cand, use->stmt))
5271 max_niter += 1;
5272 period_value = wi::to_widest (period);
5273 if (wi::gtu_p (max_niter, period_value))
5274 {
5275 /* See if we can take advantage of inferred loop bound information. */
5276 if (data->loop_single_exit_p)
5277 {
5278 if (!max_loop_iterations (loop, &max_niter))
5279 return false;
5280 /* The loop bound is already adjusted by adding 1. */
5281 if (wi::gtu_p (max_niter, period_value))
5282 return false;
5283 }
5284 else
5285 return false;
5286 }
5287 }
5288
5289 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5290
5291 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5292 aff_combination_to_tree (&bnd));
5293 *comp = iv_elimination_compare (data, use);
5294
5295 /* It is unlikely that computing the number of iterations using division
5296 would be more profitable than keeping the original induction variable. */
5297 if (expression_expensive_p (*bound))
5298 return false;
5299
5300 /* Sometimes it is possible to handle the situation that the number of
5301 iterations may be zero unless additional assumptions hold, by using
5302 < instead of != in the exit condition.
5303
5304 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5305 base the exit condition on it. However, that is often too
5306 expensive. */
5307 if (!integer_zerop (desc->may_be_zero))
5308 return iv_elimination_compare_lt (data, cand, comp, desc);
5309
5310 return true;
5311 }
5312
5313 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5314 be copied, if it is used in the loop body and DATA->body_includes_call. */
5315
5316 static int
5317 parm_decl_cost (struct ivopts_data *data, tree bound)
5318 {
5319 tree sbound = bound;
5320 STRIP_NOPS (sbound);
5321
5322 if (TREE_CODE (sbound) == SSA_NAME
5323 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5324 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5325 && data->body_includes_call)
5326 return COSTS_N_INSNS (1);
5327
5328 return 0;
5329 }
5330
5331 /* Determines cost of basing replacement of USE on CAND in a condition. */
5332
5333 static bool
5334 determine_use_iv_cost_condition (struct ivopts_data *data,
5335 struct iv_use *use, struct iv_cand *cand)
5336 {
5337 tree bound = NULL_TREE;
5338 struct iv *cmp_iv;
5339 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5340 comp_cost elim_cost, express_cost, cost, bound_cost;
5341 bool ok;
5342 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5343 tree *control_var, *bound_cst;
5344 enum tree_code comp = ERROR_MARK;
5345
5346 /* Only consider real candidates. */
5347 if (!cand->iv)
5348 {
5349 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5350 ERROR_MARK, -1);
5351 return false;
5352 }
5353
5354 /* Try iv elimination. */
5355 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5356 {
5357 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5358 if (elim_cost.cost == 0)
5359 elim_cost.cost = parm_decl_cost (data, bound);
5360 else if (TREE_CODE (bound) == INTEGER_CST)
5361 elim_cost.cost = 0;
5362 /* If we replace a loop condition 'i < n' with 'p < base + n',
5363 depends_on_elim will have 'base' and 'n' set, which implies
5364 that both 'base' and 'n' will be live during the loop. More likely,
5365 'base + n' will be loop invariant, resulting in only one live value
5366 during the loop. So in that case we clear depends_on_elim and set
5367 elim_inv_expr_id instead. */
5368 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5369 {
5370 elim_inv_expr_id = get_expr_id (data, bound);
5371 bitmap_clear (depends_on_elim);
5372 }
5373 /* The bound is a loop invariant, so it will be only computed
5374 once. */
5375 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5376 }
5377 else
5378 elim_cost = infinite_cost;
5379
5380 /* Try expressing the original giv. If it is compared with an invariant,
5381 note that we cannot get rid of it. */
5382 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5383 NULL, &cmp_iv);
5384 gcc_assert (ok);
5385
5386 /* When the condition is a comparison of the candidate IV against
5387 zero, prefer this IV.
5388
5389 TODO: The constant that we're subtracting from the cost should
5390 be target-dependent. This information should be added to the
5391 target costs for each backend. */
5392 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5393 && integer_zerop (*bound_cst)
5394 && (operand_equal_p (*control_var, cand->var_after, 0)
5395 || operand_equal_p (*control_var, cand->var_before, 0)))
5396 elim_cost.cost -= 1;
5397
5398 express_cost = get_computation_cost (data, use, cand, false,
5399 &depends_on_express, NULL,
5400 &express_inv_expr_id);
5401 fd_ivopts_data = data;
5402 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5403
5404 /* Count the cost of the original bound as well. */
5405 bound_cost = force_var_cost (data, *bound_cst, NULL);
5406 if (bound_cost.cost == 0)
5407 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5408 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5409 bound_cost.cost = 0;
5410 express_cost.cost += bound_cost.cost;
5411
5412 /* Choose the better approach, preferring the eliminated IV. */
5413 if (compare_costs (elim_cost, express_cost) <= 0)
5414 {
5415 cost = elim_cost;
5416 depends_on = depends_on_elim;
5417 depends_on_elim = NULL;
5418 inv_expr_id = elim_inv_expr_id;
5419 }
5420 else
5421 {
5422 cost = express_cost;
5423 depends_on = depends_on_express;
5424 depends_on_express = NULL;
5425 bound = NULL_TREE;
5426 comp = ERROR_MARK;
5427 inv_expr_id = express_inv_expr_id;
5428 }
5429
5430 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5431
5432 if (depends_on_elim)
5433 BITMAP_FREE (depends_on_elim);
5434 if (depends_on_express)
5435 BITMAP_FREE (depends_on_express);
5436
5437 return !infinite_cost_p (cost);
5438 }
5439
5440 /* Determines cost of basing replacement of USE on CAND. Returns false
5441 if USE cannot be based on CAND. */
5442
5443 static bool
5444 determine_use_iv_cost (struct ivopts_data *data,
5445 struct iv_use *use, struct iv_cand *cand)
5446 {
5447 switch (use->type)
5448 {
5449 case USE_NONLINEAR_EXPR:
5450 return determine_use_iv_cost_generic (data, use, cand);
5451
5452 case USE_ADDRESS:
5453 return determine_use_iv_cost_address (data, use, cand);
5454
5455 case USE_COMPARE:
5456 return determine_use_iv_cost_condition (data, use, cand);
5457
5458 default:
5459 gcc_unreachable ();
5460 }
5461 }
5462
5463 /* Return true if get_computation_cost indicates that autoincrement is
5464 a possibility for the pair of USE and CAND, false otherwise. */
5465
5466 static bool
5467 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5468 struct iv_cand *cand)
5469 {
5470 bitmap depends_on;
5471 bool can_autoinc;
5472 comp_cost cost;
5473
5474 if (use->type != USE_ADDRESS)
5475 return false;
5476
5477 cost = get_computation_cost (data, use, cand, true, &depends_on,
5478 &can_autoinc, NULL);
5479
5480 BITMAP_FREE (depends_on);
5481
5482 return !infinite_cost_p (cost) && can_autoinc;
5483 }
5484
5485 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5486 use that allows autoincrement, and set their AINC_USE if possible. */
5487
5488 static void
5489 set_autoinc_for_original_candidates (struct ivopts_data *data)
5490 {
5491 unsigned i, j;
5492
5493 for (i = 0; i < n_iv_cands (data); i++)
5494 {
5495 struct iv_cand *cand = iv_cand (data, i);
5496 struct iv_use *closest_before = NULL;
5497 struct iv_use *closest_after = NULL;
5498 if (cand->pos != IP_ORIGINAL)
5499 continue;
5500
5501 for (j = 0; j < n_iv_uses (data); j++)
5502 {
5503 struct iv_use *use = iv_use (data, j);
5504 unsigned uid = gimple_uid (use->stmt);
5505
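/* Here we rely on gimple uids reflecting the order of statements within
   a basic block, so comparing them with the uid of the increment finds
   the uses closest before and after it.  */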
5506 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5507 continue;
5508
5509 if (uid < gimple_uid (cand->incremented_at)
5510 && (closest_before == NULL
5511 || uid > gimple_uid (closest_before->stmt)))
5512 closest_before = use;
5513
5514 if (uid > gimple_uid (cand->incremented_at)
5515 && (closest_after == NULL
5516 || uid < gimple_uid (closest_after->stmt)))
5517 closest_after = use;
5518 }
5519
5520 if (closest_before != NULL
5521 && autoinc_possible_for_pair (data, closest_before, cand))
5522 cand->ainc_use = closest_before;
5523 else if (closest_after != NULL
5524 && autoinc_possible_for_pair (data, closest_after, cand))
5525 cand->ainc_use = closest_after;
5526 }
5527 }
5528
5529 /* Finds the candidates for the induction variables. */
5530
5531 static void
5532 find_iv_candidates (struct ivopts_data *data)
5533 {
5534 /* Add commonly used ivs. */
5535 add_standard_iv_candidates (data);
5536
5537 /* Add old induction variables. */
5538 add_iv_candidate_for_bivs (data);
5539
5540 /* Add induction variables derived from uses. */
5541 add_iv_candidate_for_uses (data);
5542
5543 set_autoinc_for_original_candidates (data);
5544
5545 /* Record the important candidates. */
5546 record_important_candidates (data);
5547 }
5548
5549 /* Determines costs of basing the use of the iv on an iv candidate. */
5550
5551 static void
5552 determine_use_iv_costs (struct ivopts_data *data)
5553 {
5554 unsigned i, j;
5555 struct iv_use *use;
5556 struct iv_cand *cand;
5557 bitmap to_clear = BITMAP_ALLOC (NULL);
5558
5559 alloc_use_cost_map (data);
5560
5561 for (i = 0; i < n_iv_uses (data); i++)
5562 {
5563 use = iv_use (data, i);
5564
5565 if (data->consider_all_candidates)
5566 {
5567 for (j = 0; j < n_iv_cands (data); j++)
5568 {
5569 cand = iv_cand (data, j);
5570 determine_use_iv_cost (data, use, cand);
5571 }
5572 }
5573 else
5574 {
5575 bitmap_iterator bi;
5576
5577 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5578 {
5579 cand = iv_cand (data, j);
5580 if (!determine_use_iv_cost (data, use, cand))
5581 bitmap_set_bit (to_clear, j);
5582 }
5583
5584 /* Remove the candidates for which the cost is infinite from the
5585 list of related candidates.  */
5586 bitmap_and_compl_into (use->related_cands, to_clear);
5587 bitmap_clear (to_clear);
5588 }
5589 }
5590
5591 BITMAP_FREE (to_clear);
5592
5593 if (dump_file && (dump_flags & TDF_DETAILS))
5594 {
5595 fprintf (dump_file, "Use-candidate costs:\n");
5596
5597 for (i = 0; i < n_iv_uses (data); i++)
5598 {
5599 use = iv_use (data, i);
5600
5601 fprintf (dump_file, "Use %d:\n", i);
5602 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5603 for (j = 0; j < use->n_map_members; j++)
5604 {
5605 if (!use->cost_map[j].cand
5606 || infinite_cost_p (use->cost_map[j].cost))
5607 continue;
5608
5609 fprintf (dump_file, " %d\t%d\t%d\t",
5610 use->cost_map[j].cand->id,
5611 use->cost_map[j].cost.cost,
5612 use->cost_map[j].cost.complexity);
5613 if (use->cost_map[j].depends_on)
5614 bitmap_print (dump_file,
5615 use->cost_map[j].depends_on, "","");
5616 if (use->cost_map[j].inv_expr_id != -1)
5617 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5618 fprintf (dump_file, "\n");
5619 }
5620
5621 fprintf (dump_file, "\n");
5622 }
5623 fprintf (dump_file, "\n");
5624 }
5625 }
5626
5627 /* Determines cost of the candidate CAND. */
5628
5629 static void
5630 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5631 {
5632 comp_cost cost_base;
5633 unsigned cost, cost_step;
5634 tree base;
5635
5636 if (!cand->iv)
5637 {
5638 cand->cost = 0;
5639 return;
5640 }
5641
5642 /* There are two costs associated with the candidate -- its increment
5643 and its initialization. The second is almost negligible for any loop
5644 that rolls enough, so we give it only a small weight.  */
5645
5646 base = cand->iv->base;
5647 cost_base = force_var_cost (data, base, NULL);
5648 /* It will be exceptional that the iv register happens to be initialized with
5649 the proper value at no cost. In general, there will at least be a regcopy
5650 or a const set. */
5651 if (cost_base.cost == 0)
5652 cost_base.cost = COSTS_N_INSNS (1);
5653 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5654
5655 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5656
5657 /* Prefer the original ivs unless we may gain something by replacing them.
5658 The reason is to make debugging simpler; hence this is not relevant for
5659 artificial ivs created by other optimization passes. */
5660 if (cand->pos != IP_ORIGINAL
5661 || !SSA_NAME_VAR (cand->var_before)
5662 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5663 cost++;
5664
5665 /* Prefer not to insert statements into latch unless there are some
5666 already (so that we do not create unnecessary jumps). */
5667 if (cand->pos == IP_END
5668 && empty_block_p (ip_end_pos (data->current_loop)))
5669 cost++;
5670
5671 cand->cost = cost;
5672 cand->cost_step = cost_step;
5673 }
5674
5675 /* Determines costs of computation of the candidates. */
5676
5677 static void
5678 determine_iv_costs (struct ivopts_data *data)
5679 {
5680 unsigned i;
5681
5682 if (dump_file && (dump_flags & TDF_DETAILS))
5683 {
5684 fprintf (dump_file, "Candidate costs:\n");
5685 fprintf (dump_file, " cand\tcost\n");
5686 }
5687
5688 for (i = 0; i < n_iv_cands (data); i++)
5689 {
5690 struct iv_cand *cand = iv_cand (data, i);
5691
5692 determine_iv_cost (data, cand);
5693
5694 if (dump_file && (dump_flags & TDF_DETAILS))
5695 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5696 }
5697
5698 if (dump_file && (dump_flags & TDF_DETAILS))
5699 fprintf (dump_file, "\n");
5700 }
5701
5702 /* Calculates cost for having SIZE induction variables. */
5703
5704 static unsigned
5705 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5706 {
5707 /* We add size to the cost, so that we prefer eliminating ivs
5708 if possible. */
5709 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5710 data->body_includes_call);
5711 }
5712
5713 /* For each size of the induction variable set determine the penalty. */
5714
5715 static void
5716 determine_set_costs (struct ivopts_data *data)
5717 {
5718 unsigned j, n;
5719 gphi *phi;
5720 gphi_iterator psi;
5721 tree op;
5722 struct loop *loop = data->current_loop;
5723 bitmap_iterator bi;
5724
5725 if (dump_file && (dump_flags & TDF_DETAILS))
5726 {
5727 fprintf (dump_file, "Global costs:\n");
5728 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5729 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5730 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5731 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5732 }
5733
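/* Count the registers that will be live across the loop regardless of
   the choice of ivs: loop header PHIs that are not induction variables,
   plus invariants with non-linear uses.  */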
5734 n = 0;
5735 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5736 {
5737 phi = psi.phi ();
5738 op = PHI_RESULT (phi);
5739
5740 if (virtual_operand_p (op))
5741 continue;
5742
5743 if (get_iv (data, op))
5744 continue;
5745
5746 n++;
5747 }
5748
5749 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5750 {
5751 struct version_info *info = ver_info (data, j);
5752
5753 if (info->inv_id && info->has_nonlin_use)
5754 n++;
5755 }
5756
5757 data->regs_used = n;
5758 if (dump_file && (dump_flags & TDF_DETAILS))
5759 fprintf (dump_file, " regs_used %d\n", n);
5760
5761 if (dump_file && (dump_flags & TDF_DETAILS))
5762 {
5763 fprintf (dump_file, " cost for size:\n");
5764 fprintf (dump_file, " ivs\tcost\n");
5765 for (j = 0; j <= 2 * target_avail_regs; j++)
5766 fprintf (dump_file, " %d\t%d\n", j,
5767 ivopts_global_cost_for_size (data, j));
5768 fprintf (dump_file, "\n");
5769 }
5770 }
5771
5772 /* Returns true if A is a cheaper cost pair than B. */
5773
5774 static bool
5775 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5776 {
5777 int cmp;
5778
5779 if (!a)
5780 return false;
5781
5782 if (!b)
5783 return true;
5784
5785 cmp = compare_costs (a->cost, b->cost);
5786 if (cmp < 0)
5787 return true;
5788
5789 if (cmp > 0)
5790 return false;
5791
5792 /* In case the costs are the same, prefer the cheaper candidate. */
5793 if (a->cand->cost < b->cand->cost)
5794 return true;
5795
5796 return false;
5797 }
5798
5799
5800 /* Returns the candidate by which USE is expressed in IVS. */
5801
5802 static struct cost_pair *
5803 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5804 {
5805 return ivs->cand_for_use[use->id];
5806 }
5807
5808 /* Computes the cost field of IVS structure. */
5809
5810 static void
5811 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5812 {
5813 comp_cost cost = ivs->cand_use_cost;
5814
5815 cost.cost += ivs->cand_cost;
5816
5817 cost.cost += ivopts_global_cost_for_size (data,
5818 ivs->n_regs + ivs->num_used_inv_expr);
5819
5820 ivs->cost = cost;
5821 }
5822
5823 /* Remove the invariants in set INVS from set IVS. */
5824
5825 static void
5826 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5827 {
5828 bitmap_iterator bi;
5829 unsigned iid;
5830
5831 if (!invs)
5832 return;
5833
5834 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5835 {
5836 ivs->n_invariant_uses[iid]--;
5837 if (ivs->n_invariant_uses[iid] == 0)
5838 ivs->n_regs--;
5839 }
5840 }
5841
5842 /* Set USE not to be expressed by any candidate in IVS. */
5843
5844 static void
5845 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5846 struct iv_use *use)
5847 {
5848 unsigned uid = use->id, cid;
5849 struct cost_pair *cp;
5850
5851 cp = ivs->cand_for_use[uid];
5852 if (!cp)
5853 return;
5854 cid = cp->cand->id;
5855
5856 ivs->bad_uses++;
5857 ivs->cand_for_use[uid] = NULL;
5858 ivs->n_cand_uses[cid]--;
5859
5860 if (ivs->n_cand_uses[cid] == 0)
5861 {
5862 bitmap_clear_bit (ivs->cands, cid);
5863 /* Do not count the pseudocandidates. */
5864 if (cp->cand->iv)
5865 ivs->n_regs--;
5866 ivs->n_cands--;
5867 ivs->cand_cost -= cp->cand->cost;
5868
5869 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5870 }
5871
5872 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5873
5874 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5875
5876 if (cp->inv_expr_id != -1)
5877 {
5878 ivs->used_inv_expr[cp->inv_expr_id]--;
5879 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5880 ivs->num_used_inv_expr--;
5881 }
5882 iv_ca_recount_cost (data, ivs);
5883 }
5884
5885 /* Add invariants in set INVS to set IVS. */
5886
5887 static void
5888 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5889 {
5890 bitmap_iterator bi;
5891 unsigned iid;
5892
5893 if (!invs)
5894 return;
5895
5896 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5897 {
5898 ivs->n_invariant_uses[iid]++;
5899 if (ivs->n_invariant_uses[iid] == 1)
5900 ivs->n_regs++;
5901 }
5902 }
5903
5904 /* Set cost pair for USE in set IVS to CP. */
5905
5906 static void
5907 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5908 struct iv_use *use, struct cost_pair *cp)
5909 {
5910 unsigned uid = use->id, cid;
5911
5912 if (ivs->cand_for_use[uid] == cp)
5913 return;
5914
5915 if (ivs->cand_for_use[uid])
5916 iv_ca_set_no_cp (data, ivs, use);
5917
5918 if (cp)
5919 {
5920 cid = cp->cand->id;
5921
5922 ivs->bad_uses--;
5923 ivs->cand_for_use[uid] = cp;
5924 ivs->n_cand_uses[cid]++;
5925 if (ivs->n_cand_uses[cid] == 1)
5926 {
5927 bitmap_set_bit (ivs->cands, cid);
5928 /* Do not count the pseudocandidates. */
5929 if (cp->cand->iv)
5930 ivs->n_regs++;
5931 ivs->n_cands++;
5932 ivs->cand_cost += cp->cand->cost;
5933
5934 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5935 }
5936
5937 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5938 iv_ca_set_add_invariants (ivs, cp->depends_on);
5939
5940 if (cp->inv_expr_id != -1)
5941 {
5942 ivs->used_inv_expr[cp->inv_expr_id]++;
5943 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5944 ivs->num_used_inv_expr++;
5945 }
5946 iv_ca_recount_cost (data, ivs);
5947 }
5948 }
5949
5950 /* Extend set IVS by expressing USE by some of the candidates in it
5951 if possible. Consider all important candidates if candidates in
5952 set IVS don't give any result. */
5953
5954 static void
5955 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5956 struct iv_use *use)
5957 {
5958 struct cost_pair *best_cp = NULL, *cp;
5959 bitmap_iterator bi;
5960 unsigned i;
5961 struct iv_cand *cand;
5962
5963 gcc_assert (ivs->upto >= use->id);
5964 ivs->upto++;
5965 ivs->bad_uses++;
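  /* The use starts out unrepresented; if a suitable cost pair is found
     below, iv_ca_set_cp decrements bad_uses again.  */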
5966
5967 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5968 {
5969 cand = iv_cand (data, i);
5970 cp = get_use_iv_cost (data, use, cand);
5971 if (cheaper_cost_pair (cp, best_cp))
5972 best_cp = cp;
5973 }
5974
5975 if (best_cp == NULL)
5976 {
5977 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5978 {
5979 cand = iv_cand (data, i);
5980 cp = get_use_iv_cost (data, use, cand);
5981 if (cheaper_cost_pair (cp, best_cp))
5982 best_cp = cp;
5983 }
5984 }
5985
5986 iv_ca_set_cp (data, ivs, use, best_cp);
5987 }
5988
5989 /* Get cost for assignment IVS. */
5990
5991 static comp_cost
5992 iv_ca_cost (struct iv_ca *ivs)
5993 {
5994 /* This was a conditional expression but it triggered a bug in
5995 Sun C 5.5. */
5996 if (ivs->bad_uses)
5997 return infinite_cost;
5998 else
5999 return ivs->cost;
6000 }
6001
6002 /* Returns true if all dependences of CP are among invariants in IVS. */
6003
6004 static bool
6005 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6006 {
6007 unsigned i;
6008 bitmap_iterator bi;
6009
6010 if (!cp->depends_on)
6011 return true;
6012
6013 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6014 {
6015 if (ivs->n_invariant_uses[i] == 0)
6016 return false;
6017 }
6018
6019 return true;
6020 }
6021
6022 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP, and chains
6023 it before NEXT_CHANGE. */
6024
6025 static struct iv_ca_delta *
6026 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6027 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6028 {
6029 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6030
6031 change->use = use;
6032 change->old_cp = old_cp;
6033 change->new_cp = new_cp;
6034 change->next_change = next_change;
6035
6036 return change;
6037 }
6038
6039 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6040 are rewritten. */
6041
6042 static struct iv_ca_delta *
6043 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6044 {
6045 struct iv_ca_delta *last;
6046
6047 if (!l2)
6048 return l1;
6049
6050 if (!l1)
6051 return l2;
6052
6053 for (last = l1; last->next_change; last = last->next_change)
6054 continue;
6055 last->next_change = l2;
6056
6057 return l1;
6058 }
6059
6060 /* Reverse the list of changes DELTA, forming the inverse to it. */
6061
6062 static struct iv_ca_delta *
6063 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6064 {
6065 struct iv_ca_delta *act, *next, *prev = NULL;
6066
6067 for (act = delta; act; act = next)
6068 {
6069 next = act->next_change;
6070 act->next_change = prev;
6071 prev = act;
6072
6073 std::swap (act->old_cp, act->new_cp);
6074 }
6075
6076 return prev;
6077 }
6078
6079 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6080 reverted instead. */
6081
6082 static void
6083 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6084 struct iv_ca_delta *delta, bool forward)
6085 {
6086 struct cost_pair *from, *to;
6087 struct iv_ca_delta *act;
6088
6089 if (!forward)
6090 delta = iv_ca_delta_reverse (delta);
6091
6092 for (act = delta; act; act = act->next_change)
6093 {
6094 from = act->old_cp;
6095 to = act->new_cp;
6096 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6097 iv_ca_set_cp (data, ivs, act->use, to);
6098 }
6099
6100 if (!forward)
6101 iv_ca_delta_reverse (delta);
6102 }
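
/* The usual pattern with these deltas (see e.g. iv_ca_extend and iv_ca_prune)
   is to build one with iv_ca_delta_add, commit it with FORWARD true to
   measure the cost of the tentative assignment, and then commit it again
   with FORWARD false to undo the change.  */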
6103
6104 /* Returns true if CAND is used in IVS. */
6105
6106 static bool
6107 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6108 {
6109 return ivs->n_cand_uses[cand->id] > 0;
6110 }
6111
6112 /* Returns number of induction variable candidates in the set IVS. */
6113
6114 static unsigned
6115 iv_ca_n_cands (struct iv_ca *ivs)
6116 {
6117 return ivs->n_cands;
6118 }
6119
6120 /* Free the list of changes DELTA. */
6121
6122 static void
6123 iv_ca_delta_free (struct iv_ca_delta **delta)
6124 {
6125 struct iv_ca_delta *act, *next;
6126
6127 for (act = *delta; act; act = next)
6128 {
6129 next = act->next_change;
6130 free (act);
6131 }
6132
6133 *delta = NULL;
6134 }
6135
6136 /* Allocates a new iv candidate assignment. */
6137
6138 static struct iv_ca *
6139 iv_ca_new (struct ivopts_data *data)
6140 {
6141 struct iv_ca *nw = XNEW (struct iv_ca);
6142
6143 nw->upto = 0;
6144 nw->bad_uses = 0;
6145 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6146 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6147 nw->cands = BITMAP_ALLOC (NULL);
6148 nw->n_cands = 0;
6149 nw->n_regs = 0;
6150 nw->cand_use_cost = no_cost;
6151 nw->cand_cost = 0;
6152 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6153 nw->cost = no_cost;
6154 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6155 nw->num_used_inv_expr = 0;
6156
6157 return nw;
6158 }
6159
6160 /* Free memory occupied by the set IVS. */
6161
6162 static void
6163 iv_ca_free (struct iv_ca **ivs)
6164 {
6165 free ((*ivs)->cand_for_use);
6166 free ((*ivs)->n_cand_uses);
6167 BITMAP_FREE ((*ivs)->cands);
6168 free ((*ivs)->n_invariant_uses);
6169 free ((*ivs)->used_inv_expr);
6170 free (*ivs);
6171 *ivs = NULL;
6172 }
6173
6174 /* Dumps IVS to FILE. */
6175
6176 static void
6177 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6178 {
6179 const char *pref = " invariants ";
6180 unsigned i;
6181 comp_cost cost = iv_ca_cost (ivs);
6182
6183 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6184 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6185 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6186 bitmap_print (file, ivs->cands, " candidates: ","\n");
6187
6188 for (i = 0; i < ivs->upto; i++)
6189 {
6190 struct iv_use *use = iv_use (data, i);
6191 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6192 if (cp)
6193 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6194 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6195 else
6196 fprintf (file, " use:%d --> ??\n", use->id);
6197 }
6198
6199 for (i = 1; i <= data->max_inv_id; i++)
6200 if (ivs->n_invariant_uses[i])
6201 {
6202 fprintf (file, "%s%d", pref, i);
6203 pref = ", ";
6204 }
6205 fprintf (file, "\n\n");
6206 }
6207
6208 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6209 the new set, and store the differences in DELTA. The number of induction
6210 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6211 true, the function tries to find a solution with minimal iv candidates. */
6212
6213 static comp_cost
6214 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6215 struct iv_cand *cand, struct iv_ca_delta **delta,
6216 unsigned *n_ivs, bool min_ncand)
6217 {
6218 unsigned i;
6219 comp_cost cost;
6220 struct iv_use *use;
6221 struct cost_pair *old_cp, *new_cp;
6222
6223 *delta = NULL;
6224 for (i = 0; i < ivs->upto; i++)
6225 {
6226 use = iv_use (data, i);
6227 old_cp = iv_ca_cand_for_use (ivs, use);
6228
6229 if (old_cp
6230 && old_cp->cand == cand)
6231 continue;
6232
6233 new_cp = get_use_iv_cost (data, use, cand);
6234 if (!new_cp)
6235 continue;
6236
6237 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6238 continue;
6239
6240 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6241 continue;
6242
6243 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6244 }
6245
6246 iv_ca_delta_commit (data, ivs, *delta, true);
6247 cost = iv_ca_cost (ivs);
6248 if (n_ivs)
6249 *n_ivs = iv_ca_n_cands (ivs);
6250 iv_ca_delta_commit (data, ivs, *delta, false);
6251
6252 return cost;
6253 }
6254
6255 /* Try narrowing set IVS by removing CAND. Return the cost of
6256 the new set and store the differences in DELTA. START is
6257 the candidate with which we start narrowing. */
6258
6259 static comp_cost
6260 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6261 struct iv_cand *cand, struct iv_cand *start,
6262 struct iv_ca_delta **delta)
6263 {
6264 unsigned i, ci;
6265 struct iv_use *use;
6266 struct cost_pair *old_cp, *new_cp, *cp;
6267 bitmap_iterator bi;
6268 struct iv_cand *cnd;
6269 comp_cost cost, best_cost, acost;
6270
6271 *delta = NULL;
6272 for (i = 0; i < n_iv_uses (data); i++)
6273 {
6274 use = iv_use (data, i);
6275
6276 old_cp = iv_ca_cand_for_use (ivs, use);
6277 if (old_cp->cand != cand)
6278 continue;
6279
6280 best_cost = iv_ca_cost (ivs);
6281 /* Start narrowing with START. */
6282 new_cp = get_use_iv_cost (data, use, start);
6283
6284 if (data->consider_all_candidates)
6285 {
6286 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6287 {
6288 if (ci == cand->id || (start && ci == start->id))
6289 continue;
6290
6291 cnd = iv_cand (data, ci);
6292
6293 cp = get_use_iv_cost (data, use, cnd);
6294 if (!cp)
6295 continue;
6296
6297 iv_ca_set_cp (data, ivs, use, cp);
6298 acost = iv_ca_cost (ivs);
6299
6300 if (compare_costs (acost, best_cost) < 0)
6301 {
6302 best_cost = acost;
6303 new_cp = cp;
6304 }
6305 }
6306 }
6307 else
6308 {
6309 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6310 {
6311 if (ci == cand->id || (start && ci == start->id))
6312 continue;
6313
6314 cnd = iv_cand (data, ci);
6315
6316 cp = get_use_iv_cost (data, use, cnd);
6317 if (!cp)
6318 continue;
6319
6320 iv_ca_set_cp (data, ivs, use, cp);
6321 acost = iv_ca_cost (ivs);
6322
6323 if (compare_costs (acost, best_cost) < 0)
6324 {
6325 best_cost = acost;
6326 new_cp = cp;
6327 }
6328 }
6329 }
6330 /* Restore to old cp for use. */
6331 iv_ca_set_cp (data, ivs, use, old_cp);
6332
6333 if (!new_cp)
6334 {
6335 iv_ca_delta_free (delta);
6336 return infinite_cost;
6337 }
6338
6339 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6340 }
6341
6342 iv_ca_delta_commit (data, ivs, *delta, true);
6343 cost = iv_ca_cost (ivs);
6344 iv_ca_delta_commit (data, ivs, *delta, false);
6345
6346 return cost;
6347 }
6348
6349 /* Try optimizing the set of candidates IVS by removing candidates other
6350 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6351 differences in DELTA. */
6352
6353 static comp_cost
6354 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6355 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6356 {
6357 bitmap_iterator bi;
6358 struct iv_ca_delta *act_delta, *best_delta;
6359 unsigned i;
6360 comp_cost best_cost, acost;
6361 struct iv_cand *cand;
6362
6363 best_delta = NULL;
6364 best_cost = iv_ca_cost (ivs);
6365
6366 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6367 {
6368 cand = iv_cand (data, i);
6369
6370 if (cand == except_cand)
6371 continue;
6372
6373 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6374
6375 if (compare_costs (acost, best_cost) < 0)
6376 {
6377 best_cost = acost;
6378 iv_ca_delta_free (&best_delta);
6379 best_delta = act_delta;
6380 }
6381 else
6382 iv_ca_delta_free (&act_delta);
6383 }
6384
6385 if (!best_delta)
6386 {
6387 *delta = NULL;
6388 return best_cost;
6389 }
6390
6391 /* Recurse to possibly remove other unnecessary ivs. */
6392 iv_ca_delta_commit (data, ivs, best_delta, true);
6393 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6394 iv_ca_delta_commit (data, ivs, best_delta, false);
6395 *delta = iv_ca_delta_join (best_delta, *delta);
6396 return best_cost;
6397 }
6398
6399 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6400 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6401 the corresponding cost_pair; otherwise just return BEST_CP. */
6402
6403 static struct cost_pair*
6404 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6405 unsigned int cand_idx, struct iv_cand *old_cand,
6406 struct cost_pair *best_cp)
6407 {
6408 struct iv_cand *cand;
6409 struct cost_pair *cp;
6410
6411 gcc_assert (old_cand != NULL && best_cp != NULL);
6412 if (cand_idx == old_cand->id)
6413 return best_cp;
6414
6415 cand = iv_cand (data, cand_idx);
6416 cp = get_use_iv_cost (data, use, cand);
6417 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6418 return cp;
6419
6420 return best_cp;
6421 }
6422
6423 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6424 which are used by more than one iv use. For each of those candidates,
6425 this function tries to represent the iv uses under that candidate using
6426 other candidates with lower local cost, then tries to prune the new set.
6427 If the new set has a lower cost, it returns the new cost after recording
6428 the candidate replacements in list DELTA. */
6429
6430 static comp_cost
6431 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6432 struct iv_ca_delta **delta)
6433 {
6434 bitmap_iterator bi, bj;
6435 unsigned int i, j, k;
6436 struct iv_use *use;
6437 struct iv_cand *cand;
6438 comp_cost orig_cost, acost;
6439 struct iv_ca_delta *act_delta, *tmp_delta;
6440 struct cost_pair *old_cp, *best_cp = NULL;
6441
6442 *delta = NULL;
6443 orig_cost = iv_ca_cost (ivs);
6444
6445 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6446 {
6447 if (ivs->n_cand_uses[i] == 1
6448 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6449 continue;
6450
6451 cand = iv_cand (data, i);
6452
6453 act_delta = NULL;
6454 /* Represent uses under current candidate using other ones with
6455 lower local cost. */
6456 for (j = 0; j < ivs->upto; j++)
6457 {
6458 use = iv_use (data, j);
6459 old_cp = iv_ca_cand_for_use (ivs, use);
6460
6461 if (old_cp->cand != cand)
6462 continue;
6463
6464 best_cp = old_cp;
6465 if (data->consider_all_candidates)
6466 for (k = 0; k < n_iv_cands (data); k++)
6467 best_cp = cheaper_cost_with_cand (data, use, k,
6468 old_cp->cand, best_cp);
6469 else
6470 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6471 best_cp = cheaper_cost_with_cand (data, use, k,
6472 old_cp->cand, best_cp);
6473
6474 if (best_cp == old_cp)
6475 continue;
6476
6477 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6478 }
6479 /* No need for further pruning. */
6480 if (!act_delta)
6481 continue;
6482
6483 /* Prune the new candidate set. */
6484 iv_ca_delta_commit (data, ivs, act_delta, true);
6485 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6486 iv_ca_delta_commit (data, ivs, act_delta, false);
6487 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6488
6489 if (compare_costs (acost, orig_cost) < 0)
6490 {
6491 *delta = act_delta;
6492 return acost;
6493 }
6494 else
6495 iv_ca_delta_free (&act_delta);
6496 }
6497
6498 return orig_cost;
6499 }
6500
6501 /* Tries to extend the set IVS in the best possible way in order
6502 to express USE. If ORIGINALP is true, prefer candidates from
6503 the original set of IVs; otherwise favor important candidates not
6504 based on any memory object. */
6505
6506 static bool
6507 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6508 struct iv_use *use, bool originalp)
6509 {
6510 comp_cost best_cost, act_cost;
6511 unsigned i;
6512 bitmap_iterator bi;
6513 struct iv_cand *cand;
6514 struct iv_ca_delta *best_delta = NULL, *act_delta;
6515 struct cost_pair *cp;
6516
6517 iv_ca_add_use (data, ivs, use);
6518 best_cost = iv_ca_cost (ivs);
6519 cp = iv_ca_cand_for_use (ivs, use);
6520 if (cp)
6521 {
6522 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6523 iv_ca_set_no_cp (data, ivs, use);
6524 }
6525
6526 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6527 first try important candidates not based on any memory object. Only if
6528 this fails, try the specific ones. Rationale -- in loops with many
6529 variables the best choice often is to use just one generic biv. If we
6530 added here many ivs specific to the uses, the optimization algorithm later
6531 would be likely to get stuck in a local minimum, thus causing us to create
6532 too many ivs. The approach from few ivs to more seems more likely to be
6533 successful -- starting from few ivs, replacing an expensive use by a
6534 specific iv should always be a win. */
6535 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6536 {
6537 cand = iv_cand (data, i);
6538
6539 if (originalp && cand->pos != IP_ORIGINAL)
6540 continue;
6541
6542 if (!originalp && cand->iv->base_object != NULL_TREE)
6543 continue;
6544
6545 if (iv_ca_cand_used_p (ivs, cand))
6546 continue;
6547
6548 cp = get_use_iv_cost (data, use, cand);
6549 if (!cp)
6550 continue;
6551
6552 iv_ca_set_cp (data, ivs, use, cp);
6553 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6554 true);
6555 iv_ca_set_no_cp (data, ivs, use);
6556 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6557
6558 if (compare_costs (act_cost, best_cost) < 0)
6559 {
6560 best_cost = act_cost;
6561
6562 iv_ca_delta_free (&best_delta);
6563 best_delta = act_delta;
6564 }
6565 else
6566 iv_ca_delta_free (&act_delta);
6567 }
6568
6569 if (infinite_cost_p (best_cost))
6570 {
6571 for (i = 0; i < use->n_map_members; i++)
6572 {
6573 cp = use->cost_map + i;
6574 cand = cp->cand;
6575 if (!cand)
6576 continue;
6577
6578 /* Already tried this. */
6579 if (cand->important)
6580 {
6581 if (originalp && cand->pos == IP_ORIGINAL)
6582 continue;
6583 if (!originalp && cand->iv->base_object == NULL_TREE)
6584 continue;
6585 }
6586
6587 if (iv_ca_cand_used_p (ivs, cand))
6588 continue;
6589
6590 act_delta = NULL;
6591 iv_ca_set_cp (data, ivs, use, cp);
6592 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6593 iv_ca_set_no_cp (data, ivs, use);
6594 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6595 cp, act_delta);
6596
6597 if (compare_costs (act_cost, best_cost) < 0)
6598 {
6599 best_cost = act_cost;
6600
6601 if (best_delta)
6602 iv_ca_delta_free (&best_delta);
6603 best_delta = act_delta;
6604 }
6605 else
6606 iv_ca_delta_free (&act_delta);
6607 }
6608 }
6609
6610 iv_ca_delta_commit (data, ivs, best_delta, true);
6611 iv_ca_delta_free (&best_delta);
6612
6613 return !infinite_cost_p (best_cost);
6614 }
6615
6616 /* Finds an initial assignment of candidates to uses. */
6617
6618 static struct iv_ca *
6619 get_initial_solution (struct ivopts_data *data, bool originalp)
6620 {
6621 struct iv_ca *ivs = iv_ca_new (data);
6622 unsigned i;
6623
6624 for (i = 0; i < n_iv_uses (data); i++)
6625 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6626 {
6627 iv_ca_free (&ivs);
6628 return NULL;
6629 }
6630
6631 return ivs;
6632 }
6633
6634 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6635 points to a bool variable; if it is true, this function tries to break
6636 the local optimal fixed-point by replacing candidates in IVS. */
6637
6638 static bool
6639 try_improve_iv_set (struct ivopts_data *data,
6640 struct iv_ca *ivs, bool *try_replace_p)
6641 {
6642 unsigned i, n_ivs;
6643 comp_cost acost, best_cost = iv_ca_cost (ivs);
6644 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6645 struct iv_cand *cand;
6646
6647 /* Try extending the set of induction variables by one. */
6648 for (i = 0; i < n_iv_cands (data); i++)
6649 {
6650 cand = iv_cand (data, i);
6651
6652 if (iv_ca_cand_used_p (ivs, cand))
6653 continue;
6654
6655 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6656 if (!act_delta)
6657 continue;
6658
6659 /* If we successfully added the candidate and the set is small enough,
6660 try optimizing it by removing other candidates. */
6661 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6662 {
6663 iv_ca_delta_commit (data, ivs, act_delta, true);
6664 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6665 iv_ca_delta_commit (data, ivs, act_delta, false);
6666 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6667 }
6668
6669 if (compare_costs (acost, best_cost) < 0)
6670 {
6671 best_cost = acost;
6672 iv_ca_delta_free (&best_delta);
6673 best_delta = act_delta;
6674 }
6675 else
6676 iv_ca_delta_free (&act_delta);
6677 }
6678
6679 if (!best_delta)
6680 {
6681 /* Try removing the candidates from the set instead. */
6682 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6683
6684 if (!best_delta && *try_replace_p)
6685 {
6686 *try_replace_p = false;
6687 /* So far the candidate-selecting algorithm tends to choose fewer IVs
6688 so that it can handle cases in which loops have many variables
6689 but the best choice is often to use only one general biv. One
6690 weakness is that it can't handle the opposite cases, in which different
6691 candidates should be chosen with respect to each use. To solve
6692 the problem, we replace candidates in the manner described in the
6693 comments of iv_ca_replace, thus giving the general algorithm a chance
6694 to break the local optimal fixed-point in these cases. */
6695 best_cost = iv_ca_replace (data, ivs, &best_delta);
6696 }
6697
6698 if (!best_delta)
6699 return false;
6700 }
6701
6702 iv_ca_delta_commit (data, ivs, best_delta, true);
6703 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6704 iv_ca_delta_free (&best_delta);
6705 return true;
6706 }
6707
6708 /* Attempts to find the optimal set of induction variables. We use a simple
6709 greedy heuristic -- we try to replace at most one candidate in the selected
6710 solution and remove the unused ivs as long as this improves the cost. */
6711
6712 static struct iv_ca *
6713 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6714 {
6715 struct iv_ca *set;
6716 bool try_replace_p = true;
6717
6718 /* Get the initial solution. */
6719 set = get_initial_solution (data, originalp);
6720 if (!set)
6721 {
6722 if (dump_file && (dump_flags & TDF_DETAILS))
6723 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6724 return NULL;
6725 }
6726
6727 if (dump_file && (dump_flags & TDF_DETAILS))
6728 {
6729 fprintf (dump_file, "Initial set of candidates:\n");
6730 iv_ca_dump (data, dump_file, set);
6731 }
6732
6733 while (try_improve_iv_set (data, set, &try_replace_p))
6734 {
6735 if (dump_file && (dump_flags & TDF_DETAILS))
6736 {
6737 fprintf (dump_file, "Improved to:\n");
6738 iv_ca_dump (data, dump_file, set);
6739 }
6740 }
6741
6742 return set;
6743 }
6744
6745 static struct iv_ca *
6746 find_optimal_iv_set (struct ivopts_data *data)
6747 {
6748 unsigned i;
6749 struct iv_ca *set, *origset;
6750 struct iv_use *use;
6751 comp_cost cost, origcost;
6752
6753 /* Determine the cost based on a strategy that starts with the original IVs,
6754 and then try again using a strategy that prefers candidates not based
6755 on any memory object. */
6756 origset = find_optimal_iv_set_1 (data, true);
6757 set = find_optimal_iv_set_1 (data, false);
6758
6759 if (!origset && !set)
6760 return NULL;
6761
6762 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6763 cost = set ? iv_ca_cost (set) : infinite_cost;
6764
6765 if (dump_file && (dump_flags & TDF_DETAILS))
6766 {
6767 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6768 origcost.cost, origcost.complexity);
6769 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6770 cost.cost, cost.complexity);
6771 }
6772
6773 /* Choose the one with the best cost. */
6774 if (compare_costs (origcost, cost) <= 0)
6775 {
6776 if (set)
6777 iv_ca_free (&set);
6778 set = origset;
6779 }
6780 else if (origset)
6781 iv_ca_free (&origset);
6782
6783 for (i = 0; i < n_iv_uses (data); i++)
6784 {
6785 use = iv_use (data, i);
6786 use->selected = iv_ca_cand_for_use (set, use)->cand;
6787 }
6788
6789 return set;
6790 }
6791
6792 /* Creates a new induction variable corresponding to CAND. */
6793
6794 static void
6795 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6796 {
6797 gimple_stmt_iterator incr_pos;
6798 tree base;
6799 bool after = false;
6800
6801 if (!cand->iv)
6802 return;
6803
6804 switch (cand->pos)
6805 {
6806 case IP_NORMAL:
6807 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6808 break;
6809
6810 case IP_END:
6811 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6812 after = true;
6813 break;
6814
6815 case IP_AFTER_USE:
6816 after = true;
6817 /* fall through */
6818 case IP_BEFORE_USE:
6819 incr_pos = gsi_for_stmt (cand->incremented_at);
6820 break;
6821
6822 case IP_ORIGINAL:
6823 /* Mark that the iv is preserved. */
6824 name_info (data, cand->var_before)->preserve_biv = true;
6825 name_info (data, cand->var_after)->preserve_biv = true;
6826
6827 /* Rewrite the increment so that it uses var_before directly. */
6828 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6829 return;
6830 }
6831
6832 gimple_add_tmp_var (cand->var_before);
6833
6834 base = unshare_expr (cand->iv->base);
6835
6836 create_iv (base, unshare_expr (cand->iv->step),
6837 cand->var_before, data->current_loop,
6838 &incr_pos, after, &cand->var_before, &cand->var_after);
6839 }
6840
6841 /* Creates new induction variables described in SET. */
6842
6843 static void
6844 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6845 {
6846 unsigned i;
6847 struct iv_cand *cand;
6848 bitmap_iterator bi;
6849
6850 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6851 {
6852 cand = iv_cand (data, i);
6853 create_new_iv (data, cand);
6854 }
6855
6856 if (dump_file && (dump_flags & TDF_DETAILS))
6857 {
6858 fprintf (dump_file, "Selected IV set for loop %d",
6859 data->current_loop->num);
6860 if (data->loop_loc != UNKNOWN_LOCATION)
6861 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6862 LOCATION_LINE (data->loop_loc));
6863 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6864 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6865 {
6866 cand = iv_cand (data, i);
6867 dump_cand (dump_file, cand);
6868 }
6869 fprintf (dump_file, "\n");
6870 }
6871 }
6872
6873 /* Rewrites USE (definition of iv used in a nonlinear expression)
6874 using candidate CAND. */
6875
6876 static void
6877 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6878 struct iv_use *use, struct iv_cand *cand)
6879 {
6880 tree comp;
6881 tree op, tgt;
6882 gassign *ass;
6883 gimple_stmt_iterator bsi;
6884
6885 /* An important special case -- if we are asked to express the value of
6886 the original iv by itself, just exit; there is no need to
6887 introduce a new computation (that might also need casting the
6888 variable to unsigned and back). */
6889 if (cand->pos == IP_ORIGINAL
6890 && cand->incremented_at == use->stmt)
6891 {
6892 enum tree_code stmt_code;
6893
6894 gcc_assert (is_gimple_assign (use->stmt));
6895 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6896
6897 /* Check whether we may leave the computation unchanged.
6898 This is the case only if it does not rely on other
6899 computations in the loop -- otherwise, the computation
6900 we rely upon may be removed in remove_unused_ivs,
6901 thus leading to ICE. */
6902 stmt_code = gimple_assign_rhs_code (use->stmt);
6903 if (stmt_code == PLUS_EXPR
6904 || stmt_code == MINUS_EXPR
6905 || stmt_code == POINTER_PLUS_EXPR)
6906 {
6907 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6908 op = gimple_assign_rhs2 (use->stmt);
6909 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6910 op = gimple_assign_rhs1 (use->stmt);
6911 else
6912 op = NULL_TREE;
6913 }
6914 else
6915 op = NULL_TREE;
6916
6917 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6918 return;
6919 }
6920
6921 comp = get_computation (data->current_loop, use, cand);
6922 gcc_assert (comp != NULL_TREE);
6923
6924 switch (gimple_code (use->stmt))
6925 {
6926 case GIMPLE_PHI:
6927 tgt = PHI_RESULT (use->stmt);
6928
6929 /* If we should keep the biv, do not replace it. */
6930 if (name_info (data, tgt)->preserve_biv)
6931 return;
6932
6933 bsi = gsi_after_labels (gimple_bb (use->stmt));
6934 break;
6935
6936 case GIMPLE_ASSIGN:
6937 tgt = gimple_assign_lhs (use->stmt);
6938 bsi = gsi_for_stmt (use->stmt);
6939 break;
6940
6941 default:
6942 gcc_unreachable ();
6943 }
6944
6945 if (!valid_gimple_rhs_p (comp)
6946 || (gimple_code (use->stmt) != GIMPLE_PHI
6947 /* We can't allow re-allocating the stmt as it might be pointed
6948 to still. */
6949 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6950 >= gimple_num_ops (gsi_stmt (bsi)))))
6951 {
6952 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6953 true, GSI_SAME_STMT);
6954 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6955 {
6956 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6957 /* As this isn't a plain copy we have to reset alignment
6958 information. */
6959 if (SSA_NAME_PTR_INFO (comp))
6960 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6961 }
6962 }
6963
6964 if (gimple_code (use->stmt) == GIMPLE_PHI)
6965 {
6966 ass = gimple_build_assign (tgt, comp);
6967 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6968
6969 bsi = gsi_for_stmt (use->stmt);
6970 remove_phi_node (&bsi, false);
6971 }
6972 else
6973 {
6974 gimple_assign_set_rhs_from_tree (&bsi, comp);
6975 use->stmt = gsi_stmt (bsi);
6976 }
6977 }
6978
6979 /* Performs a peephole optimization to reorder the iv update statement with
6980 a mem ref to enable instruction combining in later phases. The mem ref uses
6981 the iv value before the update, so the reordering transformation requires
6982 adjustment of the offset. CAND is the selected IV_CAND.
6983
6984 Example:
6985
6986 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6987 iv2 = iv1 + 1;
6988
6989 if (t < val) (1)
6990 goto L;
6991 goto Head;
6992
6993
6994 directly propagating t over to (1) will introduce an overlapping live range,
6995 thus increasing register pressure. This peephole transforms it into:
6996
6997
6998 iv2 = iv1 + 1;
6999 t = MEM_REF (base, iv2, 8, 8);
7000 if (t < val)
7001 goto L;
7002 goto Head;
7003 */
7004
7005 static void
7006 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7007 {
7008 tree var_after;
7009 gimple *iv_update, *stmt;
7010 basic_block bb;
7011 gimple_stmt_iterator gsi, gsi_iv;
7012
7013 if (cand->pos != IP_NORMAL)
7014 return;
7015
7016 var_after = cand->var_after;
7017 iv_update = SSA_NAME_DEF_STMT (var_after);
7018
7019 bb = gimple_bb (iv_update);
7020 gsi = gsi_last_nondebug_bb (bb);
7021 stmt = gsi_stmt (gsi);
7022
7023 /* Only handle conditional statement for now. */
7024 if (gimple_code (stmt) != GIMPLE_COND)
7025 return;
7026
7027 gsi_prev_nondebug (&gsi);
7028 stmt = gsi_stmt (gsi);
7029 if (stmt != iv_update)
7030 return;
7031
7032 gsi_prev_nondebug (&gsi);
7033 if (gsi_end_p (gsi))
7034 return;
7035
7036 stmt = gsi_stmt (gsi);
7037 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7038 return;
7039
7040 if (stmt != use->stmt)
7041 return;
7042
7043 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7044 return;
7045
7046 if (dump_file && (dump_flags & TDF_DETAILS))
7047 {
7048 fprintf (dump_file, "Reordering \n");
7049 print_gimple_stmt (dump_file, iv_update, 0, 0);
7050 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7051 fprintf (dump_file, "\n");
7052 }
7053
7054 gsi = gsi_for_stmt (use->stmt);
7055 gsi_iv = gsi_for_stmt (iv_update);
7056 gsi_move_before (&gsi_iv, &gsi);
7057
7058 cand->pos = IP_BEFORE_USE;
7059 cand->incremented_at = use->stmt;
7060 }
7061
7062 /* Rewrites USE (address that is an iv) using candidate CAND. */
7063
7064 static void
7065 rewrite_use_address_1 (struct ivopts_data *data,
7066 struct iv_use *use, struct iv_cand *cand)
7067 {
7068 aff_tree aff;
7069 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7070 tree base_hint = NULL_TREE;
7071 tree ref, iv;
7072 bool ok;
7073
7074 adjust_iv_update_pos (cand, use);
7075 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7076 gcc_assert (ok);
7077 unshare_aff_combination (&aff);
7078
7079 /* To avoid undefined overflow problems, all IV candidates use unsigned
7080 integer types. The drawback is that this makes it impossible for
7081 create_mem_ref to distinguish an IV that is based on a memory object
7082 from one that represents simply an offset.
7083
7084 To work around this problem, we pass a hint to create_mem_ref that
7085 indicates which variable (if any) in aff is an IV based on a memory
7086 object. Note that we only consider the candidate. If this is not
7087 based on an object, the base of the reference is in some subexpression
7088 of the use -- but these will use pointer types, so they are recognized
7089 by the create_mem_ref heuristics anyway. */
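  /* Illustrative example (not part of the original comment): for a use like
     a[i] where the chosen candidate is a pointer iv stepping through the
     array, the candidate's value at USE->stmt is passed as the hint, letting
     create_mem_ref keep it as the base of the resulting memory reference.  */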
7090 if (cand->iv->base_object)
7091 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7092
7093 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7094 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7095 reference_alias_ptr_type (*use->op_p),
7096 iv, base_hint, data->speed);
7097 copy_ref_info (ref, *use->op_p);
7098 *use->op_p = ref;
7099 }
7100
7101 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7102 first use of a group, rewrites sub uses in the group too. */
7103
7104 static void
7105 rewrite_use_address (struct ivopts_data *data,
7106 struct iv_use *use, struct iv_cand *cand)
7107 {
7108 struct iv_use *next;
7109
7110 gcc_assert (use->sub_id == 0);
7111 rewrite_use_address_1 (data, use, cand);
7112 update_stmt (use->stmt);
7113
7114 for (next = use->next; next != NULL; next = next->next)
7115 {
7116 rewrite_use_address_1 (data, next, cand);
7117 update_stmt (next->stmt);
7118 }
7119
7120 return;
7121 }
7122
7123 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7124 candidate CAND. */
7125
7126 static void
7127 rewrite_use_compare (struct ivopts_data *data,
7128 struct iv_use *use, struct iv_cand *cand)
7129 {
7130 tree comp, *var_p, op, bound;
7131 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7132 enum tree_code compare;
7133 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7134 bool ok;
7135
7136 bound = cp->value;
7137 if (bound)
7138 {
7139 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7140 tree var_type = TREE_TYPE (var);
7141 gimple_seq stmts;
7142
7143 if (dump_file && (dump_flags & TDF_DETAILS))
7144 {
7145 fprintf (dump_file, "Replacing exit test: ");
7146 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7147 }
7148 compare = cp->comp;
7149 bound = unshare_expr (fold_convert (var_type, bound));
7150 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7151 if (stmts)
7152 gsi_insert_seq_on_edge_immediate (
7153 loop_preheader_edge (data->current_loop),
7154 stmts);
7155
7156 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7157 gimple_cond_set_lhs (cond_stmt, var);
7158 gimple_cond_set_code (cond_stmt, compare);
7159 gimple_cond_set_rhs (cond_stmt, op);
7160 return;
7161 }
7162
7163 /* The induction variable elimination failed; just express the original
7164 giv. */
7165 comp = get_computation (data->current_loop, use, cand);
7166 gcc_assert (comp != NULL_TREE);
7167
7168 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7169 gcc_assert (ok);
7170
7171 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7172 true, GSI_SAME_STMT);
7173 }
7174
7175 /* Rewrites USE using candidate CAND. */
7176
7177 static void
7178 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7179 {
7180 switch (use->type)
7181 {
7182 case USE_NONLINEAR_EXPR:
7183 rewrite_use_nonlinear_expr (data, use, cand);
7184 break;
7185
7186 case USE_ADDRESS:
7187 rewrite_use_address (data, use, cand);
7188 break;
7189
7190 case USE_COMPARE:
7191 rewrite_use_compare (data, use, cand);
7192 break;
7193
7194 default:
7195 gcc_unreachable ();
7196 }
7197
7198 update_stmt (use->stmt);
7199 }
7200
7201 /* Rewrite the uses using the selected induction variables. */
7202
7203 static void
7204 rewrite_uses (struct ivopts_data *data)
7205 {
7206 unsigned i;
7207 struct iv_cand *cand;
7208 struct iv_use *use;
7209
7210 for (i = 0; i < n_iv_uses (data); i++)
7211 {
7212 use = iv_use (data, i);
7213 cand = use->selected;
7214 gcc_assert (cand);
7215
7216 rewrite_use (data, use, cand);
7217 }
7218 }
7219
7220 /* Removes the ivs that are not used after rewriting. */
7221
7222 static void
7223 remove_unused_ivs (struct ivopts_data *data)
7224 {
7225 unsigned j;
7226 bitmap_iterator bi;
7227 bitmap toremove = BITMAP_ALLOC (NULL);
7228
7229 /* Figure out an order in which to release SSA DEFs so that we don't
7230 release something that we'd have to propagate into a debug stmt
7231 afterwards. */
7232 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7233 {
7234 struct version_info *info;
7235
7236 info = ver_info (data, j);
7237 if (info->iv
7238 && !integer_zerop (info->iv->step)
7239 && !info->inv_id
7240 && !info->iv->have_use_for
7241 && !info->preserve_biv)
7242 {
7243 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7244
7245 tree def = info->iv->ssa_name;
7246
7247 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7248 {
7249 imm_use_iterator imm_iter;
7250 use_operand_p use_p;
7251 gimple *stmt;
7252 int count = 0;
7253
7254 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7255 {
7256 if (!gimple_debug_bind_p (stmt))
7257 continue;
7258
7259 /* We just want to determine whether to do nothing
7260 (count == 0), to substitute the computed
7261 expression into a single use of the SSA DEF by
7262 itself (count == 1), or to use a debug temp
7263 because the SSA DEF is used multiple times or as
7264 part of a larger expression (count > 1). */
7265 count++;
7266 if (gimple_debug_bind_get_value (stmt) != def)
7267 count++;
7268
7269 if (count > 1)
7270 BREAK_FROM_IMM_USE_STMT (imm_iter);
7271 }
7272
7273 if (!count)
7274 continue;
7275
7276 struct iv_use dummy_use;
7277 struct iv_cand *best_cand = NULL, *cand;
7278 unsigned i, best_pref = 0, cand_pref;
7279
7280 memset (&dummy_use, 0, sizeof (dummy_use));
7281 dummy_use.iv = info->iv;
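  /* Prefer the selected candidate whose iv most resembles the one being
     removed: an identical step weighs most, then a matching mode of the
     base, then a constant base.  */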
7282 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7283 {
7284 cand = iv_use (data, i)->selected;
7285 if (cand == best_cand)
7286 continue;
7287 cand_pref = operand_equal_p (cand->iv->step,
7288 info->iv->step, 0)
7289 ? 4 : 0;
7290 cand_pref
7291 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7292 == TYPE_MODE (TREE_TYPE (info->iv->base))
7293 ? 2 : 0;
7294 cand_pref
7295 += TREE_CODE (cand->iv->base) == INTEGER_CST
7296 ? 1 : 0;
7297 if (best_cand == NULL || best_pref < cand_pref)
7298 {
7299 best_cand = cand;
7300 best_pref = cand_pref;
7301 }
7302 }
7303
7304 if (!best_cand)
7305 continue;
7306
7307 tree comp = get_computation_at (data->current_loop,
7308 &dummy_use, best_cand,
7309 SSA_NAME_DEF_STMT (def));
7310 if (!comp)
7311 continue;
7312
7313 if (count > 1)
7314 {
7315 tree vexpr = make_node (DEBUG_EXPR_DECL);
7316 DECL_ARTIFICIAL (vexpr) = 1;
7317 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7318 if (SSA_NAME_VAR (def))
7319 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7320 else
7321 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7322 gdebug *def_temp
7323 = gimple_build_debug_bind (vexpr, comp, NULL);
7324 gimple_stmt_iterator gsi;
7325
7326 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7327 gsi = gsi_after_labels (gimple_bb
7328 (SSA_NAME_DEF_STMT (def)));
7329 else
7330 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7331
7332 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7333 comp = vexpr;
7334 }
7335
7336 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7337 {
7338 if (!gimple_debug_bind_p (stmt))
7339 continue;
7340
7341 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7342 SET_USE (use_p, comp);
7343
7344 update_stmt (stmt);
7345 }
7346 }
7347 }
7348 }
7349
7350 release_defs_bitset (toremove);
7351
7352 BITMAP_FREE (toremove);
7353 }
7354
7355 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7356 for hash_map::traverse. */
7357
7358 bool
7359 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7360 {
7361 free (value);
7362 return true;
7363 }
7364
7365 /* Frees data allocated by the optimization of a single loop. */
7366
7367 static void
7368 free_loop_data (struct ivopts_data *data)
7369 {
7370 unsigned i, j;
7371 bitmap_iterator bi;
7372 tree obj;
7373
7374 if (data->niters)
7375 {
7376 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7377 delete data->niters;
7378 data->niters = NULL;
7379 }
7380
7381 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7382 {
7383 struct version_info *info;
7384
7385 info = ver_info (data, i);
7386 info->iv = NULL;
7387 info->has_nonlin_use = false;
7388 info->preserve_biv = false;
7389 info->inv_id = 0;
7390 }
7391 bitmap_clear (data->relevant);
7392 bitmap_clear (data->important_candidates);
7393
7394 for (i = 0; i < n_iv_uses (data); i++)
7395 {
7396 struct iv_use *use = iv_use (data, i);
7397 struct iv_use *pre = use, *sub = use->next;
7398
7399 while (sub)
7400 {
7401 gcc_assert (sub->related_cands == NULL);
7402 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7403
7404 pre = sub;
7405 sub = sub->next;
7406 free (pre);
7407 }
7408
7409 BITMAP_FREE (use->related_cands);
7410 for (j = 0; j < use->n_map_members; j++)
7411 if (use->cost_map[j].depends_on)
7412 BITMAP_FREE (use->cost_map[j].depends_on);
7413 free (use->cost_map);
7414 free (use);
7415 }
7416 data->iv_uses.truncate (0);
7417
7418 for (i = 0; i < n_iv_cands (data); i++)
7419 {
7420 struct iv_cand *cand = iv_cand (data, i);
7421
7422 if (cand->depends_on)
7423 BITMAP_FREE (cand->depends_on);
7424 free (cand);
7425 }
7426 data->iv_candidates.truncate (0);
7427
7428 if (data->version_info_size < num_ssa_names)
7429 {
7430 data->version_info_size = 2 * num_ssa_names;
7431 free (data->version_info);
7432 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7433 }
7434
7435 data->max_inv_id = 0;
7436
7437 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7438 SET_DECL_RTL (obj, NULL_RTX);
7439
7440 decl_rtl_to_reset.truncate (0);
7441
7442 data->inv_expr_tab->empty ();
7443 data->inv_expr_id = 0;
7444 }
7445
7446 /* Finalizes the data structures used by the iv optimization pass. */
7448
7449 static void
7450 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7451 {
7452 free_loop_data (data);
7453 free (data->version_info);
7454 BITMAP_FREE (data->relevant);
7455 BITMAP_FREE (data->important_candidates);
7456
7457 decl_rtl_to_reset.release ();
7458 data->iv_uses.release ();
7459 data->iv_candidates.release ();
7460 delete data->inv_expr_tab;
7461 data->inv_expr_tab = NULL;
7462 free_affine_expand_cache (&data->name_expansion_cache);
7463 obstack_free (&data->iv_obstack, NULL);
7464 }
7465
7466 /* Returns true if the loop body BODY includes any function calls. */
7467
7468 static bool
7469 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7470 {
7471 gimple_stmt_iterator gsi;
7472 unsigned i;
7473
7474 for (i = 0; i < num_nodes; i++)
7475 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7476 {
7477 gimple *stmt = gsi_stmt (gsi);
7478 if (is_gimple_call (stmt)
7479 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7480 return true;
7481 }
7482 return false;
7483 }
7484
7485 /* Optimizes the LOOP. Returns true if anything changed. */
7486
7487 static bool
7488 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7489 {
7490 bool changed = false;
7491 struct iv_ca *iv_ca;
7492 edge exit = single_dom_exit (loop);
7493 basic_block *body;
7494
7495 gcc_assert (!data->niters);
7496 data->current_loop = loop;
7497 data->loop_loc = find_loop_location (loop);
7498 data->speed = optimize_loop_for_speed_p (loop);
7499
7500 if (dump_file && (dump_flags & TDF_DETAILS))
7501 {
7502 fprintf (dump_file, "Processing loop %d", loop->num);
7503 if (data->loop_loc != UNKNOWN_LOCATION)
7504 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7505 LOCATION_LINE (data->loop_loc));
7506 fprintf (dump_file, "\n");
7507
7508 if (exit)
7509 {
7510 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7511 exit->src->index, exit->dest->index);
7512 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7513 fprintf (dump_file, "\n");
7514 }
7515
7516 fprintf (dump_file, "\n");
7517 }
7518
7519 body = get_loop_body (loop);
7520 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7521 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7522 free (body);
7523
7524 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7525
7526 /* For each ssa name, determine whether it behaves as an induction variable
7527 in some loop. */
7528 if (!find_induction_variables (data))
7529 goto finish;
7530
7531 /* Finds interesting uses (item 1). */
7532 find_interesting_uses (data);
7533 group_address_uses (data);
7534 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7535 goto finish;
7536
7537 /* Finds candidates for the induction variables (item 2). */
7538 find_iv_candidates (data);
7539
7540 /* Calculates the costs (item 3, part 1). */
7541 determine_iv_costs (data);
7542 determine_use_iv_costs (data);
7543 determine_set_costs (data);
7544
7545 /* Find the optimal set of induction variables (item 3, part 2). */
7546 iv_ca = find_optimal_iv_set (data);
7547 if (!iv_ca)
7548 goto finish;
7549 changed = true;
7550
7551 /* Create the new induction variables (item 4, part 1). */
7552 create_new_ivs (data, iv_ca);
7553 iv_ca_free (&iv_ca);
7554
7555 /* Rewrite the uses (item 4, part 2). */
7556 rewrite_uses (data);
7557
7558 /* Remove the ivs that are unused after rewriting. */
7559 remove_unused_ivs (data);
7560
7561 /* We have changed the structure of induction variables; it might happen
7562 that definitions in the scev database refer to some of them that were
7563 eliminated. */
7564 scev_reset ();
7565
7566 finish:
7567 free_loop_data (data);
7568
7569 return changed;
7570 }
7571
7572 /* Main entry point. Optimizes induction variables in loops. */
7573
7574 void
7575 tree_ssa_iv_optimize (void)
7576 {
7577 struct loop *loop;
7578 struct ivopts_data data;
7579
7580 tree_ssa_iv_optimize_init (&data);
7581
7582 /* Optimize the loops starting with the innermost ones. */
7583 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7584 {
7585 if (dump_file && (dump_flags & TDF_DETAILS))
7586 flow_loop_dump (loop, dump_file, NULL, 1);
7587
7588 tree_ssa_iv_optimize_loop (&data, loop);
7589 }
7590
7591 tree_ssa_iv_optimize_finalize (&data);
7592 }