/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
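
/* Editorial example (a sketch, not part of the original sources; the
   variable names are made up): given a C loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address 'a[i]' and the exit test 'i < n' are the interesting uses,
   'i' is the old induction variable, and the pass may decide to introduce
   a pointer candidate 'p' with base '&a[0]' and step 'sizeof (*a)', so
   that the address use becomes '*p' (strength reduction) and the exit
   test is rewritten to compare 'p' against '&a[n]' (elimination of 'i').  */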

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  unsigned use_id;      /* The id of the use this iv was recorded for, valid
                           when have_use_for is true.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
};
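
/* Editorial example (a sketch): for the loop 'while (n--) *p++ = 0;'
   the iv describing the pointer 'p' would have BASE equal to the value
   of 'p' on loop entry, STEP equal to 'sizeof (*p)', BASE_OBJECT the
   object 'p' points to as computed by determine_base_object below, and
   BIV_P set, since 'p' is incremented directly by the loop.  */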

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned sub_id;      /* The id of the sub use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */

  struct iv_use *next;  /* The next sub use.  */
  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
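
/* Editorial note (a sketch of how the fields above combine; the precise
   accounting lives in the iv_ca_* helpers later in this file):

     cost ~ cand_use_cost        (sum over uses of their selected cost_pair)
          + cand_cost            (sum of the costs of the used candidates)
          + register pressure    (estimated from n_regs and friends)

   so adding a candidate pays off when the drop in use costs outweighs
   the added candidate and register-pressure costs.  */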

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   the ssa name if DUMP_NAME is false.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
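
/* Editorial example (illustrative, hand-written): for a biv 'i_1' of type
   'int' with base 0 and step 1, the output of dump_iv with DUMP_NAME set
   looks roughly like

     ssa name i_1
       type int
       base 0
       step 1
       is a biv  */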

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
        {
          dump_use (file, use);
          use = use->next;
        }
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
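
/* Editorial examples (a sketch): for '&a[i]' the ADDR_EXPR case strips
   the ARRAY_REF via get_base_address and returns '&a' converted to
   'void *'; for 'p + 4' the POINTER_PLUS_EXPR case recurses into 'p';
   and for a plain integer constant the result is NULL_TREE, since a
   constant pointer does not point into any tracked object.  */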

/* Return true if an address expression with a non-DECL_P operand
   appears in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP.
   NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}
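
/* Editorial example (a sketch): with BASE = '&a[0]', whose ADDR_EXPR
   operand is an ARRAY_REF rather than a DECL, the lowering above runs
   the base through the tree-affine machinery so it ends up in the same
   canonical shape as a base written '&a'; without it, '&a[0]' and '&a'
   would produce duplicate candidates.  */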

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
        bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
        continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
        continue;

      if (integer_zerop (iv.step))
        continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
         iv step.  Ideally we would stop at any ssa var, but doing so is
         expensive and such cases are unusual, so we only do it for the
         first one.

         See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
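
/* Editorial example (a sketch of the shape find_bivs looks for): a biv
   shows up as a cycle through a loop-header PHI node, e.g.

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   for which simple_iv reports base 0 and step 1, and set_iv records
   the result for i_1.  */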

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we would stop at any ssa var, but doing so is
     expensive and such cases are unusual, so we only do it for the
     first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
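
/* Editorial example (a sketch): with the biv i_1 above, an assignment
   in the loop body such as

     a_3 = i_1 * 4;

   defines a giv with base 0 and step 4; simple_iv recognizes it because
   its value is an affine function of the number of iterations.  */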

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv, true);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from the IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type, tree addr_base = NULL,
            unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
                struct iv *iv, gimple stmt, enum use_type use_type,
                tree addr_base, unsigned HOST_WIDE_INT addr_offset,
                unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
        {
          pre = group;
          group = group->next;
        }
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* Reset the ssa name so that it does not show up in the dumps, if it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
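
/* Editorial example (a sketch): for the exit test 'if (i_1 < n_7)' with
   'n_7' defined outside the loop, extract_cond_operands returns true
   with *CONTROL_VAR pointing at i_1 (nonzero step) and *BOUND at the
   invariant n_7; if the source had written 'n_7 > i_1', the swap above
   canonicalizes the iv to the first position.  */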

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
   obviously invariant, i.e. the outermost loop such that all of EXPR's
   operands are defined outside of it.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
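
/* Editorial worked examples (a sketch): for TOP = (i_1 + 3) * 8 and
   BOT = i_1 + 3, the MULT_EXPR case recurses on the first operand
   (operand_equal_p to BOT, so the inner multiple is 1) and scales by 8,
   giving *MUL = 8.  For TOP = 24 and BOT = 8 the INTEGER_CST case
   divides and yields *MUL = 3, while TOP = 25 and BOT = 8 fails because
   the remainder is nonzero.  */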

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use with the same stripped iv base and step,
   this function records this one as a sub use to that; otherwise it
   records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
                  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
        continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
          && operand_equal_p (iv->step, use->iv->step, 0)
          && operand_equal_p (addr_base, use->addr_base, 0))
        break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
                       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
                           use_type, addr_base, addr_offset, i);
}
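
/* Editorial example (a sketch): the address uses 'a[i]' and 'a[i + 1]'
   in one loop share the base object, the step and the stripped base
   '&a', differing only in the constant offset, so the second is recorded
   by record_sub_use under the first; keeping such uses grouped lets the
   pass consider one candidate (plus constant offsets) for the whole
   group.  */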

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2153 {
2154 op = USE_FROM_PTR (use_p);
2155
2156 if (TREE_CODE (op) != SSA_NAME)
2157 continue;
2158
2159 iv = get_iv (data, op);
2160 if (!iv)
2161 continue;
2162
2163 find_interesting_uses_op (data, op);
2164 }
2165 }
2166
2167 /* Finds interesting uses of induction variables outside of loops
2168 on loop exit edge EXIT. */
2169
2170 static void
2171 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2172 {
2173 gphi *phi;
2174 gphi_iterator psi;
2175 tree def;
2176
2177 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2178 {
2179 phi = psi.phi ();
2180 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2181 if (!virtual_operand_p (def))
2182 find_interesting_uses_op (data, def);
2183 }
2184 }
2185
2186 /* Finds uses of the induction variables that are interesting. */
2187
2188 static void
2189 find_interesting_uses (struct ivopts_data *data)
2190 {
2191 basic_block bb;
2192 gimple_stmt_iterator bsi;
2193 basic_block *body = get_loop_body (data->current_loop);
2194 unsigned i;
2195 struct version_info *info;
2196 edge e;
2197
2198 if (dump_file && (dump_flags & TDF_DETAILS))
2199 fprintf (dump_file, "Uses:\n\n");
2200
2201 for (i = 0; i < data->current_loop->num_nodes; i++)
2202 {
2203 edge_iterator ei;
2204 bb = body[i];
2205
2206 FOR_EACH_EDGE (e, ei, bb->succs)
2207 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2208 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2209 find_interesting_uses_outside (data, e);
2210
2211 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2212 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2213 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2214 if (!is_gimple_debug (gsi_stmt (bsi)))
2215 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2216 }
2217
2218 if (dump_file && (dump_flags & TDF_DETAILS))
2219 {
2220 bitmap_iterator bi;
2221
2222 fprintf (dump_file, "\n");
2223
2224 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2225 {
2226 info = ver_info (data, i);
2227 if (info->inv_id)
2228 {
2229 fprintf (dump_file, " ");
2230 print_generic_expr (dump_file, info->name, TDF_SLIM);
2231 fprintf (dump_file, " is invariant (%d)%s\n",
2232 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2233 }
2234 }
2235
2236 fprintf (dump_file, "\n");
2237 }
2238
2239 free (body);
2240 }
2241
2242 /* Compute the maximum offset supported by the [base + offset] addressing
2243 mode for the memory reference represented by USE. */
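/* (Sketch of the search below: offsets of the form (1 << i) - 1, and on
   strict-alignment targets also (1 << i) - GET_MODE_SIZE (mem_mode), are
   probed from the widest downwards, and the first offset the target
   accepts is cached per address space and memory mode.)  */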
2244
2245 static HOST_WIDE_INT
2246 compute_max_addr_offset (struct iv_use *use)
2247 {
2248 int width;
2249 rtx reg, addr;
2250 HOST_WIDE_INT i, off;
2251 unsigned list_index, num;
2252 addr_space_t as;
2253 machine_mode mem_mode, addr_mode;
2254 static vec<HOST_WIDE_INT> max_offset_list;
2255
2256 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2257 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2258
2259 num = max_offset_list.length ();
2260 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2261 if (list_index >= num)
2262 {
2263 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2264 for (; num < max_offset_list.length (); num++)
2265 max_offset_list[num] = -1;
2266 }
2267
2268 off = max_offset_list[list_index];
2269 if (off != -1)
2270 return off;
2271
2272 addr_mode = targetm.addr_space.address_mode (as);
2273 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2274 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2275
2276 width = GET_MODE_BITSIZE (addr_mode) - 1;
2277 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2278 width = HOST_BITS_PER_WIDE_INT - 1;
2279
2280 for (i = width; i > 0; i--)
2281 {
2282 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2283 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2284 if (memory_address_addr_space_p (mem_mode, addr, as))
2285 break;
2286
2287 /* For some strict-alignment targets, the offset must be naturally
2288 aligned. Try an aligned offset if mem_mode is not QImode. */
2289 off = ((unsigned HOST_WIDE_INT) 1 << i);
2290 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2291 {
2292 off -= GET_MODE_SIZE (mem_mode);
2293 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2294 if (memory_address_addr_space_p (mem_mode, addr, as))
2295 break;
2296 }
2297 }
2298 if (i == 0)
2299 off = 0;
2300
2301 max_offset_list[list_index] = off;
2302 return off;
2303 }
2304
2305 /* Check if all small groups should be split. Return true if and
2306 only if:
2307
2308 1) At least one group contains two uses with different offsets.
2309 2) No group contains more than two uses with different offsets.
2310
2311 Return false otherwise. We want to split such groups because:
2312
2313 1) Small groups don't have much benefit and may interfere with
2314 general candidate selection.
2315 2) The problem size with only small groups is usually small, and
2316 the general algorithm can handle it well.
2317
2318 TODO -- The above claim may not hold when auto increment is supported. */
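/* (For example, a group accessing a[i] and a[i + 1] has two distinct
   offsets and so votes for splitting, while a group that also accesses
   a[i + 2] has three distinct offsets and suppresses splitting
   altogether.)  */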
2319
2320 static bool
2321 split_all_small_groups (struct ivopts_data *data)
2322 {
2323 bool split_p = false;
2324 unsigned int i, n, distinct;
2325 struct iv_use *pre, *use;
2326
2327 n = n_iv_uses (data);
2328 for (i = 0; i < n; i++)
2329 {
2330 use = iv_use (data, i);
2331 if (!use->next)
2332 continue;
2333
2334 distinct = 1;
2335 gcc_assert (use->type == USE_ADDRESS);
2336 for (pre = use, use = use->next; use; pre = use, use = use->next)
2337 {
2338 if (pre->addr_offset != use->addr_offset)
2339 distinct++;
2340
2341 if (distinct > 2)
2342 return false;
2343 }
2344 if (distinct == 2)
2345 split_p = true;
2346 }
2347
2348 return split_p;
2349 }
2350
2351 /* For each group of address type uses, this function further groups
2352 these uses according to the maximum offset supported by the target's
2353 [base + offset] addressing mode. */
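/* (Illustration: with a maximum supported offset of, say, 255, uses at
   offsets 0, 16 and 400 from the same stripped base end up as two
   groups, {0, 16} and {400}, because 400 does not fit in the offset
   field relative to the first use.)  */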
2354
2355 static void
2356 group_address_uses (struct ivopts_data *data)
2357 {
2358 HOST_WIDE_INT max_offset = -1;
2359 unsigned int i, n, sub_id;
2360 struct iv_use *pre, *use;
2361 unsigned HOST_WIDE_INT addr_offset_first;
2362
2363 /* Reset max offset to split all small groups. */
2364 if (split_all_small_groups (data))
2365 max_offset = 0;
2366
2367 n = n_iv_uses (data);
2368 for (i = 0; i < n; i++)
2369 {
2370 use = iv_use (data, i);
2371 if (!use->next)
2372 continue;
2373
2374 gcc_assert (use->type == USE_ADDRESS);
2375 if (max_offset != 0)
2376 max_offset = compute_max_addr_offset (use);
2377
2378 while (use)
2379 {
2380 sub_id = 0;
2381 addr_offset_first = use->addr_offset;
2382 /* Only uses whose offset relative to the first use fits in the
2383 offset part of the addressing mode can be grouped together. */
2384 for (pre = use, use = use->next;
2385 use && (use->addr_offset - addr_offset_first
2386 <= (unsigned HOST_WIDE_INT) max_offset);
2387 pre = use, use = use->next)
2388 {
2389 use->id = pre->id;
2390 use->sub_id = ++sub_id;
2391 }
2392
2393 /* Break the list and create a new group. */
2394 if (use)
2395 {
2396 pre->next = NULL;
2397 use->id = n_iv_uses (data);
2398 use->related_cands = BITMAP_ALLOC (NULL);
2399 data->iv_uses.safe_push (use);
2400 }
2401 }
2402 }
2403
2404 if (dump_file && (dump_flags & TDF_DETAILS))
2405 dump_uses (dump_file, data);
2406 }
2407
2408 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2409 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2410 we are at the top-level of the processed address. */
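/* (For instance, applied to p + 20 this returns P and sets *OFFSET to 20;
   applied to &a[i + 4] with 4-byte elements it returns &a[i] and sets
   *OFFSET to 16.)  */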
2411
2412 static tree
2413 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2414 HOST_WIDE_INT *offset)
2415 {
2416 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2417 enum tree_code code;
2418 tree type, orig_type = TREE_TYPE (expr);
2419 HOST_WIDE_INT off0, off1, st;
2420 tree orig_expr = expr;
2421
2422 STRIP_NOPS (expr);
2423
2424 type = TREE_TYPE (expr);
2425 code = TREE_CODE (expr);
2426 *offset = 0;
2427
2428 switch (code)
2429 {
2430 case INTEGER_CST:
2431 if (!cst_and_fits_in_hwi (expr)
2432 || integer_zerop (expr))
2433 return orig_expr;
2434
2435 *offset = int_cst_value (expr);
2436 return build_int_cst (orig_type, 0);
2437
2438 case POINTER_PLUS_EXPR:
2439 case PLUS_EXPR:
2440 case MINUS_EXPR:
2441 op0 = TREE_OPERAND (expr, 0);
2442 op1 = TREE_OPERAND (expr, 1);
2443
2444 op0 = strip_offset_1 (op0, false, false, &off0);
2445 op1 = strip_offset_1 (op1, false, false, &off1);
2446
2447 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2448 if (op0 == TREE_OPERAND (expr, 0)
2449 && op1 == TREE_OPERAND (expr, 1))
2450 return orig_expr;
2451
2452 if (integer_zerop (op1))
2453 expr = op0;
2454 else if (integer_zerop (op0))
2455 {
2456 if (code == MINUS_EXPR)
2457 expr = fold_build1 (NEGATE_EXPR, type, op1);
2458 else
2459 expr = op1;
2460 }
2461 else
2462 expr = fold_build2 (code, type, op0, op1);
2463
2464 return fold_convert (orig_type, expr);
2465
2466 case MULT_EXPR:
2467 op1 = TREE_OPERAND (expr, 1);
2468 if (!cst_and_fits_in_hwi (op1))
2469 return orig_expr;
2470
2471 op0 = TREE_OPERAND (expr, 0);
2472 op0 = strip_offset_1 (op0, false, false, &off0);
2473 if (op0 == TREE_OPERAND (expr, 0))
2474 return orig_expr;
2475
2476 *offset = off0 * int_cst_value (op1);
2477 if (integer_zerop (op0))
2478 expr = op0;
2479 else
2480 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2481
2482 return fold_convert (orig_type, expr);
2483
2484 case ARRAY_REF:
2485 case ARRAY_RANGE_REF:
2486 if (!inside_addr)
2487 return orig_expr;
2488
2489 step = array_ref_element_size (expr);
2490 if (!cst_and_fits_in_hwi (step))
2491 break;
2492
2493 st = int_cst_value (step);
2494 op1 = TREE_OPERAND (expr, 1);
2495 op1 = strip_offset_1 (op1, false, false, &off1);
2496 *offset = off1 * st;
2497
2498 if (top_compref
2499 && integer_zerop (op1))
2500 {
2501 /* Strip the component reference completely. */
2502 op0 = TREE_OPERAND (expr, 0);
2503 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2504 *offset += off0;
2505 return op0;
2506 }
2507 break;
2508
2509 case COMPONENT_REF:
2510 {
2511 tree field;
2512
2513 if (!inside_addr)
2514 return orig_expr;
2515
2516 tmp = component_ref_field_offset (expr);
2517 field = TREE_OPERAND (expr, 1);
2518 if (top_compref
2519 && cst_and_fits_in_hwi (tmp)
2520 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2521 {
2522 HOST_WIDE_INT boffset, abs_off;
2523
2524 /* Strip the component reference completely. */
2525 op0 = TREE_OPERAND (expr, 0);
2526 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2527 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2528 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2529 if (boffset < 0)
2530 abs_off = -abs_off;
2531
2532 *offset = off0 + int_cst_value (tmp) + abs_off;
2533 return op0;
2534 }
2535 }
2536 break;
2537
2538 case ADDR_EXPR:
2539 op0 = TREE_OPERAND (expr, 0);
2540 op0 = strip_offset_1 (op0, true, true, &off0);
2541 *offset += off0;
2542
2543 if (op0 == TREE_OPERAND (expr, 0))
2544 return orig_expr;
2545
2546 expr = build_fold_addr_expr (op0);
2547 return fold_convert (orig_type, expr);
2548
2549 case MEM_REF:
2550 /* ??? Offset operand? */
2551 inside_addr = false;
2552 break;
2553
2554 default:
2555 return orig_expr;
2556 }
2557
2558 /* Default handling of expressions for which we want to recurse into
2559 the first operand. */
2560 op0 = TREE_OPERAND (expr, 0);
2561 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2562 *offset += off0;
2563
2564 if (op0 == TREE_OPERAND (expr, 0)
2565 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2566 return orig_expr;
2567
2568 expr = copy_node (expr);
2569 TREE_OPERAND (expr, 0) = op0;
2570 if (op1)
2571 TREE_OPERAND (expr, 1) = op1;
2572
2573 /* Inside an address, we might strip the top level component references,
2574 thus changing the type of the expression. Handling of ADDR_EXPR
2575 will fix that. */
2576 expr = fold_convert (orig_type, expr);
2577
2578 return expr;
2579 }
2580
2581 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2582
2583 static tree
2584 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2585 {
2586 HOST_WIDE_INT off;
2587 tree core = strip_offset_1 (expr, false, false, &off);
2588 *offset = off;
2589 return core;
2590 }
2591
2592 /* Returns a variant of TYPE that can be used as a base for different uses.
2593 We return an unsigned type with the same precision, which avoids problems
2594 with overflows. */
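/* (E.g. 'int' becomes 'unsigned int', and a pointer type becomes the
   unsigned integer type of the same precision.)  */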
2595
2596 static tree
2597 generic_type_for (tree type)
2598 {
2599 if (POINTER_TYPE_P (type))
2600 return unsigned_type_for (type);
2601
2602 if (TYPE_UNSIGNED (type))
2603 return type;
2604
2605 return unsigned_type_for (type);
2606 }
2607
2608 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2609 the bitmap in which we should store them. */
2610
2611 static struct ivopts_data *fd_ivopts_data;
2612 static tree
2613 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2614 {
2615 bitmap *depends_on = (bitmap *) data;
2616 struct version_info *info;
2617
2618 if (TREE_CODE (*expr_p) != SSA_NAME)
2619 return NULL_TREE;
2620 info = name_info (fd_ivopts_data, *expr_p);
2621
2622 if (!info->inv_id || info->has_nonlin_use)
2623 return NULL_TREE;
2624
2625 if (!*depends_on)
2626 *depends_on = BITMAP_ALLOC (NULL);
2627 bitmap_set_bit (*depends_on, info->inv_id);
2628
2629 return NULL_TREE;
2630 }
2631
2632 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2633 position to POS. If USE is not NULL, the candidate is set as related to
2634 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2635 replacement of the final value of the iv by a direct computation. */
2636
2637 static struct iv_cand *
2638 add_candidate_1 (struct ivopts_data *data,
2639 tree base, tree step, bool important, enum iv_position pos,
2640 struct iv_use *use, gimple incremented_at)
2641 {
2642 unsigned i;
2643 struct iv_cand *cand = NULL;
2644 tree type, orig_type;
2645
2646 /* For non-original variables, make sure their values are computed in a type
2647 that does not invoke undefined behavior on overflows (since in general,
2648 we cannot prove that these induction variables are non-wrapping). */
2649 if (pos != IP_ORIGINAL)
2650 {
2651 orig_type = TREE_TYPE (base);
2652 type = generic_type_for (orig_type);
2653 if (type != orig_type)
2654 {
2655 base = fold_convert (type, base);
2656 step = fold_convert (type, step);
2657 }
2658 }
2659
2660 for (i = 0; i < n_iv_cands (data); i++)
2661 {
2662 cand = iv_cand (data, i);
2663
2664 if (cand->pos != pos)
2665 continue;
2666
2667 if (cand->incremented_at != incremented_at
2668 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2669 && cand->ainc_use != use))
2670 continue;
2671
2672 if (!cand->iv)
2673 {
2674 if (!base && !step)
2675 break;
2676
2677 continue;
2678 }
2679
2680 if (!base && !step)
2681 continue;
2682
2683 if (operand_equal_p (base, cand->iv->base, 0)
2684 && operand_equal_p (step, cand->iv->step, 0)
2685 && (TYPE_PRECISION (TREE_TYPE (base))
2686 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2687 break;
2688 }
2689
2690 if (i == n_iv_cands (data))
2691 {
2692 cand = XCNEW (struct iv_cand);
2693 cand->id = i;
2694
2695 if (!base && !step)
2696 cand->iv = NULL;
2697 else
2698 cand->iv = alloc_iv (base, step);
2699
2700 cand->pos = pos;
2701 if (pos != IP_ORIGINAL && cand->iv)
2702 {
2703 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2704 cand->var_after = cand->var_before;
2705 }
2706 cand->important = important;
2707 cand->incremented_at = incremented_at;
2708 data->iv_candidates.safe_push (cand);
2709
2710 if (step
2711 && TREE_CODE (step) != INTEGER_CST)
2712 {
2713 fd_ivopts_data = data;
2714 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2715 }
2716
2717 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2718 cand->ainc_use = use;
2719 else
2720 cand->ainc_use = NULL;
2721
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 dump_cand (dump_file, cand);
2724 }
2725
2726 if (important && !cand->important)
2727 {
2728 cand->important = true;
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2731 }
2732
2733 if (use)
2734 {
2735 bitmap_set_bit (use->related_cands, i);
2736 if (dump_file && (dump_flags & TDF_DETAILS))
2737 fprintf (dump_file, "Candidate %d is related to use %d\n",
2738 cand->id, use->id);
2739 }
2740
2741 return cand;
2742 }
2743
2744 /* Returns true if incrementing the induction variable at the end of the LOOP
2745 is allowed.
2746
2747 The purpose is to avoid splitting the latch edge with a biv increment, thus
2748 creating a jump, possibly confusing other optimization passes and leaving
2749 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2750 is not available (so we do not have a better alternative), or if the latch
2751 edge is already nonempty. */
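/* (In other words: if the latch block already contains statements, adding
   the increment there creates no new jump; and if there is no IP_NORMAL
   position at all, IP_END is the only choice anyway.)  */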
2752
2753 static bool
2754 allow_ip_end_pos_p (struct loop *loop)
2755 {
2756 if (!ip_normal_pos (loop))
2757 return true;
2758
2759 if (!empty_block_p (ip_end_pos (loop)))
2760 return true;
2761
2762 return false;
2763 }
2764
2765 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2766 Important field is set to IMPORTANT. */
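/* (Sketch: for a use *p with step 4 and a 4-byte MEM_MODE on a target
   with post-increment addressing, this adds a candidate incremented
   right after the use, i.e. the *p++ idiom; the pre-increment variants
   use BASE - STEP and place the increment before the use.)  */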
2767
2768 static void
2769 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2770 bool important, struct iv_use *use)
2771 {
2772 basic_block use_bb = gimple_bb (use->stmt);
2773 machine_mode mem_mode;
2774 unsigned HOST_WIDE_INT cstepi;
2775
2776 /* If we insert the increment in any position other than the standard
2777 ones, we must ensure that it is incremented once per iteration.
2778 It must not be in an inner nested loop, or one side of an if
2779 statement. */
2780 if (use_bb->loop_father != data->current_loop
2781 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2782 || stmt_could_throw_p (use->stmt)
2783 || !cst_and_fits_in_hwi (step))
2784 return;
2785
2786 cstepi = int_cst_value (step);
2787
2788 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2789 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2790 || USE_STORE_PRE_INCREMENT (mem_mode))
2791 && GET_MODE_SIZE (mem_mode) == cstepi)
2792 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2793 || USE_STORE_PRE_DECREMENT (mem_mode))
2794 && GET_MODE_SIZE (mem_mode) == -cstepi))
2795 {
2796 enum tree_code code = MINUS_EXPR;
2797 tree new_base;
2798 tree new_step = step;
2799
2800 if (POINTER_TYPE_P (TREE_TYPE (base)))
2801 {
2802 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2803 code = POINTER_PLUS_EXPR;
2804 }
2805 else
2806 new_step = fold_convert (TREE_TYPE (base), new_step);
2807 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2808 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2809 use->stmt);
2810 }
2811 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2812 || USE_STORE_POST_INCREMENT (mem_mode))
2813 && GET_MODE_SIZE (mem_mode) == cstepi)
2814 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2815 || USE_STORE_POST_DECREMENT (mem_mode))
2816 && GET_MODE_SIZE (mem_mode) == -cstepi))
2817 {
2818 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2819 use->stmt);
2820 }
2821 }
2822
2823 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2824 position to POS. If USE is not NULL, the candidate is set as related to
2825 it. The candidate computation is scheduled on all available positions. */
2826
2827 static void
2828 add_candidate (struct ivopts_data *data,
2829 tree base, tree step, bool important, struct iv_use *use)
2830 {
2831 gcc_assert (use == NULL || use->sub_id == 0);
2832
2833 if (ip_normal_pos (data->current_loop))
2834 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2835 if (ip_end_pos (data->current_loop)
2836 && allow_ip_end_pos_p (data->current_loop))
2837 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2838
2839 if (use != NULL && use->type == USE_ADDRESS)
2840 add_autoinc_candidates (data, base, step, important, use);
2841 }
2842
2843 /* Adds standard iv candidates. */
2844
2845 static void
2846 add_standard_iv_candidates (struct ivopts_data *data)
2847 {
2848 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2849
2850 /* The same for the 'long' type if it is still fast enough. */
2851 if (TYPE_PRECISION
2852 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2853 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2854 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2855 build_int_cst (long_integer_type_node, 1), true, NULL);
2856
2857 /* The same for the 'long long' type if it is still fast enough. */
2858 if (TYPE_PRECISION
2859 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2860 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2861 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2862 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2863 }
2864
2866 /* Adds candidates based on the old induction variable IV. */
2867
2868 static void
2869 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2870 {
2871 gimple phi;
2872 tree def;
2873 struct iv_cand *cand;
2874
2875 add_candidate (data, iv->base, iv->step, true, NULL);
2876
2877 /* The same, but with initial value zero. */
2878 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2879 add_candidate (data, size_int (0), iv->step, true, NULL);
2880 else
2881 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2882 iv->step, true, NULL);
2883
2884 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2885 if (gimple_code (phi) == GIMPLE_PHI)
2886 {
2887 /* Additionally record the possibility of leaving the original iv
2888 untouched. */
2889 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2890 /* Don't add candidate if it's from another PHI node because
2891 it's an affine iv appearing in the form of PEELED_CHREC. */
2892 phi = SSA_NAME_DEF_STMT (def);
2893 if (gimple_code (phi) != GIMPLE_PHI)
2894 {
2895 cand = add_candidate_1 (data,
2896 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2897 SSA_NAME_DEF_STMT (def));
2898 cand->var_before = iv->ssa_name;
2899 cand->var_after = def;
2900 }
2901 else
2902 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2903 }
2904 }
2905
2906 /* Adds candidates based on the old induction variables. */
2907
2908 static void
2909 add_old_ivs_candidates (struct ivopts_data *data)
2910 {
2911 unsigned i;
2912 struct iv *iv;
2913 bitmap_iterator bi;
2914
2915 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2916 {
2917 iv = ver_info (data, i)->iv;
2918 if (iv && iv->biv_p && !integer_zerop (iv->step))
2919 add_old_iv_candidates (data, iv);
2920 }
2921 }
2922
2923 /* Adds candidates based on the value of the induction variable IV and USE. */
2924
2925 static void
2926 add_iv_value_candidates (struct ivopts_data *data,
2927 struct iv *iv, struct iv_use *use)
2928 {
2929 unsigned HOST_WIDE_INT offset;
2930 tree base;
2931 tree basetype;
2932
2933 add_candidate (data, iv->base, iv->step, false, use);
2934
2935 /* The same, but with initial value zero. Make such a variable important,
2936 since it is generic enough that possibly many uses may be based
2937 on it. */
2938 basetype = TREE_TYPE (iv->base);
2939 if (POINTER_TYPE_P (basetype))
2940 basetype = sizetype;
2941 add_candidate (data, build_int_cst (basetype, 0),
2942 iv->step, true, use);
2943
2944 /* Third, try removing the constant offset. Make sure to even
2945 add a candidate for &a[0] vs. (T *)&a. */
2946 base = strip_offset (iv->base, &offset);
2947 if (offset
2948 || base != iv->base)
2949 add_candidate (data, base, iv->step, false, use);
2950 }
2951
2952 /* Adds candidates based on the uses. */
2953
2954 static void
2955 add_derived_ivs_candidates (struct ivopts_data *data)
2956 {
2957 unsigned i;
2958
2959 for (i = 0; i < n_iv_uses (data); i++)
2960 {
2961 struct iv_use *use = iv_use (data, i);
2962
2963 if (!use)
2964 continue;
2965
2966 switch (use->type)
2967 {
2968 case USE_NONLINEAR_EXPR:
2969 case USE_COMPARE:
2970 case USE_ADDRESS:
2971 /* Just add the ivs based on the value of the iv used here. */
2972 add_iv_value_candidates (data, use->iv, use);
2973 break;
2974
2975 default:
2976 gcc_unreachable ();
2977 }
2978 }
2979 }
2980
2981 /* Record important candidates and add them to related_cands bitmaps
2982 if needed. */
2983
2984 static void
2985 record_important_candidates (struct ivopts_data *data)
2986 {
2987 unsigned i;
2988 struct iv_use *use;
2989
2990 for (i = 0; i < n_iv_cands (data); i++)
2991 {
2992 struct iv_cand *cand = iv_cand (data, i);
2993
2994 if (cand->important)
2995 bitmap_set_bit (data->important_candidates, i);
2996 }
2997
2998 data->consider_all_candidates = (n_iv_cands (data)
2999 <= CONSIDER_ALL_CANDIDATES_BOUND);
3000
3001 if (data->consider_all_candidates)
3002 {
3003 /* We will not need "related_cands" bitmaps in this case,
3004 so release them to decrease peak memory consumption. */
3005 for (i = 0; i < n_iv_uses (data); i++)
3006 {
3007 use = iv_use (data, i);
3008 BITMAP_FREE (use->related_cands);
3009 }
3010 }
3011 else
3012 {
3013 /* Add important candidates to the related_cands bitmaps. */
3014 for (i = 0; i < n_iv_uses (data); i++)
3015 bitmap_ior_into (iv_use (data, i)->related_cands,
3016 data->important_candidates);
3017 }
3018 }
3019
3020 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3021 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3022 we allocate a small hash table for every use. */
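/* (Sizing sketch: a use with 5 related candidates gets a table of 8
   entries, so the (cand->id & 7) probe used by set_use_iv_cost and
   get_use_iv_cost below is a cheap modulo.)  */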
3023
3024 static void
3025 alloc_use_cost_map (struct ivopts_data *data)
3026 {
3027 unsigned i, size, s;
3028
3029 for (i = 0; i < n_iv_uses (data); i++)
3030 {
3031 struct iv_use *use = iv_use (data, i);
3032
3033 if (data->consider_all_candidates)
3034 size = n_iv_cands (data);
3035 else
3036 {
3037 s = bitmap_count_bits (use->related_cands);
3038
3039 /* Round up to the nearest power of two, so that taking the modulus is fast. */
3040 size = s ? (1 << ceil_log2 (s)) : 1;
3041 }
3042
3043 use->n_map_members = size;
3044 use->cost_map = XCNEWVEC (struct cost_pair, size);
3045 }
3046 }
3047
3048 /* Returns a description of the computation cost of an expression whose
3049 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3050
3051 static comp_cost
3052 new_cost (unsigned runtime, unsigned complexity)
3053 {
3054 comp_cost cost;
3055
3056 cost.cost = runtime;
3057 cost.complexity = complexity;
3058
3059 return cost;
3060 }
3061
3062 /* Returns true if COST is infinite. */
3063
3064 static bool
3065 infinite_cost_p (comp_cost cost)
3066 {
3067 return cost.cost == INFTY;
3068 }
3069
3070 /* Adds costs COST1 and COST2. */
3071
3072 static comp_cost
3073 add_costs (comp_cost cost1, comp_cost cost2)
3074 {
3075 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3076 return infinite_cost;
3077
3078 cost1.cost += cost2.cost;
3079 cost1.complexity += cost2.complexity;
3080
3081 return cost1;
3082 }
3083 /* Subtracts cost COST2 from COST1. */
3084
3085 static comp_cost
3086 sub_costs (comp_cost cost1, comp_cost cost2)
3087 {
3088 cost1.cost -= cost2.cost;
3089 cost1.complexity -= cost2.complexity;
3090
3091 return cost1;
3092 }
3093
3094 /* Returns a negative number if COST1 < COST2, a positive number if
3095 COST1 > COST2, and 0 if COST1 = COST2. */
3096
3097 static int
3098 compare_costs (comp_cost cost1, comp_cost cost2)
3099 {
3100 if (cost1.cost == cost2.cost)
3101 return cost1.complexity - cost2.complexity;
3102
3103 return cost1.cost - cost2.cost;
3104 }
3105
3106 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3107 depends on invariants DEPENDS_ON, that the value used in expressing it
3108 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3109
3110 static void
3111 set_use_iv_cost (struct ivopts_data *data,
3112 struct iv_use *use, struct iv_cand *cand,
3113 comp_cost cost, bitmap depends_on, tree value,
3114 enum tree_code comp, int inv_expr_id)
3115 {
3116 unsigned i, s;
3117
3118 if (infinite_cost_p (cost))
3119 {
3120 BITMAP_FREE (depends_on);
3121 return;
3122 }
3123
3124 if (data->consider_all_candidates)
3125 {
3126 use->cost_map[cand->id].cand = cand;
3127 use->cost_map[cand->id].cost = cost;
3128 use->cost_map[cand->id].depends_on = depends_on;
3129 use->cost_map[cand->id].value = value;
3130 use->cost_map[cand->id].comp = comp;
3131 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3132 return;
3133 }
3134
3135 /* n_map_members is a power of two, so this computes modulo. */
3136 s = cand->id & (use->n_map_members - 1);
3137 for (i = s; i < use->n_map_members; i++)
3138 if (!use->cost_map[i].cand)
3139 goto found;
3140 for (i = 0; i < s; i++)
3141 if (!use->cost_map[i].cand)
3142 goto found;
3143
3144 gcc_unreachable ();
3145
3146 found:
3147 use->cost_map[i].cand = cand;
3148 use->cost_map[i].cost = cost;
3149 use->cost_map[i].depends_on = depends_on;
3150 use->cost_map[i].value = value;
3151 use->cost_map[i].comp = comp;
3152 use->cost_map[i].inv_expr_id = inv_expr_id;
3153 }
3154
3155 /* Gets cost of (USE, CANDIDATE) pair. */
3156
3157 static struct cost_pair *
3158 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3159 struct iv_cand *cand)
3160 {
3161 unsigned i, s;
3162 struct cost_pair *ret;
3163
3164 if (!cand)
3165 return NULL;
3166
3167 if (data->consider_all_candidates)
3168 {
3169 ret = use->cost_map + cand->id;
3170 if (!ret->cand)
3171 return NULL;
3172
3173 return ret;
3174 }
3175
3176 /* n_map_members is a power of two, so this computes modulo. */
3177 s = cand->id & (use->n_map_members - 1);
3178 for (i = s; i < use->n_map_members; i++)
3179 if (use->cost_map[i].cand == cand)
3180 return use->cost_map + i;
3181 else if (use->cost_map[i].cand == NULL)
3182 return NULL;
3183 for (i = 0; i < s; i++)
3184 if (use->cost_map[i].cand == cand)
3185 return use->cost_map + i;
3186 else if (use->cost_map[i].cand == NULL)
3187 return NULL;
3188
3189 return NULL;
3190 }
3191
3192 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3193 static rtx
3194 produce_memory_decl_rtl (tree obj, int *regno)
3195 {
3196 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3197 machine_mode address_mode = targetm.addr_space.address_mode (as);
3198 rtx x;
3199
3200 gcc_assert (obj);
3201 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3202 {
3203 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3204 x = gen_rtx_SYMBOL_REF (address_mode, name);
3205 SET_SYMBOL_REF_DECL (x, obj);
3206 x = gen_rtx_MEM (DECL_MODE (obj), x);
3207 set_mem_addr_space (x, as);
3208 targetm.encode_section_info (obj, x, true);
3209 }
3210 else
3211 {
3212 x = gen_raw_REG (address_mode, (*regno)++);
3213 x = gen_rtx_MEM (DECL_MODE (obj), x);
3214 set_mem_addr_space (x, as);
3215 }
3216
3217 return x;
3218 }
3219
3220 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3221 walk_tree. DATA contains the actual fake register number. */
3222
3223 static tree
3224 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3225 {
3226 tree obj = NULL_TREE;
3227 rtx x = NULL_RTX;
3228 int *regno = (int *) data;
3229
3230 switch (TREE_CODE (*expr_p))
3231 {
3232 case ADDR_EXPR:
3233 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3234 handled_component_p (*expr_p);
3235 expr_p = &TREE_OPERAND (*expr_p, 0))
3236 continue;
3237 obj = *expr_p;
3238 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3239 x = produce_memory_decl_rtl (obj, regno);
3240 break;
3241
3242 case SSA_NAME:
3243 *ws = 0;
3244 obj = SSA_NAME_VAR (*expr_p);
3245 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3246 if (!obj)
3247 return NULL_TREE;
3248 if (!DECL_RTL_SET_P (obj))
3249 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3250 break;
3251
3252 case VAR_DECL:
3253 case PARM_DECL:
3254 case RESULT_DECL:
3255 *ws = 0;
3256 obj = *expr_p;
3257
3258 if (DECL_RTL_SET_P (obj))
3259 break;
3260
3261 if (DECL_MODE (obj) == BLKmode)
3262 x = produce_memory_decl_rtl (obj, regno);
3263 else
3264 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3265
3266 break;
3267
3268 default:
3269 break;
3270 }
3271
3272 if (x)
3273 {
3274 decl_rtl_to_reset.safe_push (obj);
3275 SET_DECL_RTL (obj, x);
3276 }
3277
3278 return NULL_TREE;
3279 }
3280
3281 /* Determines cost of the computation of EXPR. */
3282
3283 static unsigned
3284 computation_cost (tree expr, bool speed)
3285 {
3286 rtx_insn *seq;
3287 rtx rslt;
3288 tree type = TREE_TYPE (expr);
3289 unsigned cost;
3290 /* Avoid using hard regs in ways which may be unsupported. */
3291 int regno = LAST_VIRTUAL_REGISTER + 1;
3292 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3293 enum node_frequency real_frequency = node->frequency;
3294
3295 node->frequency = NODE_FREQUENCY_NORMAL;
3296 crtl->maybe_hot_insn_p = speed;
3297 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3298 start_sequence ();
3299 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3300 seq = get_insns ();
3301 end_sequence ();
3302 default_rtl_profile ();
3303 node->frequency = real_frequency;
3304
3305 cost = seq_cost (seq, speed);
3306 if (MEM_P (rslt))
3307 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3308 TYPE_ADDR_SPACE (type), speed);
3309 else if (!REG_P (rslt))
3310 cost += set_src_cost (rslt, speed);
3311
3312 return cost;
3313 }
3314
3315 /* Returns variable containing the value of candidate CAND at statement AT. */
3316
3317 static tree
3318 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3319 {
3320 if (stmt_after_increment (loop, cand, stmt))
3321 return cand->var_after;
3322 else
3323 return cand->var_before;
3324 }
3325
3326 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3327 same precision, at least as wide as the precision of TYPE, stores
3328 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3329 type of A and B. */
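/* (Illustration: for A = (int) la and B = (int) lb, with 'long' LA and LB
   wider than 'int', this stores LA and LB back through the pointers and
   returns 'long', so the caller can fold their difference in the wider
   type.)  */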
3330
3331 static tree
3332 determine_common_wider_type (tree *a, tree *b)
3333 {
3334 tree wider_type = NULL;
3335 tree suba, subb;
3336 tree atype = TREE_TYPE (*a);
3337
3338 if (CONVERT_EXPR_P (*a))
3339 {
3340 suba = TREE_OPERAND (*a, 0);
3341 wider_type = TREE_TYPE (suba);
3342 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3343 return atype;
3344 }
3345 else
3346 return atype;
3347
3348 if (CONVERT_EXPR_P (*b))
3349 {
3350 subb = TREE_OPERAND (*b, 0);
3351 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3352 return atype;
3353 }
3354 else
3355 return atype;
3356
3357 *a = suba;
3358 *b = subb;
3359 return wider_type;
3360 }
3361
3362 /* Determines the expression by which USE is expressed from induction variable
3363 CAND at statement AT in LOOP. The expression is stored in a decomposed
3364 form into AFF. Returns false if USE cannot be expressed using CAND. */
3365
3366 static bool
3367 get_computation_aff (struct loop *loop,
3368 struct iv_use *use, struct iv_cand *cand, gimple at,
3369 struct aff_tree *aff)
3370 {
3371 tree ubase = use->iv->base;
3372 tree ustep = use->iv->step;
3373 tree cbase = cand->iv->base;
3374 tree cstep = cand->iv->step, cstep_common;
3375 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3376 tree common_type, var;
3377 tree uutype;
3378 aff_tree cbase_aff, var_aff;
3379 widest_int rat;
3380
3381 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3382 {
3383 /* We do not have enough precision to express the values of the use. */
3384 return false;
3385 }
3386
3387 var = var_at_stmt (loop, cand, at);
3388 uutype = unsigned_type_for (utype);
3389
3390 /* If the conversion is not a noop, perform it. */
3391 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3392 {
3393 cstep = fold_convert (uutype, cstep);
3394 cbase = fold_convert (uutype, cbase);
3395 var = fold_convert (uutype, var);
3396 }
3397
3398 if (!constant_multiple_of (ustep, cstep, &rat))
3399 return false;
3400
3401 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3402 type, we achieve better folding by computing their difference in this
3403 wider type, and casting the result to UUTYPE. We do not need to worry about
3404 overflows, as all the arithmetic will in the end be performed in UUTYPE
3405 anyway. */
3406 common_type = determine_common_wider_type (&ubase, &cbase);
3407
3408 /* use = ubase - ratio * cbase + ratio * var. */
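/* (Concrete instance, assuming ustep == cstep so that RAT is 1: for
   use->iv = {UB, S} and cand->iv = {CB, S} the combination is simply
   UB - CB + VAR, i.e. the use is recovered from the candidate by
   adding the constant difference of the two bases.)  */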
3409 tree_to_aff_combination (ubase, common_type, aff);
3410 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3411 tree_to_aff_combination (var, uutype, &var_aff);
3412
3413 /* We need to shift the value if we are after the increment. */
3414 if (stmt_after_increment (loop, cand, at))
3415 {
3416 aff_tree cstep_aff;
3417
3418 if (common_type != uutype)
3419 cstep_common = fold_convert (common_type, cstep);
3420 else
3421 cstep_common = cstep;
3422
3423 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3424 aff_combination_add (&cbase_aff, &cstep_aff);
3425 }
3426
3427 aff_combination_scale (&cbase_aff, -rat);
3428 aff_combination_add (aff, &cbase_aff);
3429 if (common_type != uutype)
3430 aff_combination_convert (aff, uutype);
3431
3432 aff_combination_scale (&var_aff, rat);
3433 aff_combination_add (aff, &var_aff);
3434
3435 return true;
3436 }
3437
3438 /* Return the type of USE. */
3439
3440 static tree
3441 get_use_type (struct iv_use *use)
3442 {
3443 tree base_type = TREE_TYPE (use->iv->base);
3444 tree type;
3445
3446 if (use->type == USE_ADDRESS)
3447 {
3448 /* The base_type may be a void pointer. Create a pointer type based on
3449 the mem_ref instead. */
3450 type = build_pointer_type (TREE_TYPE (*use->op_p));
3451 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3452 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3453 }
3454 else
3455 type = base_type;
3456
3457 return type;
3458 }
3459
3460 /* Determines the expression by which USE is expressed from induction variable
3461 CAND at statement AT in LOOP. The computation is unshared. */
3462
3463 static tree
3464 get_computation_at (struct loop *loop,
3465 struct iv_use *use, struct iv_cand *cand, gimple at)
3466 {
3467 aff_tree aff;
3468 tree type = get_use_type (use);
3469
3470 if (!get_computation_aff (loop, use, cand, at, &aff))
3471 return NULL_TREE;
3472 unshare_aff_combination (&aff);
3473 return fold_convert (type, aff_combination_to_tree (&aff));
3474 }
3475
3476 /* Determines the expression by which USE is expressed from induction variable
3477 CAND in LOOP. The computation is unshared. */
3478
3479 static tree
3480 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3481 {
3482 return get_computation_at (loop, use, cand, use->stmt);
3483 }
3484
3485 /* Adjust the cost COST for being in loop setup rather than loop body.
3486 If we're optimizing for space, the loop setup overhead is constant;
3487 if we're optimizing for speed, amortize it over the per-iteration cost. */
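/* (E.g. a setup cost of 10 in a loop averaging 5 iterations counts as 2
   per iteration when optimizing for speed, but stays 10 when optimizing
   for size.)  */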
3488 static unsigned
3489 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3490 {
3491 if (cost == INFTY)
3492 return cost;
3493 else if (optimize_loop_for_speed_p (data->current_loop))
3494 return cost / avg_loop_niter (data->current_loop);
3495 else
3496 return cost;
3497 }
3498
3499 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3500 validity for a memory reference accessing memory of mode MODE in
3501 address space AS. */
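/* (On an x86-like target, for instance, only the ratios 1, 2, 4 and 8
   would be recorded in the bitmap built below; the answers are cached
   per address space and memory mode.)  */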
3502
3504 bool
3505 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3506 addr_space_t as)
3507 {
3508 #define MAX_RATIO 128
3509 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3510 static vec<sbitmap> valid_mult_list;
3511 sbitmap valid_mult;
3512
3513 if (data_index >= valid_mult_list.length ())
3514 valid_mult_list.safe_grow_cleared (data_index + 1);
3515
3516 valid_mult = valid_mult_list[data_index];
3517 if (!valid_mult)
3518 {
3519 machine_mode address_mode = targetm.addr_space.address_mode (as);
3520 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3521 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3522 rtx addr, scaled;
3523 HOST_WIDE_INT i;
3524
3525 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3526 bitmap_clear (valid_mult);
3527 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3528 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3529 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3530 {
3531 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3532 if (memory_address_addr_space_p (mode, addr, as)
3533 || memory_address_addr_space_p (mode, scaled, as))
3534 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3535 }
3536
3537 if (dump_file && (dump_flags & TDF_DETAILS))
3538 {
3539 fprintf (dump_file, " allowed multipliers:");
3540 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3541 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3542 fprintf (dump_file, " %d", (int) i);
3543 fprintf (dump_file, "\n");
3544 fprintf (dump_file, "\n");
3545 }
3546
3547 valid_mult_list[data_index] = valid_mult;
3548 }
3549
3550 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3551 return false;
3552
3553 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3554 }
3555
3556 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3557 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3558 variable is omitted. Compute the cost for a memory reference that accesses
3559 a memory location of mode MEM_MODE in address space AS.
3560
3561 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3562 size of MEM_MODE / RATIO) is available. To make this determination, we
3563 look at the size of the increment to be made, which is given in CSTEP.
3564 CSTEP may be zero if the step is unknown.
3565 STMT_AFTER_INC is true iff the statement we're looking at is after the
3566 increment of the original biv.
3567
3568 TODO -- there must be some better way. This all is quite crude. */
3569
3570 enum ainc_type
3571 {
3572 AINC_PRE_INC, /* Pre increment. */
3573 AINC_PRE_DEC, /* Pre decrement. */
3574 AINC_POST_INC, /* Post increment. */
3575 AINC_POST_DEC, /* Post decrement. */
3576 AINC_NONE /* Also the number of auto increment types. */
3577 };
3578
3579 typedef struct address_cost_data_s
3580 {
3581 HOST_WIDE_INT min_offset, max_offset;
3582 unsigned costs[2][2][2][2];
3583 unsigned ainc_costs[AINC_NONE];
3584 } *address_cost_data;
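/* (Indexing sketch: costs[sym][var][off][rat] is the cost of an address of
   shape symbol + var + offset + ratio * index with each part present or
   absent; e.g. costs[0][1][1][0] is the cost of var + offset.)  */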
3585
3586
3587 static comp_cost
3588 get_address_cost (bool symbol_present, bool var_present,
3589 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3590 HOST_WIDE_INT cstep, machine_mode mem_mode,
3591 addr_space_t as, bool speed,
3592 bool stmt_after_inc, bool *may_autoinc)
3593 {
3594 machine_mode address_mode = targetm.addr_space.address_mode (as);
3595 static vec<address_cost_data> address_cost_data_list;
3596 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3597 address_cost_data data;
3598 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3599 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3600 unsigned cost, acost, complexity;
3601 enum ainc_type autoinc_type;
3602 bool offset_p, ratio_p, autoinc;
3603 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3604 unsigned HOST_WIDE_INT mask;
3605 unsigned bits;
3606
3607 if (data_index >= address_cost_data_list.length ())
3608 address_cost_data_list.safe_grow_cleared (data_index + 1);
3609
3610 data = address_cost_data_list[data_index];
3611 if (!data)
3612 {
3613 HOST_WIDE_INT i;
3614 HOST_WIDE_INT rat, off = 0;
3615 int old_cse_not_expected, width;
3616 unsigned sym_p, var_p, off_p, rat_p, add_c;
3617 rtx_insn *seq;
3618 rtx addr, base;
3619 rtx reg0, reg1;
3620
3621 data = (address_cost_data) xcalloc (1, sizeof (*data));
3622
3623 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3624
3625 width = GET_MODE_BITSIZE (address_mode) - 1;
3626 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3627 width = HOST_BITS_PER_WIDE_INT - 1;
3628 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3629
3630 for (i = width; i >= 0; i--)
3631 {
3632 off = -((unsigned HOST_WIDE_INT) 1 << i);
3633 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3634 if (memory_address_addr_space_p (mem_mode, addr, as))
3635 break;
3636 }
3637 data->min_offset = (i == -1? 0 : off);
3638
3639 for (i = width; i >= 0; i--)
3640 {
3641 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3642 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3643 if (memory_address_addr_space_p (mem_mode, addr, as))
3644 break;
3645 /* For some strict-alignment targets, the offset must be naturally
3646 aligned. Try an aligned offset if mem_mode is not QImode. */
3647 off = mem_mode != QImode
3648 ? ((unsigned HOST_WIDE_INT) 1 << i)
3649 - GET_MODE_SIZE (mem_mode)
3650 : 0;
3651 if (off > 0)
3652 {
3653 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3654 if (memory_address_addr_space_p (mem_mode, addr, as))
3655 break;
3656 }
3657 }
3658 if (i == -1)
3659 off = 0;
3660 data->max_offset = off;
3661
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3663 {
3664 fprintf (dump_file, "get_address_cost:\n");
3665 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3666 GET_MODE_NAME (mem_mode),
3667 data->min_offset);
3668 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3669 GET_MODE_NAME (mem_mode),
3670 data->max_offset);
3671 }
3672
3673 rat = 1;
3674 for (i = 2; i <= MAX_RATIO; i++)
3675 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3676 {
3677 rat = i;
3678 break;
3679 }
3680
3681 /* Compute the cost of various addressing modes. */
3682 acost = 0;
3683 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3684 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3685
3686 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3687 || USE_STORE_PRE_DECREMENT (mem_mode))
3688 {
3689 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3690 has_predec[mem_mode]
3691 = memory_address_addr_space_p (mem_mode, addr, as);
3692
3693 if (has_predec[mem_mode])
3694 data->ainc_costs[AINC_PRE_DEC]
3695 = address_cost (addr, mem_mode, as, speed);
3696 }
3697 if (USE_LOAD_POST_DECREMENT (mem_mode)
3698 || USE_STORE_POST_DECREMENT (mem_mode))
3699 {
3700 addr = gen_rtx_POST_DEC (address_mode, reg0);
3701 has_postdec[mem_mode]
3702 = memory_address_addr_space_p (mem_mode, addr, as);
3703
3704 if (has_postdec[mem_mode])
3705 data->ainc_costs[AINC_POST_DEC]
3706 = address_cost (addr, mem_mode, as, speed);
3707 }
3708 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3709 || USE_STORE_PRE_INCREMENT (mem_mode))
3710 {
3711 addr = gen_rtx_PRE_INC (address_mode, reg0);
3712 has_preinc[mem_mode]
3713 = memory_address_addr_space_p (mem_mode, addr, as);
3714
3715 if (has_preinc[mem_mode])
3716 data->ainc_costs[AINC_PRE_INC]
3717 = address_cost (addr, mem_mode, as, speed);
3718 }
3719 if (USE_LOAD_POST_INCREMENT (mem_mode)
3720 || USE_STORE_POST_INCREMENT (mem_mode))
3721 {
3722 addr = gen_rtx_POST_INC (address_mode, reg0);
3723 has_postinc[mem_mode]
3724 = memory_address_addr_space_p (mem_mode, addr, as);
3725
3726 if (has_postinc[mem_mode])
3727 data->ainc_costs[AINC_POST_INC]
3728 = address_cost (addr, mem_mode, as, speed);
3729 }
3730 for (i = 0; i < 16; i++)
3731 {
3732 sym_p = i & 1;
3733 var_p = (i >> 1) & 1;
3734 off_p = (i >> 2) & 1;
3735 rat_p = (i >> 3) & 1;
3736
3737 addr = reg0;
3738 if (rat_p)
3739 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3740 gen_int_mode (rat, address_mode));
3741
3742 if (var_p)
3743 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3744
3745 if (sym_p)
3746 {
3747 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3748 /* ??? We can run into trouble with some backends by presenting
3749 it with symbols which haven't been properly passed through
3750 targetm.encode_section_info. By setting the local bit, we
3751 enhance the probability of things working. */
3752 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3753
3754 if (off_p)
3755 base = gen_rtx_fmt_e (CONST, address_mode,
3756 gen_rtx_fmt_ee
3757 (PLUS, address_mode, base,
3758 gen_int_mode (off, address_mode)));
3759 }
3760 else if (off_p)
3761 base = gen_int_mode (off, address_mode);
3762 else
3763 base = NULL_RTX;
3764
3765 if (base)
3766 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3767
3768 start_sequence ();
3769 /* To avoid splitting addressing modes, pretend that no cse will
3770 follow. */
3771 old_cse_not_expected = cse_not_expected;
3772 cse_not_expected = true;
3773 addr = memory_address_addr_space (mem_mode, addr, as);
3774 cse_not_expected = old_cse_not_expected;
3775 seq = get_insns ();
3776 end_sequence ();
3777
3778 acost = seq_cost (seq, speed);
3779 acost += address_cost (addr, mem_mode, as, speed);
3780
3781 if (!acost)
3782 acost = 1;
3783 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3784 }
3785
3786 /* On some targets, it is quite expensive to load a symbol into a register,
3787 which makes addresses that contain symbols look much more expensive.
3788 However, the symbol will have to be loaded in any case before the
3789 loop (and quite likely we have it in a register already), so it does not
3790 make much sense to penalize them too heavily. Hence make some final
3791 tweaks for the SYMBOL_PRESENT modes:
3792
3793 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3794 var is cheaper, use this mode with small penalty.
3795 If VAR_PRESENT is true, try whether the mode with
3796 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3797 if this is the case, use it. */
3798 add_c = add_cost (speed, address_mode);
3799 for (i = 0; i < 8; i++)
3800 {
3801 var_p = i & 1;
3802 off_p = (i >> 1) & 1;
3803 rat_p = (i >> 2) & 1;
3804
3805 acost = data->costs[0][1][off_p][rat_p] + 1;
3806 if (var_p)
3807 acost += add_c;
3808
3809 if (acost < data->costs[1][var_p][off_p][rat_p])
3810 data->costs[1][var_p][off_p][rat_p] = acost;
3811 }
3812
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3814 {
3815 fprintf (dump_file, "Address costs:\n");
3816
3817 for (i = 0; i < 16; i++)
3818 {
3819 sym_p = i & 1;
3820 var_p = (i >> 1) & 1;
3821 off_p = (i >> 2) & 1;
3822 rat_p = (i >> 3) & 1;
3823
3824 fprintf (dump_file, " ");
3825 if (sym_p)
3826 fprintf (dump_file, "sym + ");
3827 if (var_p)
3828 fprintf (dump_file, "var + ");
3829 if (off_p)
3830 fprintf (dump_file, "cst + ");
3831 if (rat_p)
3832 fprintf (dump_file, "rat * ");
3833
3834 acost = data->costs[sym_p][var_p][off_p][rat_p];
3835 fprintf (dump_file, "index costs %d\n", acost);
3836 }
3837 if (has_predec[mem_mode] || has_postdec[mem_mode]
3838 || has_preinc[mem_mode] || has_postinc[mem_mode])
3839 fprintf (dump_file, " May include autoinc/dec\n");
3840 fprintf (dump_file, "\n");
3841 }
3842
3843 address_cost_data_list[data_index] = data;
3844 }
3845
3846 bits = GET_MODE_BITSIZE (address_mode);
3847 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3848 offset &= mask;
3849 if ((offset >> (bits - 1) & 1))
3850 offset |= ~mask;
3851 s_offset = offset;
3852
3853 autoinc = false;
3854 autoinc_type = AINC_NONE;
3855 msize = GET_MODE_SIZE (mem_mode);
3856 autoinc_offset = offset;
3857 if (stmt_after_inc)
3858 autoinc_offset += ratio * cstep;
3859 if (symbol_present || var_present || ratio != 1)
3860 autoinc = false;
3861 else
3862 {
3863 if (has_postinc[mem_mode] && autoinc_offset == 0
3864 && msize == cstep)
3865 autoinc_type = AINC_POST_INC;
3866 else if (has_postdec[mem_mode] && autoinc_offset == 0
3867 && msize == -cstep)
3868 autoinc_type = AINC_POST_DEC;
3869 else if (has_preinc[mem_mode] && autoinc_offset == msize
3870 && msize == cstep)
3871 autoinc_type = AINC_PRE_INC;
3872 else if (has_predec[mem_mode] && autoinc_offset == -msize
3873 && msize == -cstep)
3874 autoinc_type = AINC_PRE_DEC;
3875
3876 if (autoinc_type != AINC_NONE)
3877 autoinc = true;
3878 }
3879
3880 cost = 0;
3881 offset_p = (s_offset != 0
3882 && data->min_offset <= s_offset
3883 && s_offset <= data->max_offset);
3884 ratio_p = (ratio != 1
3885 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3886
3887 if (ratio != 1 && !ratio_p)
3888 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3889
3890 if (s_offset && !offset_p && !symbol_present)
3891 cost += add_cost (speed, address_mode);
3892
3893 if (may_autoinc)
3894 *may_autoinc = autoinc;
3895 if (autoinc)
3896 acost = data->ainc_costs[autoinc_type];
3897 else
3898 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3899 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3900 return new_cost (cost + acost, complexity);
3901 }
3902
3903 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3904 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3905 calculating the operands of EXPR. Returns true if successful, and returns
3906 the cost in COST. */
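/* (Example: for EXPR = b + a * 4, MULT is a * 4 and so m == 2; the result
   is the cheaper of "shift, then add" and a single shift-add instruction,
   plus the cost of computing the other operand.)  */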
3907
3908 static bool
3909 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3910 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3911 {
3912 comp_cost res;
3913 tree op1 = TREE_OPERAND (expr, 1);
3914 tree cst = TREE_OPERAND (mult, 1);
3915 tree multop = TREE_OPERAND (mult, 0);
3916 int m = exact_log2 (int_cst_value (cst));
3917 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3918 int as_cost, sa_cost;
3919 bool mult_in_op1;
3920
3921 if (!(m >= 0 && m < maxm))
3922 return false;
3923
3924 mult_in_op1 = operand_equal_p (op1, mult, 0);
3925
3926 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3927
3928 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3929 use that in preference to a shift insn followed by an add insn. */
3930 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3931 ? shiftadd_cost (speed, mode, m)
3932 : (mult_in_op1
3933 ? shiftsub1_cost (speed, mode, m)
3934 : shiftsub0_cost (speed, mode, m)));
3935
3936 res = new_cost (MIN (as_cost, sa_cost), 0);
3937 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3938
3939 STRIP_NOPS (multop);
3940 if (!is_gimple_val (multop))
3941 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3942
3943 *cost = res;
3944 return true;
3945 }
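
/* Illustrative example (costs are target-dependent): for EXPR "a + b * 8"
   the constant 8 gives m == 3, so the price of a shift followed by an add
   (as_cost) is compared with that of a single shift-and-add instruction
   (sa_cost) and the cheaper one is used; for "a - b * 8" the
   shiftsub0/shiftsub1 variants are consulted instead, depending on which
   operand holds the multiplication.  */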
3946
3947 /* Estimates cost of forcing expression EXPR into a variable. */
3948
3949 static comp_cost
3950 force_expr_to_var_cost (tree expr, bool speed)
3951 {
3952 static bool costs_initialized = false;
3953 static unsigned integer_cost [2];
3954 static unsigned symbol_cost [2];
3955 static unsigned address_cost [2];
3956 tree op0, op1;
3957 comp_cost cost0, cost1, cost;
3958 machine_mode mode;
3959
3960 if (!costs_initialized)
3961 {
3962 tree type = build_pointer_type (integer_type_node);
3963 tree var, addr;
3964 rtx x;
3965 int i;
3966
3967 var = create_tmp_var_raw (integer_type_node, "test_var");
3968 TREE_STATIC (var) = 1;
3969 x = produce_memory_decl_rtl (var, NULL);
3970 SET_DECL_RTL (var, x);
3971
3972 addr = build1 (ADDR_EXPR, type, var);
3973
3974
3975 for (i = 0; i < 2; i++)
3976 {
3977 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3978 2000), i);
3979
3980 symbol_cost[i] = computation_cost (addr, i) + 1;
3981
3982 address_cost[i]
3983 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3984 if (dump_file && (dump_flags & TDF_DETAILS))
3985 {
3986 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3987 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3988 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3989 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3990 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3991 fprintf (dump_file, "\n");
3992 }
3993 }
3994
3995 costs_initialized = true;
3996 }
3997
3998 STRIP_NOPS (expr);
3999
4000 if (SSA_VAR_P (expr))
4001 return no_cost;
4002
4003 if (is_gimple_min_invariant (expr))
4004 {
4005 if (TREE_CODE (expr) == INTEGER_CST)
4006 return new_cost (integer_cost [speed], 0);
4007
4008 if (TREE_CODE (expr) == ADDR_EXPR)
4009 {
4010 tree obj = TREE_OPERAND (expr, 0);
4011
4012 if (TREE_CODE (obj) == VAR_DECL
4013 || TREE_CODE (obj) == PARM_DECL
4014 || TREE_CODE (obj) == RESULT_DECL)
4015 return new_cost (symbol_cost [speed], 0);
4016 }
4017
4018 return new_cost (address_cost [speed], 0);
4019 }
4020
4021 switch (TREE_CODE (expr))
4022 {
4023 case POINTER_PLUS_EXPR:
4024 case PLUS_EXPR:
4025 case MINUS_EXPR:
4026 case MULT_EXPR:
4027 op0 = TREE_OPERAND (expr, 0);
4028 op1 = TREE_OPERAND (expr, 1);
4029 STRIP_NOPS (op0);
4030 STRIP_NOPS (op1);
4031 break;
4032
4033 CASE_CONVERT:
4034 case NEGATE_EXPR:
4035 op0 = TREE_OPERAND (expr, 0);
4036 STRIP_NOPS (op0);
4037 op1 = NULL_TREE;
4038 break;
4039
4040 default:
4041 /* Just an arbitrary value, FIXME. */
4042 return new_cost (target_spill_cost[speed], 0);
4043 }
4044
4045 if (op0 == NULL_TREE
4046 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4047 cost0 = no_cost;
4048 else
4049 cost0 = force_expr_to_var_cost (op0, speed);
4050
4051 if (op1 == NULL_TREE
4052 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4053 cost1 = no_cost;
4054 else
4055 cost1 = force_expr_to_var_cost (op1, speed);
4056
4057 mode = TYPE_MODE (TREE_TYPE (expr));
4058 switch (TREE_CODE (expr))
4059 {
4060 case POINTER_PLUS_EXPR:
4061 case PLUS_EXPR:
4062 case MINUS_EXPR:
4063 case NEGATE_EXPR:
4064 cost = new_cost (add_cost (speed, mode), 0);
4065 if (TREE_CODE (expr) != NEGATE_EXPR)
4066 {
4067 tree mult = NULL_TREE;
4068 comp_cost sa_cost;
4069 if (TREE_CODE (op1) == MULT_EXPR)
4070 mult = op1;
4071 else if (TREE_CODE (op0) == MULT_EXPR)
4072 mult = op0;
4073
4074 if (mult != NULL_TREE
4075 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4076 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4077 speed, &sa_cost))
4078 return sa_cost;
4079 }
4080 break;
4081
4082 CASE_CONVERT:
4083 {
4084 tree inner_type, outer_type;
4085 outer_type = TREE_TYPE (expr);
4086 inner_type = TREE_TYPE (op0);
4087 cost = new_cost (convert_cost (TYPE_MODE (outer_type),
4088 TYPE_MODE (inner_type), speed), 0);
4089 }
4090 break;
4091
4092 case MULT_EXPR:
4093 if (cst_and_fits_in_hwi (op0))
4094 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4095 mode, speed), 0);
4096 else if (cst_and_fits_in_hwi (op1))
4097 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4098 mode, speed), 0);
4099 else
4100 return new_cost (target_spill_cost [speed], 0);
4101 break;
4102
4103 default:
4104 gcc_unreachable ();
4105 }
4106
4107 cost = add_costs (cost, cost0);
4108 cost = add_costs (cost, cost1);
4109
4110 /* Bound the cost by target_spill_cost. The parts of complicated
4111 computations often are either loop invariant or at least can
4112 be shared between several iv uses, so letting this grow without
4113 limits would not give reasonable results. */
4114 if (cost.cost > (int) target_spill_cost [speed])
4115 cost.cost = target_spill_cost [speed];
4116
4117 return cost;
4118 }
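
/* A few worked cases of the recursion above (illustrative): an SSA name
   is free; an INTEGER_CST is priced at the precomputed integer_cost;
   "x + y" costs add_cost plus the cost of forcing each non-trivial
   operand; "x + y * 4" may be priced more cheaply through
   get_shiftadd_cost; and "(x + 8) * y", having no constant coefficient
   that fits a HWI, falls back to target_spill_cost, which also bounds
   every result from above.  */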
4119
4120 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4121 invariants the computation depends on. */
4122
4123 static comp_cost
4124 force_var_cost (struct ivopts_data *data,
4125 tree expr, bitmap *depends_on)
4126 {
4127 if (depends_on)
4128 {
4129 fd_ivopts_data = data;
4130 walk_tree (&expr, find_depends, depends_on, NULL);
4131 }
4132
4133 return force_expr_to_var_cost (expr, data->speed);
4134 }
4135
4136 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4137 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4138 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4139 invariants the computation depends on. */
4140
4141 static comp_cost
4142 split_address_cost (struct ivopts_data *data,
4143 tree addr, bool *symbol_present, bool *var_present,
4144 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4145 {
4146 tree core;
4147 HOST_WIDE_INT bitsize;
4148 HOST_WIDE_INT bitpos;
4149 tree toffset;
4150 machine_mode mode;
4151 int unsignedp, volatilep;
4152
4153 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4154 &unsignedp, &volatilep, false);
4155
4156 if (toffset != 0
4157 || bitpos % BITS_PER_UNIT != 0
4158 || TREE_CODE (core) != VAR_DECL)
4159 {
4160 *symbol_present = false;
4161 *var_present = true;
4162 fd_ivopts_data = data;
4163 walk_tree (&addr, find_depends, depends_on, NULL);
4164 return new_cost (target_spill_cost[data->speed], 0);
4165 }
4166
4167 *offset += bitpos / BITS_PER_UNIT;
4168 if (TREE_STATIC (core)
4169 || DECL_EXTERNAL (core))
4170 {
4171 *symbol_present = true;
4172 *var_present = false;
4173 return no_cost;
4174 }
4175
4176 *symbol_present = false;
4177 *var_present = true;
4178 return no_cost;
4179 }
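
/* Example (illustrative): given "static int a[100];", splitting the
   address of a[2] finds core == a, a VAR_DECL with TREE_STATIC set, so
   *SYMBOL_PRESENT becomes true and 2 * sizeof (int) is accumulated into
   *OFFSET at no cost.  For an automatic variable the same split sets
   *VAR_PRESENT instead, still at no cost.  */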
4180
4181 /* Estimates cost of expressing difference of addresses E1 - E2 as
4182 var + symbol + offset. The value of offset is added to OFFSET,
4183 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4184 part is missing. DEPENDS_ON is a set of the invariants the computation
4185 depends on. */
4186
4187 static comp_cost
4188 ptr_difference_cost (struct ivopts_data *data,
4189 tree e1, tree e2, bool *symbol_present, bool *var_present,
4190 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4191 {
4192 HOST_WIDE_INT diff = 0;
4193 aff_tree aff_e1, aff_e2;
4194 tree type;
4195
4196 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4197
4198 if (ptr_difference_const (e1, e2, &diff))
4199 {
4200 *offset += diff;
4201 *symbol_present = false;
4202 *var_present = false;
4203 return no_cost;
4204 }
4205
4206 if (integer_zerop (e2))
4207 return split_address_cost (data, TREE_OPERAND (e1, 0),
4208 symbol_present, var_present, offset, depends_on);
4209
4210 *symbol_present = false;
4211 *var_present = true;
4212
4213 type = signed_type_for (TREE_TYPE (e1));
4214 tree_to_aff_combination (e1, type, &aff_e1);
4215 tree_to_aff_combination (e2, type, &aff_e2);
4216 aff_combination_scale (&aff_e2, -1);
4217 aff_combination_add (&aff_e1, &aff_e2);
4218
4219 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4220 }
4221
4222 /* Estimates cost of expressing difference E1 - E2 as
4223 var + symbol + offset. The value of offset is added to OFFSET,
4224 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4225 part is missing. DEPENDS_ON is a set of the invariants the computation
4226 depends on. */
4227
4228 static comp_cost
4229 difference_cost (struct ivopts_data *data,
4230 tree e1, tree e2, bool *symbol_present, bool *var_present,
4231 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4232 {
4233 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4234 unsigned HOST_WIDE_INT off1, off2;
4235 aff_tree aff_e1, aff_e2;
4236 tree type;
4237
4238 e1 = strip_offset (e1, &off1);
4239 e2 = strip_offset (e2, &off2);
4240 *offset += off1 - off2;
4241
4242 STRIP_NOPS (e1);
4243 STRIP_NOPS (e2);
4244
4245 if (TREE_CODE (e1) == ADDR_EXPR)
4246 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4247 offset, depends_on);
4248 *symbol_present = false;
4249
4250 if (operand_equal_p (e1, e2, 0))
4251 {
4252 *var_present = false;
4253 return no_cost;
4254 }
4255
4256 *var_present = true;
4257
4258 if (integer_zerop (e2))
4259 return force_var_cost (data, e1, depends_on);
4260
4261 if (integer_zerop (e1))
4262 {
4263 comp_cost cost = force_var_cost (data, e2, depends_on);
4264 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4265 return cost;
4266 }
4267
4268 type = signed_type_for (TREE_TYPE (e1));
4269 tree_to_aff_combination (e1, type, &aff_e1);
4270 tree_to_aff_combination (e2, type, &aff_e2);
4271 aff_combination_scale (&aff_e2, -1);
4272 aff_combination_add (&aff_e1, &aff_e2);
4273
4274 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4275 }
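
/* Examples (illustrative): "&a[10] - &a[2]" is recognized by
   ptr_difference_cost as a compile-time constant and only adjusts
   *OFFSET; "x - x" is detected by operand_equal_p and is free; and
   "0 - y" costs forcing y plus a multiplication by -1.  */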
4276
4277 /* Returns true if AFF1 and AFF2 are identical. */
4278
4279 static bool
4280 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4281 {
4282 unsigned i;
4283
4284 if (aff1->n != aff2->n)
4285 return false;
4286
4287 for (i = 0; i < aff1->n; i++)
4288 {
4289 if (aff1->elts[i].coef != aff2->elts[i].coef)
4290 return false;
4291
4292 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4293 return false;
4294 }
4295 return true;
4296 }
4297
4298 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4299
4300 static int
4301 get_expr_id (struct ivopts_data *data, tree expr)
4302 {
4303 struct iv_inv_expr_ent ent;
4304 struct iv_inv_expr_ent **slot;
4305
4306 ent.expr = expr;
4307 ent.hash = iterative_hash_expr (expr, 0);
4308 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4309 if (*slot)
4310 return (*slot)->id;
4311
4312 *slot = XNEW (struct iv_inv_expr_ent);
4313 (*slot)->expr = expr;
4314 (*slot)->hash = ent.hash;
4315 (*slot)->id = data->inv_expr_id++;
4316 return (*slot)->id;
4317 }
4318
4319 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4320 requires a new compiler-generated temporary. Returns -1 otherwise.
4321 ADDRESS_P is a flag indicating if the expression is for address
4322 computation. */
4323
4324 static int
4325 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4326 tree cbase, HOST_WIDE_INT ratio,
4327 bool address_p)
4328 {
4329 aff_tree ubase_aff, cbase_aff;
4330 tree expr, ub, cb;
4331
4332 STRIP_NOPS (ubase);
4333 STRIP_NOPS (cbase);
4334 ub = ubase;
4335 cb = cbase;
4336
4337 if ((TREE_CODE (ubase) == INTEGER_CST)
4338 && (TREE_CODE (cbase) == INTEGER_CST))
4339 return -1;
4340
4341 /* Strips the constant part. */
4342 if (TREE_CODE (ubase) == PLUS_EXPR
4343 || TREE_CODE (ubase) == MINUS_EXPR
4344 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4345 {
4346 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4347 ubase = TREE_OPERAND (ubase, 0);
4348 }
4349
4350 /* Strips the constant part. */
4351 if (TREE_CODE (cbase) == PLUS_EXPR
4352 || TREE_CODE (cbase) == MINUS_EXPR
4353 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4354 {
4355 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4356 cbase = TREE_OPERAND (cbase, 0);
4357 }
4358
4359 if (address_p)
4360 {
4361 if (((TREE_CODE (ubase) == SSA_NAME)
4362 || (TREE_CODE (ubase) == ADDR_EXPR
4363 && is_gimple_min_invariant (ubase)))
4364 && (TREE_CODE (cbase) == INTEGER_CST))
4365 return -1;
4366
4367 if (((TREE_CODE (cbase) == SSA_NAME)
4368 || (TREE_CODE (cbase) == ADDR_EXPR
4369 && is_gimple_min_invariant (cbase)))
4370 && (TREE_CODE (ubase) == INTEGER_CST))
4371 return -1;
4372 }
4373
4374 if (ratio == 1)
4375 {
4376 if (operand_equal_p (ubase, cbase, 0))
4377 return -1;
4378
4379 if (TREE_CODE (ubase) == ADDR_EXPR
4380 && TREE_CODE (cbase) == ADDR_EXPR)
4381 {
4382 tree usym, csym;
4383
4384 usym = TREE_OPERAND (ubase, 0);
4385 csym = TREE_OPERAND (cbase, 0);
4386 if (TREE_CODE (usym) == ARRAY_REF)
4387 {
4388 tree ind = TREE_OPERAND (usym, 1);
4389 if (TREE_CODE (ind) == INTEGER_CST
4390 && tree_fits_shwi_p (ind)
4391 && tree_to_shwi (ind) == 0)
4392 usym = TREE_OPERAND (usym, 0);
4393 }
4394 if (TREE_CODE (csym) == ARRAY_REF)
4395 {
4396 tree ind = TREE_OPERAND (csym, 1);
4397 if (TREE_CODE (ind) == INTEGER_CST
4398 && tree_fits_shwi_p (ind)
4399 && tree_to_shwi (ind) == 0)
4400 csym = TREE_OPERAND (csym, 0);
4401 }
4402 if (operand_equal_p (usym, csym, 0))
4403 return -1;
4404 }
4405 /* Now do a more complex comparison. */
4406 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4407 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4408 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4409 return -1;
4410 }
4411
4412 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4413 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4414
4415 aff_combination_scale (&cbase_aff, -1 * ratio);
4416 aff_combination_add (&ubase_aff, &cbase_aff);
4417 expr = aff_combination_to_tree (&ubase_aff);
4418 return get_expr_id (data, expr);
4419 }
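
/* Example (illustrative): with RATIO 1, UBASE "p + 4" and CBASE "p",
   stripping the constant parts leaves equal bases, so -1 is returned;
   the difference is compile-time computable and needs no temporary.
   With UBASE "p" and CBASE "q" the affine combination p - 1 * q is
   built instead and assigned an id, since a compiler-generated
   loop-invariant temporary would have to hold it.  */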
4420
4421
4422
4423 /* Determines the cost of the computation by which USE is expressed
4424 from induction variable CAND. If ADDRESS_P is true, we just need
4425 to create an address from it, otherwise we want to get it into
4426 a register. A set of invariants we depend on is stored in
4427 DEPENDS_ON. AT is the statement at which the value is computed.
4428 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4429 addressing is likely. */
4430
4431 static comp_cost
4432 get_computation_cost_at (struct ivopts_data *data,
4433 struct iv_use *use, struct iv_cand *cand,
4434 bool address_p, bitmap *depends_on, gimple at,
4435 bool *can_autoinc,
4436 int *inv_expr_id)
4437 {
4438 tree ubase = use->iv->base, ustep = use->iv->step;
4439 tree cbase, cstep;
4440 tree utype = TREE_TYPE (ubase), ctype;
4441 unsigned HOST_WIDE_INT cstepi, offset = 0;
4442 HOST_WIDE_INT ratio, aratio;
4443 bool var_present, symbol_present, stmt_is_after_inc;
4444 comp_cost cost;
4445 widest_int rat;
4446 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4447 machine_mode mem_mode = (address_p
4448 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4449 : VOIDmode);
4450
4451 *depends_on = NULL;
4452
4453 /* Only consider real candidates. */
4454 if (!cand->iv)
4455 return infinite_cost;
4456
4457 cbase = cand->iv->base;
4458 cstep = cand->iv->step;
4459 ctype = TREE_TYPE (cbase);
4460
4461 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4462 {
4463 /* We do not have a precision to express the values of use. */
4464 return infinite_cost;
4465 }
4466
4467 if (address_p
4468 || (use->iv->base_object
4469 && cand->iv->base_object
4470 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4471 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4472 {
4473 /* Do not try to express the address of an object with a computation
4474 based on the address of a different object. This may cause problems
4475 in rtl-level alias analysis (which does not expect this to happen,
4476 as it is invalid in C), and would be unlikely to be useful
4477 anyway. */
4478 if (use->iv->base_object
4479 && cand->iv->base_object
4480 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4481 return infinite_cost;
4482 }
4483
4484 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4485 {
4486 /* TODO -- add direct handling of this case. */
4487 goto fallback;
4488 }
4489
4490 /* CSTEPI is removed from the offset in case the statement is after the
4491 increment. If the step is not constant, we use zero instead.
4492 This is a bit imprecise (there is the extra addition), but
4493 redundancy elimination is likely to transform the code so that
4494 it uses the value of the variable before the increment anyway,
4495 so this is not too unrealistic. */
4496 if (cst_and_fits_in_hwi (cstep))
4497 cstepi = int_cst_value (cstep);
4498 else
4499 cstepi = 0;
4500
4501 if (!constant_multiple_of (ustep, cstep, &rat))
4502 return infinite_cost;
4503
4504 if (wi::fits_shwi_p (rat))
4505 ratio = rat.to_shwi ();
4506 else
4507 return infinite_cost;
4508
4509 STRIP_NOPS (cbase);
4510 ctype = TREE_TYPE (cbase);
4511
4512 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4513
4514 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4515 or ratio == 1, it is better to handle this like
4516
4517 ubase - ratio * cbase + ratio * var
4518
4519 (this also holds in the case ratio == -1, TODO). */
4520
4521 if (cst_and_fits_in_hwi (cbase))
4522 {
4523 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4524 cost = difference_cost (data,
4525 ubase, build_int_cst (utype, 0),
4526 &symbol_present, &var_present, &offset,
4527 depends_on);
4528 cost.cost /= avg_loop_niter (data->current_loop);
4529 }
4530 else if (ratio == 1)
4531 {
4532 tree real_cbase = cbase;
4533
4534 /* Check to see if any adjustment is needed. */
4535 if (cstepi == 0 && stmt_is_after_inc)
4536 {
4537 aff_tree real_cbase_aff;
4538 aff_tree cstep_aff;
4539
4540 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4541 &real_cbase_aff);
4542 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4543
4544 aff_combination_add (&real_cbase_aff, &cstep_aff);
4545 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4546 }
4547
4548 cost = difference_cost (data,
4549 ubase, real_cbase,
4550 &symbol_present, &var_present, &offset,
4551 depends_on);
4552 cost.cost /= avg_loop_niter (data->current_loop);
4553 }
4554 else if (address_p
4555 && !POINTER_TYPE_P (ctype)
4556 && multiplier_allowed_in_address_p
4557 (ratio, mem_mode,
4558 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4559 {
4560 cbase
4561 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4562 cost = difference_cost (data,
4563 ubase, cbase,
4564 &symbol_present, &var_present, &offset,
4565 depends_on);
4566 cost.cost /= avg_loop_niter (data->current_loop);
4567 }
4568 else
4569 {
4570 cost = force_var_cost (data, cbase, depends_on);
4571 cost = add_costs (cost,
4572 difference_cost (data,
4573 ubase, build_int_cst (utype, 0),
4574 &symbol_present, &var_present,
4575 &offset, depends_on));
4576 cost.cost /= avg_loop_niter (data->current_loop);
4577 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4578 }
4579
4580 /* The set of invariants depended on by the sub-use has already been
4581 computed for the first use in the group. */
4582 if (use->sub_id)
4583 {
4584 cost.cost = 0;
4585 if (depends_on && *depends_on)
4586 bitmap_clear (*depends_on);
4587 }
4588 else if (inv_expr_id)
4589 {
4590 *inv_expr_id =
4591 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4592 /* Clear depends on. */
4593 if (*inv_expr_id != -1 && depends_on && *depends_on)
4594 bitmap_clear (*depends_on);
4595 }
4596
4597 /* If we are after the increment, the value of the candidate is higher by
4598 one iteration. */
4599 if (stmt_is_after_inc)
4600 offset -= ratio * cstepi;
4601
4602 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4603 (symbol/var1/const parts may be omitted). If we are looking for an
4604 address, find the cost of addressing this. */
4605 if (address_p)
4606 return add_costs (cost,
4607 get_address_cost (symbol_present, var_present,
4608 offset, ratio, cstepi,
4609 mem_mode,
4610 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4611 speed, stmt_is_after_inc,
4612 can_autoinc));
4613
4614 /* Otherwise estimate the costs for computing the expression. */
4615 if (!symbol_present && !var_present && !offset)
4616 {
4617 if (ratio != 1)
4618 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4619 return cost;
4620 }
4621
4622 /* Symbol + offset should be compile-time computable so consider that they
4623 are added once to the variable, if present. */
4624 if (var_present && (symbol_present || offset))
4625 cost.cost += adjust_setup_cost (data,
4626 add_cost (speed, TYPE_MODE (ctype)));
4627
4628 /* Having offset does not affect runtime cost in case it is added to
4629 symbol, but it increases complexity. */
4630 if (offset)
4631 cost.complexity++;
4632
4633 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4634
4635 aratio = ratio > 0 ? ratio : -ratio;
4636 if (aratio != 1)
4637 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4638 return cost;
4639
4640 fallback:
4641 if (can_autoinc)
4642 *can_autoinc = false;
4643
4644 {
4645 /* Just get the expression, expand it and measure the cost. */
4646 tree comp = get_computation_at (data->current_loop, use, cand, at);
4647
4648 if (!comp)
4649 return infinite_cost;
4650
4651 if (address_p)
4652 comp = build_simple_mem_ref (comp);
4653
4654 return new_cost (computation_cost (comp, speed), 0);
4655 }
4656 }
4657
4658 /* Determines the cost of the computation by which USE is expressed
4659 from induction variable CAND. If ADDRESS_P is true, we just need
4660 to create an address from it, otherwise we want to get it into
4661 a register. A set of invariants we depend on is stored in
4662 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4663 autoinc addressing is likely. */
4664
4665 static comp_cost
4666 get_computation_cost (struct ivopts_data *data,
4667 struct iv_use *use, struct iv_cand *cand,
4668 bool address_p, bitmap *depends_on,
4669 bool *can_autoinc, int *inv_expr_id)
4670 {
4671 return get_computation_cost_at (data,
4672 use, cand, address_p, depends_on, use->stmt,
4673 can_autoinc, inv_expr_id);
4674 }
4675
4676 /* Determines cost of basing replacement of USE on CAND in a generic
4677 expression. */
4678
4679 static bool
4680 determine_use_iv_cost_generic (struct ivopts_data *data,
4681 struct iv_use *use, struct iv_cand *cand)
4682 {
4683 bitmap depends_on;
4684 comp_cost cost;
4685 int inv_expr_id = -1;
4686
4687 /* The simple case first -- if we need to express value of the preserved
4688 original biv, the cost is 0. This also prevents us from counting the
4689 cost of increment twice -- once at this use and once in the cost of
4690 the candidate. */
4691 if (cand->pos == IP_ORIGINAL
4692 && cand->incremented_at == use->stmt)
4693 {
4694 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4695 ERROR_MARK, -1);
4696 return true;
4697 }
4698
4699 cost = get_computation_cost (data, use, cand, false, &depends_on,
4700 NULL, &inv_expr_id);
4701
4702 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4703 inv_expr_id);
4704
4705 return !infinite_cost_p (cost);
4706 }
4707
4708 /* Determines cost of basing replacement of USE on CAND in an address. */
4709
4710 static bool
4711 determine_use_iv_cost_address (struct ivopts_data *data,
4712 struct iv_use *use, struct iv_cand *cand)
4713 {
4714 bitmap depends_on;
4715 bool can_autoinc;
4716 int inv_expr_id = -1;
4717 struct iv_use *sub_use;
4718 comp_cost sub_cost;
4719 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4720 &can_autoinc, &inv_expr_id);
4721
4722 if (cand->ainc_use == use)
4723 {
4724 if (can_autoinc)
4725 cost.cost -= cand->cost_step;
4726 /* If we generated the candidate solely for exploiting autoincrement
4727 opportunities, and it turns out it can't be used, set the cost to
4728 infinity to make sure we ignore it. */
4729 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4730 cost = infinite_cost;
4731 }
4732 for (sub_use = use->next;
4733 sub_use && !infinite_cost_p (cost);
4734 sub_use = sub_use->next)
4735 {
4736 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4737 &can_autoinc, &inv_expr_id);
4738 cost = add_costs (cost, sub_cost);
4739 }
4740
4741 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4742 inv_expr_id);
4743
4744 return !infinite_cost_p (cost);
4745 }
4746
4747 /* Computes value of candidate CAND at position AT in iteration NITER, and
4748 stores it to VAL. */
4749
4750 static void
4751 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4752 aff_tree *val)
4753 {
4754 aff_tree step, delta, nit;
4755 struct iv *iv = cand->iv;
4756 tree type = TREE_TYPE (iv->base);
4757 tree steptype = unsigned_type_for (type);
4761
4762 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4763 aff_combination_convert (&step, steptype);
4764 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4765 aff_combination_convert (&nit, steptype);
4766 aff_combination_mult (&nit, &step, &delta);
4767 if (stmt_after_increment (loop, cand, at))
4768 aff_combination_add (&delta, &step);
4769
4770 tree_to_aff_combination (iv->base, type, val);
4771 if (!POINTER_TYPE_P (type))
4772 aff_combination_convert (val, steptype);
4773 aff_combination_add (val, &delta);
4774 }
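
/* In effect (sketch) the value computed above is

     BASE + NITER * STEP

   or BASE + (NITER + 1) * STEP when AT is after the increment, with the
   arithmetic performed in an unsigned type so that any wrap-around is
   well defined.  */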
4775
4776 /* Returns period of induction variable iv. */
4777
4778 static tree
4779 iv_period (struct iv *iv)
4780 {
4781 tree step = iv->step, period, type;
4782 tree pow2div;
4783
4784 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4785
4786 type = unsigned_type_for (TREE_TYPE (step));
4787 /* The period of the iv is lcm (step, type_range) / step - 1,
4788 i.e., N * type_range / step - 1. Since the type range is a power
4789 of two, N == step >> num_of_ending_zeros_binary (step),
4790 so the final result is
4791
4792 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4795 pow2div = num_ending_zeros (step);
4796
4797 period = build_low_bits_mask (type,
4798 (TYPE_PRECISION (type)
4799 - tree_to_uhwi (pow2div)));
4800
4801 return period;
4802 }
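
/* Worked example: for a 32-bit unsigned iv with step 4 we have
   num_ending_zeros (4) == 2, so the period is a mask of 32 - 2 == 30
   low bits, i.e. (1 << 30) - 1 == 0x3fffffff -- the number of times the
   iv can be stepped before its value repeats.  */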
4803
4804 /* Returns the comparison operator used when eliminating the iv USE. */
4805
4806 static enum tree_code
4807 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4808 {
4809 struct loop *loop = data->current_loop;
4810 basic_block ex_bb;
4811 edge exit;
4812
4813 ex_bb = gimple_bb (use->stmt);
4814 exit = EDGE_SUCC (ex_bb, 0);
4815 if (flow_bb_inside_loop_p (loop, exit->dest))
4816 exit = EDGE_SUCC (ex_bb, 1);
4817
4818 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4819 }
4820
4821 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4822 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4823 calculation is performed in non-wrapping type.
4824
4825 TODO: More generally, we could test for the situation that
4826 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4827 This would require knowing the sign of OFFSET. */
4828
4829 static bool
4830 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4831 {
4832 enum tree_code code;
4833 tree e1, e2;
4834 aff_tree aff_e1, aff_e2, aff_offset;
4835
4836 if (!nowrap_type_p (TREE_TYPE (base)))
4837 return false;
4838
4839 base = expand_simple_operations (base);
4840
4841 if (TREE_CODE (base) == SSA_NAME)
4842 {
4843 gimple stmt = SSA_NAME_DEF_STMT (base);
4844
4845 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4846 return false;
4847
4848 code = gimple_assign_rhs_code (stmt);
4849 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4850 return false;
4851
4852 e1 = gimple_assign_rhs1 (stmt);
4853 e2 = gimple_assign_rhs2 (stmt);
4854 }
4855 else
4856 {
4857 code = TREE_CODE (base);
4858 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4859 return false;
4860 e1 = TREE_OPERAND (base, 0);
4861 e2 = TREE_OPERAND (base, 1);
4862 }
4863
4864 /* Use affine expansion as a deeper inspection to prove the equality. */
4865 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4866 &aff_e2, &data->name_expansion_cache);
4867 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4868 &aff_offset, &data->name_expansion_cache);
4869 aff_combination_scale (&aff_offset, -1);
4870 switch (code)
4871 {
4872 case PLUS_EXPR:
4873 aff_combination_add (&aff_e2, &aff_offset);
4874 if (aff_combination_zero_p (&aff_e2))
4875 return true;
4876
4877 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4878 &aff_e1, &data->name_expansion_cache);
4879 aff_combination_add (&aff_e1, &aff_offset);
4880 return aff_combination_zero_p (&aff_e1);
4881
4882 case POINTER_PLUS_EXPR:
4883 aff_combination_add (&aff_e2, &aff_offset);
4884 return aff_combination_zero_p (&aff_e2);
4885
4886 default:
4887 return false;
4888 }
4889 }
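
/* Illustrative case of the PLUS_EXPR handling above: if BASE is defined
   as "e1 + e2" in a non-wrapping type and the affine expansion of
   OFFSET equals that of e2 (or of e1), then BASE - OFFSET is simply the
   other operand, so the subtraction cannot overflow.  */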
4890
4891 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4892 comparison with CAND. NITER describes the number of iterations of
4893 the loop. If successful, the comparison in COMP_P is altered accordingly.
4894
4895 We aim to handle the following situation:
4896
4897 sometype *base, *p;
4898 int a, b, i;
4899
4900 i = a;
4901 p = p_0 = base + a;
4902
4903 do
4904 {
4905 bla (*p);
4906 p++;
4907 i++;
4908 }
4909 while (i < b);
4910
4911 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4912 We aim to optimize this to
4913
4914 p = p_0 = base + a;
4915 do
4916 {
4917 bla (*p);
4918 p++;
4919 }
4920 while (p < p_0 - a + b);
4921
4922 This preserves correctness, since the pointer arithmetic does not
4923 overflow. More precisely:
4924
4925 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4926 overflow in computing it or the values of p.
4927 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4928 overflow. To prove this, we use the fact that p_0 = base + a. */
4929
4930 static bool
4931 iv_elimination_compare_lt (struct ivopts_data *data,
4932 struct iv_cand *cand, enum tree_code *comp_p,
4933 struct tree_niter_desc *niter)
4934 {
4935 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4936 struct aff_tree nit, tmpa, tmpb;
4937 enum tree_code comp;
4938 HOST_WIDE_INT step;
4939
4940 /* We need to know that the candidate induction variable does not overflow.
4941 While more complex analysis may be used to prove this, for now just
4942 check that the variable appears in the original program and that it
4943 is computed in a type that guarantees no overflows. */
4944 cand_type = TREE_TYPE (cand->iv->base);
4945 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4946 return false;
4947
4948 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4949 the calculation of the BOUND could overflow, making the comparison
4950 invalid. */
4951 if (!data->loop_single_exit_p)
4952 return false;
4953
4954 /* We need to be able to decide whether candidate is increasing or decreasing
4955 in order to choose the right comparison operator. */
4956 if (!cst_and_fits_in_hwi (cand->iv->step))
4957 return false;
4958 step = int_cst_value (cand->iv->step);
4959
4960 /* Check that the number of iterations matches the expected pattern:
4961 a + 1 > b ? 0 : b - a - 1. */
4962 mbz = niter->may_be_zero;
4963 if (TREE_CODE (mbz) == GT_EXPR)
4964 {
4965 /* Handle a + 1 > b. */
4966 tree op0 = TREE_OPERAND (mbz, 0);
4967 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4968 {
4969 a = TREE_OPERAND (op0, 0);
4970 b = TREE_OPERAND (mbz, 1);
4971 }
4972 else
4973 return false;
4974 }
4975 else if (TREE_CODE (mbz) == LT_EXPR)
4976 {
4977 tree op1 = TREE_OPERAND (mbz, 1);
4978
4979 /* Handle b < a + 1. */
4980 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4981 {
4982 a = TREE_OPERAND (op1, 0);
4983 b = TREE_OPERAND (mbz, 0);
4984 }
4985 else
4986 return false;
4987 }
4988 else
4989 return false;
4990
4991 /* Expected number of iterations is B - A - 1. Check that it matches
4992 the actual number, i.e., that B - A - NITER = 1. */
4993 tree_to_aff_combination (niter->niter, nit_type, &nit);
4994 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4995 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4996 aff_combination_scale (&nit, -1);
4997 aff_combination_scale (&tmpa, -1);
4998 aff_combination_add (&tmpb, &tmpa);
4999 aff_combination_add (&tmpb, &nit);
5000 if (tmpb.n != 0 || tmpb.offset != 1)
5001 return false;
5002
5003 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5004 overflow. */
5005 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5006 cand->iv->step,
5007 fold_convert (TREE_TYPE (cand->iv->step), a));
5008 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5009 return false;
5010
5011 /* Determine the new comparison operator. */
5012 comp = step < 0 ? GT_EXPR : LT_EXPR;
5013 if (*comp_p == NE_EXPR)
5014 *comp_p = comp;
5015 else if (*comp_p == EQ_EXPR)
5016 *comp_p = invert_tree_comparison (comp, false);
5017 else
5018 gcc_unreachable ();
5019
5020 return true;
5021 }
5022
5023 /* Check whether it is possible to express the condition in USE by comparison
5024 of candidate CAND. If so, store the value compared with to BOUND, and the
5025 comparison operator to COMP. */
5026
5027 static bool
5028 may_eliminate_iv (struct ivopts_data *data,
5029 struct iv_use *use, struct iv_cand *cand, tree *bound,
5030 enum tree_code *comp)
5031 {
5032 basic_block ex_bb;
5033 edge exit;
5034 tree period;
5035 struct loop *loop = data->current_loop;
5036 aff_tree bnd;
5037 struct tree_niter_desc *desc = NULL;
5038
5039 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5040 return false;
5041
5042 /* For now this works only for exits that dominate the loop latch.
5043 TODO: extend to other conditions inside loop body. */
5044 ex_bb = gimple_bb (use->stmt);
5045 if (use->stmt != last_stmt (ex_bb)
5046 || gimple_code (use->stmt) != GIMPLE_COND
5047 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5048 return false;
5049
5050 exit = EDGE_SUCC (ex_bb, 0);
5051 if (flow_bb_inside_loop_p (loop, exit->dest))
5052 exit = EDGE_SUCC (ex_bb, 1);
5053 if (flow_bb_inside_loop_p (loop, exit->dest))
5054 return false;
5055
5056 desc = niter_for_exit (data, exit);
5057 if (!desc)
5058 return false;
5059
5060 /* Determine whether we can use the variable to test the exit condition.
5061 This is the case iff the period of the induction variable is greater
5062 than the number of iterations for which the exit condition is true. */
5063 period = iv_period (cand->iv);
5064
5065 /* If the number of iterations is constant, compare against it directly. */
5066 if (TREE_CODE (desc->niter) == INTEGER_CST)
5067 {
5068 /* See cand_value_at. */
5069 if (stmt_after_increment (loop, cand, use->stmt))
5070 {
5071 if (!tree_int_cst_lt (desc->niter, period))
5072 return false;
5073 }
5074 else
5075 {
5076 if (tree_int_cst_lt (period, desc->niter))
5077 return false;
5078 }
5079 }
5080
5081 /* If not, and if this is the only possible exit of the loop, see whether
5082 we can get a conservative estimate on the number of iterations of the
5083 entire loop and compare against that instead. */
5084 else
5085 {
5086 widest_int period_value, max_niter;
5087
5088 max_niter = desc->max;
5089 if (stmt_after_increment (loop, cand, use->stmt))
5090 max_niter += 1;
5091 period_value = wi::to_widest (period);
5092 if (wi::gtu_p (max_niter, period_value))
5093 {
5094 /* See if we can take advantage of inferred loop bound information. */
5095 if (data->loop_single_exit_p)
5096 {
5097 if (!max_loop_iterations (loop, &max_niter))
5098 return false;
5099 /* The loop bound is already adjusted by adding 1. */
5100 if (wi::gtu_p (max_niter, period_value))
5101 return false;
5102 }
5103 else
5104 return false;
5105 }
5106 }
5107
5108 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5109
5110 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5111 aff_combination_to_tree (&bnd));
5112 *comp = iv_elimination_compare (data, use);
5113
5114 /* It is unlikely that computing the number of iterations using division
5115 would be more profitable than keeping the original induction variable. */
5116 if (expression_expensive_p (*bound))
5117 return false;
5118
5119 /* Sometimes, it is possible to handle the situation that the number of
5120 iterations may be zero unless additional assumptions hold, by using <
5121 instead of != in the exit condition.
5122
5123 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5124 base the exit condition on it. However, that is often too
5125 expensive. */
5126 if (!integer_zerop (desc->may_be_zero))
5127 return iv_elimination_compare_lt (data, cand, comp, desc);
5128
5129 return true;
5130 }
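
/* Illustrative example: in

     for (i = 0; i != n; i++)
       p[i] = 0;

   a candidate pointer iv q == p + i * sizeof (*p) may replace the exit
   test by "q != p + n", provided the period of q is not smaller than the
   number of iterations; BOUND is then "p + n" and COMP comes from
   iv_elimination_compare.  If the number of iterations may be zero, the
   < rewrite attempted by iv_elimination_compare_lt is tried instead.  */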
5131
5132 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5133 be copied, if it is used in the loop body and DATA->body_includes_call. */
5134
5135 static int
5136 parm_decl_cost (struct ivopts_data *data, tree bound)
5137 {
5138 tree sbound = bound;
5139 STRIP_NOPS (sbound);
5140
5141 if (TREE_CODE (sbound) == SSA_NAME
5142 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5143 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5144 && data->body_includes_call)
5145 return COSTS_N_INSNS (1);
5146
5147 return 0;
5148 }
5149
5150 /* Determines cost of basing replacement of USE on CAND in a condition. */
5151
5152 static bool
5153 determine_use_iv_cost_condition (struct ivopts_data *data,
5154 struct iv_use *use, struct iv_cand *cand)
5155 {
5156 tree bound = NULL_TREE;
5157 struct iv *cmp_iv;
5158 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5159 comp_cost elim_cost, express_cost, cost, bound_cost;
5160 bool ok;
5161 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5162 tree *control_var, *bound_cst;
5163 enum tree_code comp = ERROR_MARK;
5164
5165 /* Only consider real candidates. */
5166 if (!cand->iv)
5167 {
5168 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5169 ERROR_MARK, -1);
5170 return false;
5171 }
5172
5173 /* Try iv elimination. */
5174 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5175 {
5176 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5177 if (elim_cost.cost == 0)
5178 elim_cost.cost = parm_decl_cost (data, bound);
5179 else if (TREE_CODE (bound) == INTEGER_CST)
5180 elim_cost.cost = 0;
5181 /* If we replace a loop condition 'i < n' with 'p < base + n',
5182 depends_on_elim will have 'base' and 'n' set, which implies
5183 that both 'base' and 'n' will be live during the loop. More likely,
5184 'base + n' will be loop invariant, resulting in only one live value
5185 during the loop. So in that case we clear depends_on_elim and set
5186 elim_inv_expr_id instead. */
5187 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5188 {
5189 elim_inv_expr_id = get_expr_id (data, bound);
5190 bitmap_clear (depends_on_elim);
5191 }
5192 /* The bound is a loop invariant, so it will only be computed
5193 once. */
5194 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5195 }
5196 else
5197 elim_cost = infinite_cost;
5198
5199 /* Try expressing the original giv. If it is compared with an invariant,
5200 note that we cannot get rid of it. */
5201 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5202 NULL, &cmp_iv);
5203 gcc_assert (ok);
5204
5205 /* When the condition is a comparison of the candidate IV against
5206 zero, prefer this IV.
5207
5208 TODO: The constant that we're subtracting from the cost should
5209 be target-dependent. This information should be added to the
5210 target costs for each backend. */
5211 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5212 && integer_zerop (*bound_cst)
5213 && (operand_equal_p (*control_var, cand->var_after, 0)
5214 || operand_equal_p (*control_var, cand->var_before, 0)))
5215 elim_cost.cost -= 1;
5216
5217 express_cost = get_computation_cost (data, use, cand, false,
5218 &depends_on_express, NULL,
5219 &express_inv_expr_id);
5220 fd_ivopts_data = data;
5221 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5222
5223 /* Count the cost of the original bound as well. */
5224 bound_cost = force_var_cost (data, *bound_cst, NULL);
5225 if (bound_cost.cost == 0)
5226 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5227 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5228 bound_cost.cost = 0;
5229 express_cost.cost += bound_cost.cost;
5230
5231 /* Choose the better approach, preferring the eliminated IV. */
5232 if (compare_costs (elim_cost, express_cost) <= 0)
5233 {
5234 cost = elim_cost;
5235 depends_on = depends_on_elim;
5236 depends_on_elim = NULL;
5237 inv_expr_id = elim_inv_expr_id;
5238 }
5239 else
5240 {
5241 cost = express_cost;
5242 depends_on = depends_on_express;
5243 depends_on_express = NULL;
5244 bound = NULL_TREE;
5245 comp = ERROR_MARK;
5246 inv_expr_id = express_inv_expr_id;
5247 }
5248
5249 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5250
5251 if (depends_on_elim)
5252 BITMAP_FREE (depends_on_elim);
5253 if (depends_on_express)
5254 BITMAP_FREE (depends_on_express);
5255
5256 return !infinite_cost_p (cost);
5257 }
5258
5259 /* Determines cost of basing replacement of USE on CAND. Returns false
5260 if USE cannot be based on CAND. */
5261
5262 static bool
5263 determine_use_iv_cost (struct ivopts_data *data,
5264 struct iv_use *use, struct iv_cand *cand)
5265 {
5266 switch (use->type)
5267 {
5268 case USE_NONLINEAR_EXPR:
5269 return determine_use_iv_cost_generic (data, use, cand);
5270
5271 case USE_ADDRESS:
5272 return determine_use_iv_cost_address (data, use, cand);
5273
5274 case USE_COMPARE:
5275 return determine_use_iv_cost_condition (data, use, cand);
5276
5277 default:
5278 gcc_unreachable ();
5279 }
5280 }
5281
5282 /* Return true if get_computation_cost indicates that autoincrement is
5283 a possibility for the pair of USE and CAND, false otherwise. */
5284
5285 static bool
5286 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5287 struct iv_cand *cand)
5288 {
5289 bitmap depends_on;
5290 bool can_autoinc;
5291 comp_cost cost;
5292
5293 if (use->type != USE_ADDRESS)
5294 return false;
5295
5296 cost = get_computation_cost (data, use, cand, true, &depends_on,
5297 &can_autoinc, NULL);
5298
5299 BITMAP_FREE (depends_on);
5300
5301 return !infinite_cost_p (cost) && can_autoinc;
5302 }
5303
5304 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5305 use that allows autoincrement, and set their AINC_USE if possible. */
5306
5307 static void
5308 set_autoinc_for_original_candidates (struct ivopts_data *data)
5309 {
5310 unsigned i, j;
5311
5312 for (i = 0; i < n_iv_cands (data); i++)
5313 {
5314 struct iv_cand *cand = iv_cand (data, i);
5315 struct iv_use *closest_before = NULL;
5316 struct iv_use *closest_after = NULL;
5317 if (cand->pos != IP_ORIGINAL)
5318 continue;
5319
5320 for (j = 0; j < n_iv_uses (data); j++)
5321 {
5322 struct iv_use *use = iv_use (data, j);
5323 unsigned uid = gimple_uid (use->stmt);
5324
5325 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5326 continue;
5327
5328 if (uid < gimple_uid (cand->incremented_at)
5329 && (closest_before == NULL
5330 || uid > gimple_uid (closest_before->stmt)))
5331 closest_before = use;
5332
5333 if (uid > gimple_uid (cand->incremented_at)
5334 && (closest_after == NULL
5335 || uid < gimple_uid (closest_after->stmt)))
5336 closest_after = use;
5337 }
5338
5339 if (closest_before != NULL
5340 && autoinc_possible_for_pair (data, closest_before, cand))
5341 cand->ainc_use = closest_before;
5342 else if (closest_after != NULL
5343 && autoinc_possible_for_pair (data, closest_after, cand))
5344 cand->ainc_use = closest_after;
5345 }
5346 }
5347
5348 /* Finds the candidates for the induction variables. */
5349
5350 static void
5351 find_iv_candidates (struct ivopts_data *data)
5352 {
5353 /* Add commonly used ivs. */
5354 add_standard_iv_candidates (data);
5355
5356 /* Add old induction variables. */
5357 add_old_ivs_candidates (data);
5358
5359 /* Add induction variables derived from uses. */
5360 add_derived_ivs_candidates (data);
5361
5362 set_autoinc_for_original_candidates (data);
5363
5364 /* Record the important candidates. */
5365 record_important_candidates (data);
5366 }
5367
5368 /* Determines costs of basing the use of the iv on an iv candidate. */
5369
5370 static void
5371 determine_use_iv_costs (struct ivopts_data *data)
5372 {
5373 unsigned i, j;
5374 struct iv_use *use;
5375 struct iv_cand *cand;
5376 bitmap to_clear = BITMAP_ALLOC (NULL);
5377
5378 alloc_use_cost_map (data);
5379
5380 for (i = 0; i < n_iv_uses (data); i++)
5381 {
5382 use = iv_use (data, i);
5383
5384 if (data->consider_all_candidates)
5385 {
5386 for (j = 0; j < n_iv_cands (data); j++)
5387 {
5388 cand = iv_cand (data, j);
5389 determine_use_iv_cost (data, use, cand);
5390 }
5391 }
5392 else
5393 {
5394 bitmap_iterator bi;
5395
5396 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5397 {
5398 cand = iv_cand (data, j);
5399 if (!determine_use_iv_cost (data, use, cand))
5400 bitmap_set_bit (to_clear, j);
5401 }
5402
5403 /* Remove the candidates for which the cost is infinite from
5404 the list of related candidates. */
5405 bitmap_and_compl_into (use->related_cands, to_clear);
5406 bitmap_clear (to_clear);
5407 }
5408 }
5409
5410 BITMAP_FREE (to_clear);
5411
5412 if (dump_file && (dump_flags & TDF_DETAILS))
5413 {
5414 fprintf (dump_file, "Use-candidate costs:\n");
5415
5416 for (i = 0; i < n_iv_uses (data); i++)
5417 {
5418 use = iv_use (data, i);
5419
5420 fprintf (dump_file, "Use %d:\n", i);
5421 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5422 for (j = 0; j < use->n_map_members; j++)
5423 {
5424 if (!use->cost_map[j].cand
5425 || infinite_cost_p (use->cost_map[j].cost))
5426 continue;
5427
5428 fprintf (dump_file, " %d\t%d\t%d\t",
5429 use->cost_map[j].cand->id,
5430 use->cost_map[j].cost.cost,
5431 use->cost_map[j].cost.complexity);
5432 if (use->cost_map[j].depends_on)
5433 bitmap_print (dump_file,
5434 use->cost_map[j].depends_on, "","");
5435 if (use->cost_map[j].inv_expr_id != -1)
5436 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5437 fprintf (dump_file, "\n");
5438 }
5439
5440 fprintf (dump_file, "\n");
5441 }
5442 fprintf (dump_file, "\n");
5443 }
5444 }
5445
5446 /* Determines cost of the candidate CAND. */
5447
5448 static void
5449 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5450 {
5451 comp_cost cost_base;
5452 unsigned cost, cost_step;
5453 tree base;
5454
5455 if (!cand->iv)
5456 {
5457 cand->cost = 0;
5458 return;
5459 }
5460
5461 /* There are two costs associated with the candidate -- its increment
5462 and its initialization. The second is almost negligible for any loop
5463 that rolls enough, so we take it into account only a little. */
5464
5465 base = cand->iv->base;
5466 cost_base = force_var_cost (data, base, NULL);
5467 /* It will be exceptional that the iv register happens to be initialized with
5468 the proper value at no cost. In general, there will at least be a regcopy
5469 or a const set. */
5470 if (cost_base.cost == 0)
5471 cost_base.cost = COSTS_N_INSNS (1);
5472 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5473
5474 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5475
5476 /* Prefer the original ivs unless we may gain something by replacing it.
5477 The reason is to make debugging simpler; so this is not relevant for
5478 artificial ivs created by other optimization passes. */
5479 if (cand->pos != IP_ORIGINAL
5480 || !SSA_NAME_VAR (cand->var_before)
5481 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5482 cost++;
5483
5484 /* Prefer not to insert statements into latch unless there are some
5485 already (so that we do not create unnecessary jumps). */
5486 if (cand->pos == IP_END
5487 && empty_block_p (ip_end_pos (data->current_loop)))
5488 cost++;
5489
5490 cand->cost = cost;
5491 cand->cost_step = cost_step;
5492 }
5493
5494 /* Determines costs of computation of the candidates. */
5495
5496 static void
5497 determine_iv_costs (struct ivopts_data *data)
5498 {
5499 unsigned i;
5500
5501 if (dump_file && (dump_flags & TDF_DETAILS))
5502 {
5503 fprintf (dump_file, "Candidate costs:\n");
5504 fprintf (dump_file, " cand\tcost\n");
5505 }
5506
5507 for (i = 0; i < n_iv_cands (data); i++)
5508 {
5509 struct iv_cand *cand = iv_cand (data, i);
5510
5511 determine_iv_cost (data, cand);
5512
5513 if (dump_file && (dump_flags & TDF_DETAILS))
5514 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5515 }
5516
5517 if (dump_file && (dump_flags & TDF_DETAILS))
5518 fprintf (dump_file, "\n");
5519 }
5520
5521 /* Calculates cost for having SIZE induction variables. */
5522
5523 static unsigned
5524 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5525 {
5526 /* We add size to the cost, so that we prefer eliminating ivs
5527 if possible. */
5528 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5529 data->body_includes_call);
5530 }
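
/* For instance (illustrative, assuming the usual behavior of
   estimate_reg_pressure_cost): with data->regs_used invariants already
   live, a set of 3 ivs is charged 3 plus the pressure cost of 3 more
   registers; once the total exceeds the registers available on the
   target, spill costs dominate and larger iv sets quickly become
   unattractive.  */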
5531
5532 /* For each size of the induction variable set determine the penalty. */
5533
5534 static void
5535 determine_set_costs (struct ivopts_data *data)
5536 {
5537 unsigned j, n;
5538 gphi *phi;
5539 gphi_iterator psi;
5540 tree op;
5541 struct loop *loop = data->current_loop;
5542 bitmap_iterator bi;
5543
5544 if (dump_file && (dump_flags & TDF_DETAILS))
5545 {
5546 fprintf (dump_file, "Global costs:\n");
5547 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5548 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5549 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5550 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5551 }
5552
5553 n = 0;
5554 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5555 {
5556 phi = psi.phi ();
5557 op = PHI_RESULT (phi);
5558
5559 if (virtual_operand_p (op))
5560 continue;
5561
5562 if (get_iv (data, op))
5563 continue;
5564
5565 n++;
5566 }
5567
5568 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5569 {
5570 struct version_info *info = ver_info (data, j);
5571
5572 if (info->inv_id && info->has_nonlin_use)
5573 n++;
5574 }
5575
5576 data->regs_used = n;
5577 if (dump_file && (dump_flags & TDF_DETAILS))
5578 fprintf (dump_file, " regs_used %d\n", n);
5579
5580 if (dump_file && (dump_flags & TDF_DETAILS))
5581 {
5582 fprintf (dump_file, " cost for size:\n");
5583 fprintf (dump_file, " ivs\tcost\n");
5584 for (j = 0; j <= 2 * target_avail_regs; j++)
5585 fprintf (dump_file, " %d\t%d\n", j,
5586 ivopts_global_cost_for_size (data, j));
5587 fprintf (dump_file, "\n");
5588 }
5589 }
5590
5591 /* Returns true if A is a cheaper cost pair than B. */
5592
5593 static bool
5594 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5595 {
5596 int cmp;
5597
5598 if (!a)
5599 return false;
5600
5601 if (!b)
5602 return true;
5603
5604 cmp = compare_costs (a->cost, b->cost);
5605 if (cmp < 0)
5606 return true;
5607
5608 if (cmp > 0)
5609 return false;
5610
5611 /* In case the costs are the same, prefer the cheaper candidate. */
5612 if (a->cand->cost < b->cand->cost)
5613 return true;
5614
5615 return false;
5616 }
5617
5618
5619 /* Returns the candidate by which USE is expressed in IVS. */
5620
5621 static struct cost_pair *
5622 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5623 {
5624 return ivs->cand_for_use[use->id];
5625 }
5626
5627 /* Computes the cost field of IVS structure. */
5628
5629 static void
5630 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5631 {
5632 comp_cost cost = ivs->cand_use_cost;
5633
5634 cost.cost += ivs->cand_cost;
5635
5636 cost.cost += ivopts_global_cost_for_size (data,
5637 ivs->n_regs + ivs->num_used_inv_expr);
5638
5639 ivs->cost = cost;
5640 }
5641
5642 /* Remove invariants in set INVS from set IVS. */
5643
5644 static void
5645 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5646 {
5647 bitmap_iterator bi;
5648 unsigned iid;
5649
5650 if (!invs)
5651 return;
5652
5653 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5654 {
5655 ivs->n_invariant_uses[iid]--;
5656 if (ivs->n_invariant_uses[iid] == 0)
5657 ivs->n_regs--;
5658 }
5659 }
5660
5661 /* Set USE not to be expressed by any candidate in IVS. */
5662
5663 static void
5664 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5665 struct iv_use *use)
5666 {
5667 unsigned uid = use->id, cid;
5668 struct cost_pair *cp;
5669
5670 cp = ivs->cand_for_use[uid];
5671 if (!cp)
5672 return;
5673 cid = cp->cand->id;
5674
5675 ivs->bad_uses++;
5676 ivs->cand_for_use[uid] = NULL;
5677 ivs->n_cand_uses[cid]--;
5678
5679 if (ivs->n_cand_uses[cid] == 0)
5680 {
5681 bitmap_clear_bit (ivs->cands, cid);
5682 /* Do not count the pseudocandidates. */
5683 if (cp->cand->iv)
5684 ivs->n_regs--;
5685 ivs->n_cands--;
5686 ivs->cand_cost -= cp->cand->cost;
5687
5688 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5689 }
5690
5691 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5692
5693 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5694
5695 if (cp->inv_expr_id != -1)
5696 {
5697 ivs->used_inv_expr[cp->inv_expr_id]--;
5698 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5699 ivs->num_used_inv_expr--;
5700 }
5701 iv_ca_recount_cost (data, ivs);
5702 }
5703
5704 /* Add invariants in set INVS to set IVS. */
5705
5706 static void
5707 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5708 {
5709 bitmap_iterator bi;
5710 unsigned iid;
5711
5712 if (!invs)
5713 return;
5714
5715 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5716 {
5717 ivs->n_invariant_uses[iid]++;
5718 if (ivs->n_invariant_uses[iid] == 1)
5719 ivs->n_regs++;
5720 }
5721 }
5722
5723 /* Set cost pair for USE in set IVS to CP. */
5724
5725 static void
5726 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5727 struct iv_use *use, struct cost_pair *cp)
5728 {
5729 unsigned uid = use->id, cid;
5730
5731 if (ivs->cand_for_use[uid] == cp)
5732 return;
5733
5734 if (ivs->cand_for_use[uid])
5735 iv_ca_set_no_cp (data, ivs, use);
5736
5737 if (cp)
5738 {
5739 cid = cp->cand->id;
5740
5741 ivs->bad_uses--;
5742 ivs->cand_for_use[uid] = cp;
5743 ivs->n_cand_uses[cid]++;
5744 if (ivs->n_cand_uses[cid] == 1)
5745 {
5746 bitmap_set_bit (ivs->cands, cid);
5747 /* Do not count the pseudocandidates. */
5748 if (cp->cand->iv)
5749 ivs->n_regs++;
5750 ivs->n_cands++;
5751 ivs->cand_cost += cp->cand->cost;
5752
5753 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5754 }
5755
5756 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5757 iv_ca_set_add_invariants (ivs, cp->depends_on);
5758
5759 if (cp->inv_expr_id != -1)
5760 {
5761 ivs->used_inv_expr[cp->inv_expr_id]++;
5762 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5763 ivs->num_used_inv_expr++;
5764 }
5765 iv_ca_recount_cost (data, ivs);
5766 }
5767 }
5768
5769 /* Extend set IVS by expressing USE by some of the candidates in it
5770 if possible. Consider all important candidates if candidates in
5771 set IVS don't give any result. */
5772
5773 static void
5774 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5775 struct iv_use *use)
5776 {
5777 struct cost_pair *best_cp = NULL, *cp;
5778 bitmap_iterator bi;
5779 unsigned i;
5780 struct iv_cand *cand;
5781
5782 gcc_assert (ivs->upto >= use->id);
5783 ivs->upto++;
5784 ivs->bad_uses++;
5785
5786 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5787 {
5788 cand = iv_cand (data, i);
5789 cp = get_use_iv_cost (data, use, cand);
5790 if (cheaper_cost_pair (cp, best_cp))
5791 best_cp = cp;
5792 }
5793
5794 if (best_cp == NULL)
5795 {
5796 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5797 {
5798 cand = iv_cand (data, i);
5799 cp = get_use_iv_cost (data, use, cand);
5800 if (cheaper_cost_pair (cp, best_cp))
5801 best_cp = cp;
5802 }
5803 }
5804
5805 iv_ca_set_cp (data, ivs, use, best_cp);
5806 }
5807
5808 /* Get cost for assignment IVS. */
5809
5810 static comp_cost
5811 iv_ca_cost (struct iv_ca *ivs)
5812 {
5813 /* This was a conditional expression but it triggered a bug in
5814 Sun C 5.5. */
5815 if (ivs->bad_uses)
5816 return infinite_cost;
5817 else
5818 return ivs->cost;
5819 }
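
/* For illustration: returning infinite_cost while BAD_USES is nonzero
   guarantees that a partially built assignment can never win a cost
   comparison against a complete one during the search below. */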
5820
5821 /* Returns true if all dependences of CP are among invariants in IVS. */
5822
5823 static bool
5824 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5825 {
5826 unsigned i;
5827 bitmap_iterator bi;
5828
5829 if (!cp->depends_on)
5830 return true;
5831
5832 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5833 {
5834 if (ivs->n_invariant_uses[i] == 0)
5835 return false;
5836 }
5837
5838 return true;
5839 }
5840
5841 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
5842 chains it before NEXT_CHANGE. */
5843
5844 static struct iv_ca_delta *
5845 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5846 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5847 {
5848 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5849
5850 change->use = use;
5851 change->old_cp = old_cp;
5852 change->new_cp = new_cp;
5853 change->next_change = next_change;
5854
5855 return change;
5856 }
5857
5858 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5859 are rewritten. */
5860
5861 static struct iv_ca_delta *
5862 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5863 {
5864 struct iv_ca_delta *last;
5865
5866 if (!l2)
5867 return l1;
5868
5869 if (!l1)
5870 return l2;
5871
5872 for (last = l1; last->next_change; last = last->next_change)
5873 continue;
5874 last->next_change = l2;
5875
5876 return l1;
5877 }
5878
5879 /* Reverse the list of changes DELTA, forming its inverse. */
5880
5881 static struct iv_ca_delta *
5882 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5883 {
5884 struct iv_ca_delta *act, *next, *prev = NULL;
5885
5886 for (act = delta; act; act = next)
5887 {
5888 next = act->next_change;
5889 act->next_change = prev;
5890 prev = act;
5891
5892 std::swap (act->old_cp, act->new_cp);
5893 }
5894
5895 return prev;
5896 }
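
/* A minimal standalone sketch (illustration only, not part of the pass) of
   why reversing the list while swapping the OLD_CP/NEW_CP endpoints of each
   node yields the inverse change set: committing the reversed list undoes
   the original delta node by node, in the opposite order. The toy types
   below are hypothetical simplifications of struct iv_ca_delta. */
#if 0
struct toy_delta { int old_cp, new_cp; struct toy_delta *next; };

static struct toy_delta *
toy_delta_reverse (struct toy_delta *delta)
{
  struct toy_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      /* Swapping the endpoints turns each "old -> new" step into the
	 inverse "new -> old" step. */
      int tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
#endif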
5897
5898 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5899 reverted instead. */
5900
5901 static void
5902 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5903 struct iv_ca_delta *delta, bool forward)
5904 {
5905 struct cost_pair *from, *to;
5906 struct iv_ca_delta *act;
5907
5908 if (!forward)
5909 delta = iv_ca_delta_reverse (delta);
5910
5911 for (act = delta; act; act = act->next_change)
5912 {
5913 from = act->old_cp;
5914 to = act->new_cp;
5915 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5916 iv_ca_set_cp (data, ivs, act->use, to);
5917 }
5918
5919 if (!forward)
5920 iv_ca_delta_reverse (delta);
5921 }
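
/* For illustration, the trial-and-rollback pattern in which the delta
   machinery is used below (see iv_ca_extend and iv_ca_narrow): tentatively
   commit a delta, measure the resulting cost, then revert, deciding only
   afterwards whether to keep the change. */
#if 0
  iv_ca_delta_commit (data, ivs, delta, true);	/* Apply the changes. */
  cost = iv_ca_cost (ivs);			/* Measure the new cost. */
  iv_ca_delta_commit (data, ivs, delta, false);	/* Revert them again. */
#endif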
5922
5923 /* Returns true if CAND is used in IVS. */
5924
5925 static bool
5926 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5927 {
5928 return ivs->n_cand_uses[cand->id] > 0;
5929 }
5930
5931 /* Returns the number of induction variable candidates in the set IVS. */
5932
5933 static unsigned
5934 iv_ca_n_cands (struct iv_ca *ivs)
5935 {
5936 return ivs->n_cands;
5937 }
5938
5939 /* Free the list of changes DELTA. */
5940
5941 static void
5942 iv_ca_delta_free (struct iv_ca_delta **delta)
5943 {
5944 struct iv_ca_delta *act, *next;
5945
5946 for (act = *delta; act; act = next)
5947 {
5948 next = act->next_change;
5949 free (act);
5950 }
5951
5952 *delta = NULL;
5953 }
5954
5955 /* Allocates a new iv candidate assignment. */
5956
5957 static struct iv_ca *
5958 iv_ca_new (struct ivopts_data *data)
5959 {
5960 struct iv_ca *nw = XNEW (struct iv_ca);
5961
5962 nw->upto = 0;
5963 nw->bad_uses = 0;
5964 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5965 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5966 nw->cands = BITMAP_ALLOC (NULL);
5967 nw->n_cands = 0;
5968 nw->n_regs = 0;
5969 nw->cand_use_cost = no_cost;
5970 nw->cand_cost = 0;
5971 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5972 nw->cost = no_cost;
5973 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5974 nw->num_used_inv_expr = 0;
5975
5976 return nw;
5977 }
5978
5979 /* Free memory occupied by the set IVS. */
5980
5981 static void
5982 iv_ca_free (struct iv_ca **ivs)
5983 {
5984 free ((*ivs)->cand_for_use);
5985 free ((*ivs)->n_cand_uses);
5986 BITMAP_FREE ((*ivs)->cands);
5987 free ((*ivs)->n_invariant_uses);
5988 free ((*ivs)->used_inv_expr);
5989 free (*ivs);
5990 *ivs = NULL;
5991 }
5992
5993 /* Dumps IVS to FILE. */
5994
5995 static void
5996 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5997 {
5998 const char *pref = " invariants ";
5999 unsigned i;
6000 comp_cost cost = iv_ca_cost (ivs);
6001
6002 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6003 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6004 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6005 bitmap_print (file, ivs->cands, " candidates: ","\n");
6006
6007 for (i = 0; i < ivs->upto; i++)
6008 {
6009 struct iv_use *use = iv_use (data, i);
6010 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6011 if (cp)
6012 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6013 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6014 else
6015 fprintf (file, " use:%d --> ??\n", use->id);
6016 }
6017
6018 for (i = 1; i <= data->max_inv_id; i++)
6019 if (ivs->n_invariant_uses[i])
6020 {
6021 fprintf (file, "%s%d", pref, i);
6022 pref = ", ";
6023 }
6024 fprintf (file, "\n\n");
6025 }
6026
6027 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6028 of the new set, and store the differences in DELTA. The number of
6029 induction variables in the new set is stored to N_IVS. When the flag
6030 MIN_NCAND is true, try to find a solution with a minimal number of ivs. */
6031
6032 static comp_cost
6033 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6034 struct iv_cand *cand, struct iv_ca_delta **delta,
6035 unsigned *n_ivs, bool min_ncand)
6036 {
6037 unsigned i;
6038 comp_cost cost;
6039 struct iv_use *use;
6040 struct cost_pair *old_cp, *new_cp;
6041
6042 *delta = NULL;
6043 for (i = 0; i < ivs->upto; i++)
6044 {
6045 use = iv_use (data, i);
6046 old_cp = iv_ca_cand_for_use (ivs, use);
6047
6048 if (old_cp
6049 && old_cp->cand == cand)
6050 continue;
6051
6052 new_cp = get_use_iv_cost (data, use, cand);
6053 if (!new_cp)
6054 continue;
6055
6056 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6057 continue;
6058
6059 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6060 continue;
6061
6062 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6063 }
6064
6065 iv_ca_delta_commit (data, ivs, *delta, true);
6066 cost = iv_ca_cost (ivs);
6067 if (n_ivs)
6068 *n_ivs = iv_ca_n_cands (ivs);
6069 iv_ca_delta_commit (data, ivs, *delta, false);
6070
6071 return cost;
6072 }
6073
6074 /* Try narrowing set IVS by removing CAND. Return the cost of
6075 the new set and store the differences in DELTA. START is
6076 the candidate with which we start narrowing. */
6077
6078 static comp_cost
6079 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_cand *cand, struct iv_cand *start,
6081 struct iv_ca_delta **delta)
6082 {
6083 unsigned i, ci;
6084 struct iv_use *use;
6085 struct cost_pair *old_cp, *new_cp, *cp;
6086 bitmap_iterator bi;
6087 struct iv_cand *cnd;
6088 comp_cost cost, best_cost, acost;
6089
6090 *delta = NULL;
6091 for (i = 0; i < n_iv_uses (data); i++)
6092 {
6093 use = iv_use (data, i);
6094
6095 old_cp = iv_ca_cand_for_use (ivs, use);
6096 if (old_cp->cand != cand)
6097 continue;
6098
6099 best_cost = iv_ca_cost (ivs);
6100 /* Start narrowing with START. */
6101 new_cp = get_use_iv_cost (data, use, start);
6102
6103 if (data->consider_all_candidates)
6104 {
6105 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6106 {
6107 if (ci == cand->id || (start && ci == start->id))
6108 continue;
6109
6110 cnd = iv_cand (data, ci);
6111
6112 cp = get_use_iv_cost (data, use, cnd);
6113 if (!cp)
6114 continue;
6115
6116 iv_ca_set_cp (data, ivs, use, cp);
6117 acost = iv_ca_cost (ivs);
6118
6119 if (compare_costs (acost, best_cost) < 0)
6120 {
6121 best_cost = acost;
6122 new_cp = cp;
6123 }
6124 }
6125 }
6126 else
6127 {
6128 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6129 {
6130 if (ci == cand->id || (start && ci == start->id))
6131 continue;
6132
6133 cnd = iv_cand (data, ci);
6134
6135 cp = get_use_iv_cost (data, use, cnd);
6136 if (!cp)
6137 continue;
6138
6139 iv_ca_set_cp (data, ivs, use, cp);
6140 acost = iv_ca_cost (ivs);
6141
6142 if (compare_costs (acost, best_cost) < 0)
6143 {
6144 best_cost = acost;
6145 new_cp = cp;
6146 }
6147 }
6148 }
6149 /* Restore the old cp for this use. */
6150 iv_ca_set_cp (data, ivs, use, old_cp);
6151
6152 if (!new_cp)
6153 {
6154 iv_ca_delta_free (delta);
6155 return infinite_cost;
6156 }
6157
6158 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6159 }
6160
6161 iv_ca_delta_commit (data, ivs, *delta, true);
6162 cost = iv_ca_cost (ivs);
6163 iv_ca_delta_commit (data, ivs, *delta, false);
6164
6165 return cost;
6166 }
6167
6168 /* Try optimizing the set of candidates IVS by removing candidates other
6169 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6170 differences in DELTA. */
6171
6172 static comp_cost
6173 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6174 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6175 {
6176 bitmap_iterator bi;
6177 struct iv_ca_delta *act_delta, *best_delta;
6178 unsigned i;
6179 comp_cost best_cost, acost;
6180 struct iv_cand *cand;
6181
6182 best_delta = NULL;
6183 best_cost = iv_ca_cost (ivs);
6184
6185 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6186 {
6187 cand = iv_cand (data, i);
6188
6189 if (cand == except_cand)
6190 continue;
6191
6192 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6193
6194 if (compare_costs (acost, best_cost) < 0)
6195 {
6196 best_cost = acost;
6197 iv_ca_delta_free (&best_delta);
6198 best_delta = act_delta;
6199 }
6200 else
6201 iv_ca_delta_free (&act_delta);
6202 }
6203
6204 if (!best_delta)
6205 {
6206 *delta = NULL;
6207 return best_cost;
6208 }
6209
6210 /* Recurse to possibly remove other unnecessary ivs. */
6211 iv_ca_delta_commit (data, ivs, best_delta, true);
6212 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6213 iv_ca_delta_commit (data, ivs, best_delta, false);
6214 *delta = iv_ca_delta_join (best_delta, *delta);
6215 return best_cost;
6216 }
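
/* For illustration: the recursion above commits the best single removal,
   prunes further on top of it, and reverts; joining BEST_DELTA with the
   recursive *DELTA hands the caller a single list describing the whole
   multi-step narrowing, which iv_ca_delta_commit can apply in one go. */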
6217
6218 /* Check whether CAND_IDX is a candidate other than OLD_CAND with a
6219 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6220 its cost_pair; otherwise just return BEST_CP. */
6221
6222 static struct cost_pair*
6223 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6224 unsigned int cand_idx, struct iv_cand *old_cand,
6225 struct cost_pair *best_cp)
6226 {
6227 struct iv_cand *cand;
6228 struct cost_pair *cp;
6229
6230 gcc_assert (old_cand != NULL && best_cp != NULL);
6231 if (cand_idx == old_cand->id)
6232 return best_cp;
6233
6234 cand = iv_cand (data, cand_idx);
6235 cp = get_use_iv_cost (data, use, cand);
6236 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6237 return cp;
6238
6239 return best_cp;
6240 }
6241
6242 /* Try breaking the locally optimal fixed point for IVS by replacing
6243 candidates that are used by more than one iv use. For each such
6244 candidate, this function tries to represent the iv uses under it using
6245 other candidates with lower local cost, then tries to prune the new set.
6246 If the new set has lower cost, it returns the new cost after recording
6247 the candidate replacement in the list DELTA. */
6248
6249 static comp_cost
6250 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6251 struct iv_ca_delta **delta)
6252 {
6253 bitmap_iterator bi, bj;
6254 unsigned int i, j, k;
6255 struct iv_use *use;
6256 struct iv_cand *cand;
6257 comp_cost orig_cost, acost;
6258 struct iv_ca_delta *act_delta, *tmp_delta;
6259 struct cost_pair *old_cp, *best_cp = NULL;
6260
6261 *delta = NULL;
6262 orig_cost = iv_ca_cost (ivs);
6263
6264 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6265 {
6266 if (ivs->n_cand_uses[i] == 1
6267 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6268 continue;
6269
6270 cand = iv_cand (data, i);
6271
6272 act_delta = NULL;
6273 /* Represent uses under current candidate using other ones with
6274 lower local cost. */
6275 for (j = 0; j < ivs->upto; j++)
6276 {
6277 use = iv_use (data, j);
6278 old_cp = iv_ca_cand_for_use (ivs, use);
6279
6280 if (old_cp->cand != cand)
6281 continue;
6282
6283 best_cp = old_cp;
6284 if (data->consider_all_candidates)
6285 for (k = 0; k < n_iv_cands (data); k++)
6286 best_cp = cheaper_cost_with_cand (data, use, k,
6287 old_cp->cand, best_cp);
6288 else
6289 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6290 best_cp = cheaper_cost_with_cand (data, use, k,
6291 old_cp->cand, best_cp);
6292
6293 if (best_cp == old_cp)
6294 continue;
6295
6296 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6297 }
6298 /* No need for further pruning. */
6299 if (!act_delta)
6300 continue;
6301
6302 /* Prune the new candidate set. */
6303 iv_ca_delta_commit (data, ivs, act_delta, true);
6304 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6305 iv_ca_delta_commit (data, ivs, act_delta, false);
6306 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6307
6308 if (compare_costs (acost, orig_cost) < 0)
6309 {
6310 *delta = act_delta;
6311 return acost;
6312 }
6313 else
6314 iv_ca_delta_free (&act_delta);
6315 }
6316
6317 return orig_cost;
6318 }
6319
6320 /* Tries to extend the set IVS in the best possible way in order
6321 to express USE. If ORIGINALP is true, prefer candidates from
6322 the original set of IVs, otherwise favor important candidates not
6323 based on any memory object. */
6324
6325 static bool
6326 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6327 struct iv_use *use, bool originalp)
6328 {
6329 comp_cost best_cost, act_cost;
6330 unsigned i;
6331 bitmap_iterator bi;
6332 struct iv_cand *cand;
6333 struct iv_ca_delta *best_delta = NULL, *act_delta;
6334 struct cost_pair *cp;
6335
6336 iv_ca_add_use (data, ivs, use);
6337 best_cost = iv_ca_cost (ivs);
6338 cp = iv_ca_cand_for_use (ivs, use);
6339 if (cp)
6340 {
6341 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6342 iv_ca_set_no_cp (data, ivs, use);
6343 }
6344
6345 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6346 first try important candidates not based on any memory object. Only if
6347 this fails, try the specific ones. Rationale -- in loops with many
6348 variables the best choice often is to use just one generic biv. If we
6349 added here many ivs specific to the uses, the optimization algorithm later
6350 would be likely to get stuck in a local minimum, thus causing us to create
6351 too many ivs. The approach from few ivs to more seems more likely to be
6352 successful -- starting from few ivs, replacing an expensive use by a
6353 specific iv should always be a win. */
6354 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6355 {
6356 cand = iv_cand (data, i);
6357
6358 if (originalp && cand->pos != IP_ORIGINAL)
6359 continue;
6360
6361 if (!originalp && cand->iv->base_object != NULL_TREE)
6362 continue;
6363
6364 if (iv_ca_cand_used_p (ivs, cand))
6365 continue;
6366
6367 cp = get_use_iv_cost (data, use, cand);
6368 if (!cp)
6369 continue;
6370
6371 iv_ca_set_cp (data, ivs, use, cp);
6372 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6373 true);
6374 iv_ca_set_no_cp (data, ivs, use);
6375 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6376
6377 if (compare_costs (act_cost, best_cost) < 0)
6378 {
6379 best_cost = act_cost;
6380
6381 iv_ca_delta_free (&best_delta);
6382 best_delta = act_delta;
6383 }
6384 else
6385 iv_ca_delta_free (&act_delta);
6386 }
6387
6388 if (infinite_cost_p (best_cost))
6389 {
6390 for (i = 0; i < use->n_map_members; i++)
6391 {
6392 cp = use->cost_map + i;
6393 cand = cp->cand;
6394 if (!cand)
6395 continue;
6396
6397 /* Already tried this. */
6398 if (cand->important)
6399 {
6400 if (originalp && cand->pos == IP_ORIGINAL)
6401 continue;
6402 if (!originalp && cand->iv->base_object == NULL_TREE)
6403 continue;
6404 }
6405
6406 if (iv_ca_cand_used_p (ivs, cand))
6407 continue;
6408
6409 act_delta = NULL;
6410 iv_ca_set_cp (data, ivs, use, cp);
6411 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6412 iv_ca_set_no_cp (data, ivs, use);
6413 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6414 cp, act_delta);
6415
6416 if (compare_costs (act_cost, best_cost) < 0)
6417 {
6418 best_cost = act_cost;
6419
6420 if (best_delta)
6421 iv_ca_delta_free (&best_delta);
6422 best_delta = act_delta;
6423 }
6424 else
6425 iv_ca_delta_free (&act_delta);
6426 }
6427 }
6428
6429 iv_ca_delta_commit (data, ivs, best_delta, true);
6430 iv_ca_delta_free (&best_delta);
6431
6432 return !infinite_cost_p (best_cost);
6433 }
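
/* For illustration: the function above searches in two phases -- the
   preferred important candidates first, and the use's full cost map only
   when the best cost is still infinite -- which implements the
   few-ivs-to-more rationale spelled out in the comment inside it. */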
6434
6435 /* Finds an initial assignment of candidates to uses. */
6436
6437 static struct iv_ca *
6438 get_initial_solution (struct ivopts_data *data, bool originalp)
6439 {
6440 struct iv_ca *ivs = iv_ca_new (data);
6441 unsigned i;
6442
6443 for (i = 0; i < n_iv_uses (data); i++)
6444 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6445 {
6446 iv_ca_free (&ivs);
6447 return NULL;
6448 }
6449
6450 return ivs;
6451 }
6452
6453 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6454 points to a bool variable; if it is true, this function tries to break
6455 the locally optimal fixed point by replacing candidates in IVS. */
6456
6457 static bool
6458 try_improve_iv_set (struct ivopts_data *data,
6459 struct iv_ca *ivs, bool *try_replace_p)
6460 {
6461 unsigned i, n_ivs;
6462 comp_cost acost, best_cost = iv_ca_cost (ivs);
6463 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6464 struct iv_cand *cand;
6465
6466 /* Try extending the set of induction variables by one. */
6467 for (i = 0; i < n_iv_cands (data); i++)
6468 {
6469 cand = iv_cand (data, i);
6470
6471 if (iv_ca_cand_used_p (ivs, cand))
6472 continue;
6473
6474 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6475 if (!act_delta)
6476 continue;
6477
6478 /* If we successfully added the candidate and the set is small enough,
6479 try optimizing it by removing other candidates. */
6480 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6481 {
6482 iv_ca_delta_commit (data, ivs, act_delta, true);
6483 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6484 iv_ca_delta_commit (data, ivs, act_delta, false);
6485 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6486 }
6487
6488 if (compare_costs (acost, best_cost) < 0)
6489 {
6490 best_cost = acost;
6491 iv_ca_delta_free (&best_delta);
6492 best_delta = act_delta;
6493 }
6494 else
6495 iv_ca_delta_free (&act_delta);
6496 }
6497
6498 if (!best_delta)
6499 {
6500 /* Try removing the candidates from the set instead. */
6501 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6502
6503 if (!best_delta && *try_replace_p)
6504 {
6505 *try_replace_p = false;
6506 /* So far the candidate selection algorithm tends to choose fewer
6507 IVs, so that it can handle loops with many variables for which
6508 the best choice is often to use only one general biv. One
6509 weakness is that it cannot handle the opposite case, in which
6510 different candidates should be chosen for different uses. To
6511 solve this problem, we replace candidates in the manner described
6512 in the comments of iv_ca_replace, thus giving the general
6513 algorithm a chance to break the locally optimal fixed point. */
6514 best_cost = iv_ca_replace (data, ivs, &best_delta);
6515 }
6516
6517 if (!best_delta)
6518 return false;
6519 }
6520
6521 iv_ca_delta_commit (data, ivs, best_delta, true);
6522 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6523 iv_ca_delta_free (&best_delta);
6524 return true;
6525 }
6526
6527 /* Attempts to find the optimal set of induction variables. We use a simple
6528 greedy heuristic: we try to replace at most one candidate in the selected
6529 solution and remove the unused ivs while this improves the cost. */
6530
6531 static struct iv_ca *
6532 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6533 {
6534 struct iv_ca *set;
6535 bool try_replace_p = true;
6536
6537 /* Get the initial solution. */
6538 set = get_initial_solution (data, originalp);
6539 if (!set)
6540 {
6541 if (dump_file && (dump_flags & TDF_DETAILS))
6542 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6543 return NULL;
6544 }
6545
6546 if (dump_file && (dump_flags & TDF_DETAILS))
6547 {
6548 fprintf (dump_file, "Initial set of candidates:\n");
6549 iv_ca_dump (data, dump_file, set);
6550 }
6551
6552 while (try_improve_iv_set (data, set, &try_replace_p))
6553 {
6554 if (dump_file && (dump_flags & TDF_DETAILS))
6555 {
6556 fprintf (dump_file, "Improved to:\n");
6557 iv_ca_dump (data, dump_file, set);
6558 }
6559 }
6560
6561 return set;
6562 }
6563
6564 static struct iv_ca *
6565 find_optimal_iv_set (struct ivopts_data *data)
6566 {
6567 unsigned i;
6568 struct iv_ca *set, *origset;
6569 struct iv_use *use;
6570 comp_cost cost, origcost;
6571
6572 /* Determine the cost based on a strategy that starts with the original
6573 IVs, then try again using a strategy that prefers candidates not based
6574 on any memory object. */
6575 origset = find_optimal_iv_set_1 (data, true);
6576 set = find_optimal_iv_set_1 (data, false);
6577
6578 if (!origset && !set)
6579 return NULL;
6580
6581 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6582 cost = set ? iv_ca_cost (set) : infinite_cost;
6583
6584 if (dump_file && (dump_flags & TDF_DETAILS))
6585 {
6586 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6587 origcost.cost, origcost.complexity);
6588 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6589 cost.cost, cost.complexity);
6590 }
6591
6592 /* Choose the one with the best cost. */
6593 if (compare_costs (origcost, cost) <= 0)
6594 {
6595 if (set)
6596 iv_ca_free (&set);
6597 set = origset;
6598 }
6599 else if (origset)
6600 iv_ca_free (&origset);
6601
6602 for (i = 0; i < n_iv_uses (data); i++)
6603 {
6604 use = iv_use (data, i);
6605 use->selected = iv_ca_cand_for_use (set, use)->cand;
6606 }
6607
6608 return set;
6609 }
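
/* A worked illustration of the strategy comparison above, with made-up
   numbers: if the original-IV solution costs (12, complexity 2) and the
   generic one costs (10, complexity 3), compare_costs prefers the latter,
   since costs are compared first and complexity only breaks ties, so
   ORIGSET is freed and SET is kept. */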
6610
6611 /* Creates a new induction variable corresponding to CAND. */
6612
6613 static void
6614 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6615 {
6616 gimple_stmt_iterator incr_pos;
6617 tree base;
6618 bool after = false;
6619
6620 if (!cand->iv)
6621 return;
6622
6623 switch (cand->pos)
6624 {
6625 case IP_NORMAL:
6626 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6627 break;
6628
6629 case IP_END:
6630 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6631 after = true;
6632 break;
6633
6634 case IP_AFTER_USE:
6635 after = true;
6636 /* fall through */
6637 case IP_BEFORE_USE:
6638 incr_pos = gsi_for_stmt (cand->incremented_at);
6639 break;
6640
6641 case IP_ORIGINAL:
6642 /* Mark that the iv is preserved. */
6643 name_info (data, cand->var_before)->preserve_biv = true;
6644 name_info (data, cand->var_after)->preserve_biv = true;
6645
6646 /* Rewrite the increment so that it uses var_before directly. */
6647 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6648 return;
6649 }
6650
6651 gimple_add_tmp_var (cand->var_before);
6652
6653 base = unshare_expr (cand->iv->base);
6654
6655 create_iv (base, unshare_expr (cand->iv->step),
6656 cand->var_before, data->current_loop,
6657 &incr_pos, after, &cand->var_before, &cand->var_after);
6658 }
6659
6660 /* Creates new induction variables described in SET. */
6661
6662 static void
6663 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6664 {
6665 unsigned i;
6666 struct iv_cand *cand;
6667 bitmap_iterator bi;
6668
6669 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6670 {
6671 cand = iv_cand (data, i);
6672 create_new_iv (data, cand);
6673 }
6674
6675 if (dump_file && (dump_flags & TDF_DETAILS))
6676 {
6677 fprintf (dump_file, "Selected IV set for loop %d",
6678 data->current_loop->num);
6679 if (data->loop_loc != UNKNOWN_LOCATION)
6680 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6681 LOCATION_LINE (data->loop_loc));
6682 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6683 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6684 {
6685 cand = iv_cand (data, i);
6686 dump_cand (dump_file, cand);
6687 }
6688 fprintf (dump_file, "\n");
6689 }
6690 }
6691
6692 /* Rewrites USE (definition of iv used in a nonlinear expression)
6693 using candidate CAND. */
6694
6695 static void
6696 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6697 struct iv_use *use, struct iv_cand *cand)
6698 {
6699 tree comp;
6700 tree op, tgt;
6701 gassign *ass;
6702 gimple_stmt_iterator bsi;
6703
6704 /* An important special case -- if we are asked to express the value of
6705 the original iv by itself, just exit; there is no need to
6706 introduce a new computation (that might also need casting the
6707 variable to unsigned and back). */
6708 if (cand->pos == IP_ORIGINAL
6709 && cand->incremented_at == use->stmt)
6710 {
6711 enum tree_code stmt_code;
6712
6713 gcc_assert (is_gimple_assign (use->stmt));
6714 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6715
6716 /* Check whether we may leave the computation unchanged.
6717 This is the case only if it does not rely on other
6718 computations in the loop -- otherwise, the computation
6719 we rely upon may be removed in remove_unused_ivs,
6720 thus leading to ICE. */
6721 stmt_code = gimple_assign_rhs_code (use->stmt);
6722 if (stmt_code == PLUS_EXPR
6723 || stmt_code == MINUS_EXPR
6724 || stmt_code == POINTER_PLUS_EXPR)
6725 {
6726 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6727 op = gimple_assign_rhs2 (use->stmt);
6728 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6729 op = gimple_assign_rhs1 (use->stmt);
6730 else
6731 op = NULL_TREE;
6732 }
6733 else
6734 op = NULL_TREE;
6735
6736 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6737 return;
6738 }
6739
6740 comp = get_computation (data->current_loop, use, cand);
6741 gcc_assert (comp != NULL_TREE);
6742
6743 switch (gimple_code (use->stmt))
6744 {
6745 case GIMPLE_PHI:
6746 tgt = PHI_RESULT (use->stmt);
6747
6748 /* If we should keep the biv, do not replace it. */
6749 if (name_info (data, tgt)->preserve_biv)
6750 return;
6751
6752 bsi = gsi_after_labels (gimple_bb (use->stmt));
6753 break;
6754
6755 case GIMPLE_ASSIGN:
6756 tgt = gimple_assign_lhs (use->stmt);
6757 bsi = gsi_for_stmt (use->stmt);
6758 break;
6759
6760 default:
6761 gcc_unreachable ();
6762 }
6763
6764 if (!valid_gimple_rhs_p (comp)
6765 || (gimple_code (use->stmt) != GIMPLE_PHI
6766 /* We can't allow re-allocating the stmt as it might be pointed
6767 to still. */
6768 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6769 >= gimple_num_ops (gsi_stmt (bsi)))))
6770 {
6771 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6772 true, GSI_SAME_STMT);
6773 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6774 {
6775 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6776 /* As this isn't a plain copy we have to reset alignment
6777 information. */
6778 if (SSA_NAME_PTR_INFO (comp))
6779 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6780 }
6781 }
6782
6783 if (gimple_code (use->stmt) == GIMPLE_PHI)
6784 {
6785 ass = gimple_build_assign (tgt, comp);
6786 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6787
6788 bsi = gsi_for_stmt (use->stmt);
6789 remove_phi_node (&bsi, false);
6790 }
6791 else
6792 {
6793 gimple_assign_set_rhs_from_tree (&bsi, comp);
6794 use->stmt = gsi_stmt (bsi);
6795 }
6796 }
6797
6798 /* Performs a peephole optimization to reorder the iv update statement with
6799 a mem ref to enable instruction combining in later phases. The mem ref uses
6800 the iv value before the update, so the reordering transformation requires
6801 adjustment of the offset. CAND is the selected IV_CAND.
6802
6803 Example:
6804
6805 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6806 iv2 = iv1 + 1;
6807
6808 if (t < val) (1)
6809 goto L;
6810 goto Head;
6811
6812
6813 Directly propagating t over to (1) will introduce an overlapping live
6814 range, thus increasing register pressure. This peephole transforms it into:
6815
6816
6817 iv2 = iv1 + 1;
6818 t = MEM_REF (base, iv2, 8, 8);
6819 if (t < val)
6820 goto L;
6821 goto Head;
6822 */
6823
6824 static void
6825 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6826 {
6827 tree var_after;
6828 gimple iv_update, stmt;
6829 basic_block bb;
6830 gimple_stmt_iterator gsi, gsi_iv;
6831
6832 if (cand->pos != IP_NORMAL)
6833 return;
6834
6835 var_after = cand->var_after;
6836 iv_update = SSA_NAME_DEF_STMT (var_after);
6837
6838 bb = gimple_bb (iv_update);
6839 gsi = gsi_last_nondebug_bb (bb);
6840 stmt = gsi_stmt (gsi);
6841
6842 /* Only handle a conditional statement for now. */
6843 if (gimple_code (stmt) != GIMPLE_COND)
6844 return;
6845
6846 gsi_prev_nondebug (&gsi);
6847 stmt = gsi_stmt (gsi);
6848 if (stmt != iv_update)
6849 return;
6850
6851 gsi_prev_nondebug (&gsi);
6852 if (gsi_end_p (gsi))
6853 return;
6854
6855 stmt = gsi_stmt (gsi);
6856 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6857 return;
6858
6859 if (stmt != use->stmt)
6860 return;
6861
6862 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6863 return;
6864
6865 if (dump_file && (dump_flags & TDF_DETAILS))
6866 {
6867 fprintf (dump_file, "Reordering \n");
6868 print_gimple_stmt (dump_file, iv_update, 0, 0);
6869 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6870 fprintf (dump_file, "\n");
6871 }
6872
6873 gsi = gsi_for_stmt (use->stmt);
6874 gsi_iv = gsi_for_stmt (iv_update);
6875 gsi_move_before (&gsi_iv, &gsi);
6876
6877 cand->pos = IP_BEFORE_USE;
6878 cand->incremented_at = use->stmt;
6879 }
6880
6881 /* Rewrites USE (address that is an iv) using candidate CAND. */
6882
6883 static void
6884 rewrite_use_address_1 (struct ivopts_data *data,
6885 struct iv_use *use, struct iv_cand *cand)
6886 {
6887 aff_tree aff;
6888 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6889 tree base_hint = NULL_TREE;
6890 tree ref, iv;
6891 bool ok;
6892
6893 adjust_iv_update_pos (cand, use);
6894 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6895 gcc_assert (ok);
6896 unshare_aff_combination (&aff);
6897
6898 /* To avoid undefined overflow problems, all IV candidates use unsigned
6899 integer types. The drawback is that this makes it impossible for
6900 create_mem_ref to distinguish an IV that is based on a memory object
6901 from one that represents simply an offset.
6902
6903 To work around this problem, we pass a hint to create_mem_ref that
6904 indicates which variable (if any) in aff is an IV based on a memory
6905 object. Note that we only consider the candidate. If this is not
6906 based on an object, the base of the reference is in some subexpression
6907 of the use -- but these will use pointer types, so they are recognized
6908 by the create_mem_ref heuristics anyway. */
6909 if (cand->iv->base_object)
6910 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6911
6912 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6913 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6914 reference_alias_ptr_type (*use->op_p),
6915 iv, base_hint, data->speed);
6916 copy_ref_info (ref, *use->op_p);
6917 *use->op_p = ref;
6918 }
6919
6920 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6921 first use of a group, rewrites sub uses in the group too. */
6922
6923 static void
6924 rewrite_use_address (struct ivopts_data *data,
6925 struct iv_use *use, struct iv_cand *cand)
6926 {
6927 struct iv_use *next;
6928
6929 gcc_assert (use->sub_id == 0);
6930 rewrite_use_address_1 (data, use, cand);
6931 update_stmt (use->stmt);
6932
6933 for (next = use->next; next != NULL; next = next->next)
6934 {
6935 rewrite_use_address_1 (data, next, cand);
6936 update_stmt (next->stmt);
6937 }
6940 }
6941
6942 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6943 candidate CAND. */
6944
6945 static void
6946 rewrite_use_compare (struct ivopts_data *data,
6947 struct iv_use *use, struct iv_cand *cand)
6948 {
6949 tree comp, *var_p, op, bound;
6950 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6951 enum tree_code compare;
6952 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6953 bool ok;
6954
6955 bound = cp->value;
6956 if (bound)
6957 {
6958 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6959 tree var_type = TREE_TYPE (var);
6960 gimple_seq stmts;
6961
6962 if (dump_file && (dump_flags & TDF_DETAILS))
6963 {
6964 fprintf (dump_file, "Replacing exit test: ");
6965 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6966 }
6967 compare = cp->comp;
6968 bound = unshare_expr (fold_convert (var_type, bound));
6969 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6970 if (stmts)
6971 gsi_insert_seq_on_edge_immediate (
6972 loop_preheader_edge (data->current_loop),
6973 stmts);
6974
6975 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6976 gimple_cond_set_lhs (cond_stmt, var);
6977 gimple_cond_set_code (cond_stmt, compare);
6978 gimple_cond_set_rhs (cond_stmt, op);
6979 return;
6980 }
6981
6982 /* The induction variable elimination failed; just express the original
6983 giv. */
6984 comp = get_computation (data->current_loop, use, cand);
6985 gcc_assert (comp != NULL_TREE);
6986
6987 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6988 gcc_assert (ok);
6989
6990 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6991 true, GSI_SAME_STMT);
6992 }
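
/* For illustration, a typical exit-test replacement performed above, with
   hypothetical SSA names: when elimination succeeded and CP->VALUE holds a
   precomputed bound, a test such as

     if (i_1 < n_2)

   may become

     if (ivtmp_3 != bound_4)

   where bound_4 is computed once on the preheader edge and CP->COMP
   supplies the new comparison code. */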
6993
6994 /* Rewrites USE using candidate CAND. */
6995
6996 static void
6997 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6998 {
6999 switch (use->type)
7000 {
7001 case USE_NONLINEAR_EXPR:
7002 rewrite_use_nonlinear_expr (data, use, cand);
7003 break;
7004
7005 case USE_ADDRESS:
7006 rewrite_use_address (data, use, cand);
7007 break;
7008
7009 case USE_COMPARE:
7010 rewrite_use_compare (data, use, cand);
7011 break;
7012
7013 default:
7014 gcc_unreachable ();
7015 }
7016
7017 update_stmt (use->stmt);
7018 }
7019
7020 /* Rewrite the uses using the selected induction variables. */
7021
7022 static void
7023 rewrite_uses (struct ivopts_data *data)
7024 {
7025 unsigned i;
7026 struct iv_cand *cand;
7027 struct iv_use *use;
7028
7029 for (i = 0; i < n_iv_uses (data); i++)
7030 {
7031 use = iv_use (data, i);
7032 cand = use->selected;
7033 gcc_assert (cand);
7034
7035 rewrite_use (data, use, cand);
7036 }
7037 }
7038
7039 /* Removes the ivs that are not used after rewriting. */
7040
7041 static void
7042 remove_unused_ivs (struct ivopts_data *data)
7043 {
7044 unsigned j;
7045 bitmap_iterator bi;
7046 bitmap toremove = BITMAP_ALLOC (NULL);
7047
7048 /* Figure out an order in which to release SSA DEFs so that we don't
7049 release something that we'd have to propagate into a debug stmt
7050 afterwards. */
7051 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7052 {
7053 struct version_info *info;
7054
7055 info = ver_info (data, j);
7056 if (info->iv
7057 && !integer_zerop (info->iv->step)
7058 && !info->inv_id
7059 && !info->iv->have_use_for
7060 && !info->preserve_biv)
7061 {
7062 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7063
7064 tree def = info->iv->ssa_name;
7065
7066 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7067 {
7068 imm_use_iterator imm_iter;
7069 use_operand_p use_p;
7070 gimple stmt;
7071 int count = 0;
7072
7073 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7074 {
7075 if (!gimple_debug_bind_p (stmt))
7076 continue;
7077
7078 /* We just want to determine whether to do nothing
7079 (count == 0), to substitute the computed
7080 expression into a single use of the SSA DEF by
7081 itself (count == 1), or to use a debug temp
7082 because the SSA DEF is used multiple times or as
7083 part of a larger expression (count > 1). */
7084 count++;
7085 if (gimple_debug_bind_get_value (stmt) != def)
7086 count++;
7087
7088 if (count > 1)
7089 BREAK_FROM_IMM_USE_STMT (imm_iter);
7090 }
7091
7092 if (!count)
7093 continue;
7094
7095 struct iv_use dummy_use;
7096 struct iv_cand *best_cand = NULL, *cand;
7097 unsigned i, best_pref = 0, cand_pref;
7098
7099 memset (&dummy_use, 0, sizeof (dummy_use));
7100 dummy_use.iv = info->iv;
7101 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7102 {
7103 cand = iv_use (data, i)->selected;
7104 if (cand == best_cand)
7105 continue;
7106 cand_pref = operand_equal_p (cand->iv->step,
7107 info->iv->step, 0)
7108 ? 4 : 0;
7109 cand_pref
7110 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7111 == TYPE_MODE (TREE_TYPE (info->iv->base))
7112 ? 2 : 0;
7113 cand_pref
7114 += TREE_CODE (cand->iv->base) == INTEGER_CST
7115 ? 1 : 0;
7116 if (best_cand == NULL || best_pref < cand_pref)
7117 {
7118 best_cand = cand;
7119 best_pref = cand_pref;
7120 }
7121 }
7122
7123 if (!best_cand)
7124 continue;
7125
7126 tree comp = get_computation_at (data->current_loop,
7127 &dummy_use, best_cand,
7128 SSA_NAME_DEF_STMT (def));
7129 if (!comp)
7130 continue;
7131
7132 if (count > 1)
7133 {
7134 tree vexpr = make_node (DEBUG_EXPR_DECL);
7135 DECL_ARTIFICIAL (vexpr) = 1;
7136 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7137 if (SSA_NAME_VAR (def))
7138 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7139 else
7140 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7141 gdebug *def_temp
7142 = gimple_build_debug_bind (vexpr, comp, NULL);
7143 gimple_stmt_iterator gsi;
7144
7145 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7146 gsi = gsi_after_labels (gimple_bb
7147 (SSA_NAME_DEF_STMT (def)));
7148 else
7149 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7150
7151 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7152 comp = vexpr;
7153 }
7154
7155 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7156 {
7157 if (!gimple_debug_bind_p (stmt))
7158 continue;
7159
7160 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7161 SET_USE (use_p, comp);
7162
7163 update_stmt (stmt);
7164 }
7165 }
7166 }
7167 }
7168
7169 release_defs_bitset (toremove);
7170
7171 BITMAP_FREE (toremove);
7172 }
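
/* For illustration: when a removed IV has multiple debug uses (COUNT > 1
   above), a debug temporary keeps the replacement expression in one place,
   e.g. (hypothetical dump excerpt)

     # DEBUG D#1 => i_5 * 4 + base_6
     # DEBUG i => D#1

   instead of duplicating the computation into every debug bind. */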
7173
7174 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7175 for hash_map::traverse. */
7176
7177 bool
7178 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7179 {
7180 free (value);
7181 return true;
7182 }
7183
7184 /* Frees data allocated by the optimization of a single loop. */
7185
7186 static void
7187 free_loop_data (struct ivopts_data *data)
7188 {
7189 unsigned i, j;
7190 bitmap_iterator bi;
7191 tree obj;
7192
7193 if (data->niters)
7194 {
7195 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7196 delete data->niters;
7197 data->niters = NULL;
7198 }
7199
7200 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7201 {
7202 struct version_info *info;
7203
7204 info = ver_info (data, i);
7205 free (info->iv);
7206 info->iv = NULL;
7207 info->has_nonlin_use = false;
7208 info->preserve_biv = false;
7209 info->inv_id = 0;
7210 }
7211 bitmap_clear (data->relevant);
7212 bitmap_clear (data->important_candidates);
7213
7214 for (i = 0; i < n_iv_uses (data); i++)
7215 {
7216 struct iv_use *use = iv_use (data, i);
7217 struct iv_use *pre = use, *sub = use->next;
7218
7219 while (sub)
7220 {
7221 gcc_assert (sub->related_cands == NULL);
7222 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7223
7224 free (sub->iv);
7225 pre = sub;
7226 sub = sub->next;
7227 free (pre);
7228 }
7229
7230 free (use->iv);
7231 BITMAP_FREE (use->related_cands);
7232 for (j = 0; j < use->n_map_members; j++)
7233 if (use->cost_map[j].depends_on)
7234 BITMAP_FREE (use->cost_map[j].depends_on);
7235 free (use->cost_map);
7236 free (use);
7237 }
7238 data->iv_uses.truncate (0);
7239
7240 for (i = 0; i < n_iv_cands (data); i++)
7241 {
7242 struct iv_cand *cand = iv_cand (data, i);
7243
7244 free (cand->iv);
7245 if (cand->depends_on)
7246 BITMAP_FREE (cand->depends_on);
7247 free (cand);
7248 }
7249 data->iv_candidates.truncate (0);
7250
7251 if (data->version_info_size < num_ssa_names)
7252 {
7253 data->version_info_size = 2 * num_ssa_names;
7254 free (data->version_info);
7255 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7256 }
7257
7258 data->max_inv_id = 0;
7259
7260 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7261 SET_DECL_RTL (obj, NULL_RTX);
7262
7263 decl_rtl_to_reset.truncate (0);
7264
7265 data->inv_expr_tab->empty ();
7266 data->inv_expr_id = 0;
7267 }
7268
7269 /* Finalizes the data structures used by the iv optimization pass. */
7271
7272 static void
7273 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7274 {
7275 free_loop_data (data);
7276 free (data->version_info);
7277 BITMAP_FREE (data->relevant);
7278 BITMAP_FREE (data->important_candidates);
7279
7280 decl_rtl_to_reset.release ();
7281 data->iv_uses.release ();
7282 data->iv_candidates.release ();
7283 delete data->inv_expr_tab;
7284 data->inv_expr_tab = NULL;
7285 free_affine_expand_cache (&data->name_expansion_cache);
7286 }
7287
7288 /* Returns true if the loop body BODY includes any function calls. */
7289
7290 static bool
7291 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7292 {
7293 gimple_stmt_iterator gsi;
7294 unsigned i;
7295
7296 for (i = 0; i < num_nodes; i++)
7297 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7298 {
7299 gimple stmt = gsi_stmt (gsi);
7300 if (is_gimple_call (stmt)
7301 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7302 return true;
7303 }
7304 return false;
7305 }
7306
7307 /* Optimizes the LOOP. Returns true if anything changed. */
7308
7309 static bool
7310 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7311 {
7312 bool changed = false;
7313 struct iv_ca *iv_ca;
7314 edge exit = single_dom_exit (loop);
7315 basic_block *body;
7316
7317 gcc_assert (!data->niters);
7318 data->current_loop = loop;
7319 data->loop_loc = find_loop_location (loop);
7320 data->speed = optimize_loop_for_speed_p (loop);
7321
7322 if (dump_file && (dump_flags & TDF_DETAILS))
7323 {
7324 fprintf (dump_file, "Processing loop %d", loop->num);
7325 if (data->loop_loc != UNKNOWN_LOCATION)
7326 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7327 LOCATION_LINE (data->loop_loc));
7328 fprintf (dump_file, "\n");
7329
7330 if (exit)
7331 {
7332 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7333 exit->src->index, exit->dest->index);
7334 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7335 fprintf (dump_file, "\n");
7336 }
7337
7338 fprintf (dump_file, "\n");
7339 }
7340
7341 body = get_loop_body (loop);
7342 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7343 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7344 free (body);
7345
7346 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7347
7348 /* For each ssa name determines whether it behaves as an induction variable
7349 in some loop. */
7350 if (!find_induction_variables (data))
7351 goto finish;
7352
7353 /* Finds interesting uses (item 1). */
7354 find_interesting_uses (data);
7355 group_address_uses (data);
7356 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7357 goto finish;
7358
7359 /* Finds candidates for the induction variables (item 2). */
7360 find_iv_candidates (data);
7361
7362 /* Calculates the costs (item 3, part 1). */
7363 determine_iv_costs (data);
7364 determine_use_iv_costs (data);
7365 determine_set_costs (data);
7366
7367 /* Find the optimal set of induction variables (item 3, part 2). */
7368 iv_ca = find_optimal_iv_set (data);
7369 if (!iv_ca)
7370 goto finish;
7371 changed = true;
7372
7373 /* Create the new induction variables (item 4, part 1). */
7374 create_new_ivs (data, iv_ca);
7375 iv_ca_free (&iv_ca);
7376
7377 /* Rewrite the uses (item 4, part 2). */
7378 rewrite_uses (data);
7379
7380 /* Remove the ivs that are unused after rewriting. */
7381 remove_unused_ivs (data);
7382
7383 /* We have changed the structure of induction variables; it might happen
7384 that definitions in the scev database refer to some of them that were
7385 eliminated. */
7386 scev_reset ();
7387
7388 finish:
7389 free_loop_data (data);
7390
7391 return changed;
7392 }
7393
7394 /* Main entry point. Optimizes induction variables in loops. */
7395
7396 void
7397 tree_ssa_iv_optimize (void)
7398 {
7399 struct loop *loop;
7400 struct ivopts_data data;
7401
7402 tree_ssa_iv_optimize_init (&data);
7403
7404 /* Optimize the loops starting with the innermost ones. */
7405 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7406 {
7407 if (dump_file && (dump_flags & TDF_DETAILS))
7408 flow_loop_dump (loop, dump_file, NULL, 1);
7409
7410 tree_ssa_iv_optimize_loop (&data, loop);
7411 }
7412
7413 tree_ssa_iv_optimize_finalize (&data);
7414 }