tree-ssa-loop-ivopts.c (struct ivopts_data): New field iv_obstack.
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It proceeds in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45          the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60    possible; it might give better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
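
/* For illustration only: on a simple loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may, roughly speaking, strength-reduce the address computation
   &a[i] (which hides a multiplication by the element size) into a pointer
   induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   The exact outcome depends on the target costs computed by this pass.  */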
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "alias.h"
69 #include "symtab.h"
70 #include "tree.h"
71 #include "fold-const.h"
72 #include "stor-layout.h"
73 #include "tm_p.h"
74 #include "predict.h"
75 #include "hard-reg-set.h"
76 #include "function.h"
77 #include "dominance.h"
78 #include "cfg.h"
79 #include "basic-block.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-ssa-alias.h"
82 #include "internal-fn.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "gimple.h"
86 #include "gimplify.h"
87 #include "gimple-iterator.h"
88 #include "gimplify-me.h"
89 #include "gimple-ssa.h"
90 #include "cgraph.h"
91 #include "tree-cfg.h"
92 #include "tree-phinodes.h"
93 #include "ssa-iterators.h"
94 #include "stringpool.h"
95 #include "tree-ssanames.h"
96 #include "tree-ssa-loop-ivopts.h"
97 #include "tree-ssa-loop-manip.h"
98 #include "tree-ssa-loop-niter.h"
99 #include "tree-ssa-loop.h"
100 #include "rtl.h"
101 #include "flags.h"
102 #include "insn-config.h"
103 #include "expmed.h"
104 #include "dojump.h"
105 #include "explow.h"
106 #include "calls.h"
107 #include "emit-rtl.h"
108 #include "varasm.h"
109 #include "stmt.h"
110 #include "expr.h"
111 #include "tree-dfa.h"
112 #include "tree-ssa.h"
113 #include "cfgloop.h"
114 #include "tree-pass.h"
115 #include "tree-chrec.h"
116 #include "tree-scalar-evolution.h"
117 #include "params.h"
118 #include "langhooks.h"
119 #include "tree-affine.h"
120 #include "target.h"
121 #include "tree-inline.h"
122 #include "tree-ssa-propagate.h"
123 #include "tree-ssa-address.h"
124 #include "builtins.h"
125 #include "tree-vectorizer.h"
126
127 /* FIXME: Expressions are expanded to RTL in this pass to determine the
128 cost of different addressing modes. This should be moved to a TBD
129 interface between the GIMPLE and RTL worlds. */
130 #include "recog.h"
131
132 /* The infinite cost. */
133 #define INFTY 10000000
134
135 #define AVG_LOOP_NITER(LOOP) 5
136
137 /* Returns the expected number of loop iterations for LOOP.
138 The average trip count is computed from profile data if it
139 exists. */
140
141 static inline HOST_WIDE_INT
142 avg_loop_niter (struct loop *loop)
143 {
144 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
145 if (niter == -1)
146 return AVG_LOOP_NITER (loop);
147
148 return niter;
149 }
150
151 /* Representation of the induction variable. */
152 struct iv
153 {
154 tree base; /* Initial value of the iv. */
155   tree base_object;	/* A memory object to which the induction variable points.  */
156 tree step; /* Step of the iv (constant only). */
157 tree ssa_name; /* The ssa name with the value. */
158   unsigned use_id;	/* The identifier of the use recorded for this iv, if any.  */
159 bool biv_p; /* Is it a biv? */
160 bool have_use_for; /* Do we already have a use for it? */
161 bool no_overflow; /* True if the iv doesn't overflow. */
162 };
163
164 /* Per-ssa version information (induction variable descriptions, etc.). */
165 struct version_info
166 {
167 tree name; /* The ssa name. */
168 struct iv *iv; /* Induction variable description. */
169 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
170 an expression that is not an induction variable. */
171 bool preserve_biv; /* For the original biv, whether to preserve it. */
172 unsigned inv_id; /* Id of an invariant. */
173 };
174
175 /* Types of uses. */
176 enum use_type
177 {
178 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
179 USE_ADDRESS, /* Use in an address. */
180 USE_COMPARE /* Use is a compare. */
181 };
182
183 /* Cost of a computation. */
184 typedef struct
185 {
186 int cost; /* The runtime cost. */
187 unsigned complexity; /* The estimate of the complexity of the code for
188 the computation (in no concrete units --
189 complexity field should be larger for more
190 complex expressions and addressing modes). */
191 } comp_cost;
192
193 static const comp_cost no_cost = {0, 0};
194 static const comp_cost infinite_cost = {INFTY, INFTY};
195
196 /* The candidate - cost pair. */
197 struct cost_pair
198 {
199 struct iv_cand *cand; /* The candidate. */
200 comp_cost cost; /* The cost. */
201 bitmap depends_on; /* The list of invariants that have to be
202 preserved. */
203 tree value; /* For final value elimination, the expression for
204 the final value of the iv. For iv elimination,
205 the new bound to compare with. */
206 enum tree_code comp; /* For iv elimination, the comparison. */
207 int inv_expr_id; /* Loop invariant expression id. */
208 };
209
210 /* Use. */
211 struct iv_use
212 {
213 unsigned id; /* The id of the use. */
214 unsigned sub_id; /* The id of the sub use. */
215 enum use_type type; /* Type of the use. */
216 struct iv *iv; /* The induction variable it is based on. */
217   gimple stmt;		/* Statement in which it occurs.  */
218 tree *op_p; /* The place where it occurs. */
219 bitmap related_cands; /* The set of "related" iv candidates, plus the common
220 important ones. */
221
222 unsigned n_map_members; /* Number of candidates in the cost_map list. */
223 struct cost_pair *cost_map;
224                    /* The costs w.r.t. the iv candidates.  */
225
226 struct iv_cand *selected;
227 /* The selected candidate. */
228
229 struct iv_use *next; /* The next sub use. */
230 tree addr_base; /* Base address with const offset stripped. */
231 unsigned HOST_WIDE_INT addr_offset;
232 /* Const offset stripped from base address. */
233 };
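
/* For illustration only: address uses that share the same base object, step
   and stripped base address are chained as sub uses of a single iv_use and
   distinguished by ADDR_OFFSET.  For a hypothetical loop body containing

     a[i] = 1;
     a[i + 4] = 2;

   both stores can end up in one group whose sub use list is kept in
   ascending offset order (see record_sub_use below).  */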
234
235 /* The position where the iv is computed. */
236 enum iv_position
237 {
238 IP_NORMAL, /* At the end, just before the exit condition. */
239 IP_END, /* At the end of the latch block. */
240 IP_BEFORE_USE, /* Immediately before a specific use. */
241 IP_AFTER_USE, /* Immediately after a specific use. */
242 IP_ORIGINAL /* The original biv. */
243 };
244
245 /* The induction variable candidate. */
246 struct iv_cand
247 {
248 unsigned id; /* The number of the candidate. */
249 bool important; /* Whether this is an "important" candidate, i.e. such
250 that it should be considered by all uses. */
251 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
252 gimple incremented_at;/* For original biv, the statement where it is
253 incremented. */
254 tree var_before; /* The variable used for it before increment. */
255 tree var_after; /* The variable used for it after increment. */
256 struct iv *iv; /* The value of the candidate. NULL for
257 "pseudocandidate" used to indicate the possibility
258 to replace the final value of an iv by direct
259 computation of the value. */
260 unsigned cost; /* Cost of the candidate. */
261 unsigned cost_step; /* Cost of the candidate's increment operation. */
262 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
263 where it is incremented. */
264 bitmap depends_on; /* The list of invariants that are used in step of the
265 biv. */
266 };
267
268 /* Loop invariant expression hashtable entry. */
269 struct iv_inv_expr_ent
270 {
271 tree expr;
272 int id;
273 hashval_t hash;
274 };
275
276 /* The data used by the induction variable optimizations. */
277
278 typedef struct iv_use *iv_use_p;
279
280 typedef struct iv_cand *iv_cand_p;
281
282 /* Hashtable helpers. */
283
284 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
285 {
286 static inline hashval_t hash (const iv_inv_expr_ent *);
287 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
288 };
289
290 /* Hash function for loop invariant expressions. */
291
292 inline hashval_t
293 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
294 {
295 return expr->hash;
296 }
297
298 /* Hash table equality function for expressions. */
299
300 inline bool
301 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
302 const iv_inv_expr_ent *expr2)
303 {
304 return expr1->hash == expr2->hash
305 && operand_equal_p (expr1->expr, expr2->expr, 0);
306 }
307
308 struct ivopts_data
309 {
310 /* The currently optimized loop. */
311 struct loop *current_loop;
312 source_location loop_loc;
313
314 /* Numbers of iterations for all exits of the current loop. */
315 hash_map<edge, tree_niter_desc *> *niters;
316
317   /* Number of registers used in the current loop.  */
318 unsigned regs_used;
319
320 /* The size of version_info array allocated. */
321 unsigned version_info_size;
322
323 /* The array of information for the ssa names. */
324 struct version_info *version_info;
325
326 /* The hashtable of loop invariant expressions created
327 by ivopt. */
328 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
329
330 /* Loop invariant expression id. */
331 int inv_expr_id;
332
333 /* The bitmap of indices in version_info whose value was changed. */
334 bitmap relevant;
335
336 /* The uses of induction variables. */
337 vec<iv_use_p> iv_uses;
338
339 /* The candidates. */
340 vec<iv_cand_p> iv_candidates;
341
342 /* A bitmap of important candidates. */
343 bitmap important_candidates;
344
345 /* Cache used by tree_to_aff_combination_expand. */
346 hash_map<tree, name_expansion *> *name_expansion_cache;
347
348 /* The maximum invariant id. */
349 unsigned max_inv_id;
350
351 /* Obstack for iv structure. */
352 struct obstack iv_obstack;
353
354 /* Whether to consider just related and important candidates when replacing a
355 use. */
356 bool consider_all_candidates;
357
358 /* Are we optimizing for speed? */
359 bool speed;
360
361 /* Whether the loop body includes any function calls. */
362 bool body_includes_call;
363
364   /* Whether the loop body can only be exited via a single exit.  */
365 bool loop_single_exit_p;
366 };
367
368 /* An assignment of iv candidates to uses. */
369
370 struct iv_ca
371 {
372 /* The number of uses covered by the assignment. */
373 unsigned upto;
374
375 /* Number of uses that cannot be expressed by the candidates in the set. */
376 unsigned bad_uses;
377
378 /* Candidate assigned to a use, together with the related costs. */
379 struct cost_pair **cand_for_use;
380
381 /* Number of times each candidate is used. */
382 unsigned *n_cand_uses;
383
384 /* The candidates used. */
385 bitmap cands;
386
387 /* The number of candidates in the set. */
388 unsigned n_cands;
389
390 /* Total number of registers needed. */
391 unsigned n_regs;
392
393 /* Total cost of expressing uses. */
394 comp_cost cand_use_cost;
395
396 /* Total cost of candidates. */
397 unsigned cand_cost;
398
399 /* Number of times each invariant is used. */
400 unsigned *n_invariant_uses;
401
402 /* The array holding the number of uses of each loop
403      invariant expression created by ivopt.  */
404 unsigned *used_inv_expr;
405
406 /* The number of created loop invariants. */
407 unsigned num_used_inv_expr;
408
409 /* Total cost of the assignment. */
410 comp_cost cost;
411 };
412
413 /* Difference of two iv candidate assignments. */
414
415 struct iv_ca_delta
416 {
417 /* Changed use. */
418 struct iv_use *use;
419
420 /* An old assignment (for rollback purposes). */
421 struct cost_pair *old_cp;
422
423 /* A new assignment. */
424 struct cost_pair *new_cp;
425
426 /* Next change in the list. */
427 struct iv_ca_delta *next_change;
428 };
429
430 /* Bound on the number of candidates below which all candidates are considered.  */
431
432 #define CONSIDER_ALL_CANDIDATES_BOUND \
433 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
434
435 /* If there are more iv occurrences, we just give up (it is quite unlikely that
436 optimizing such a loop would help, and it would take ages). */
437
438 #define MAX_CONSIDERED_USES \
439 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
440
441 /* If there are at most this number of ivs in the set, always try removing
442    unnecessary ivs from the set.  */
443
444 #define ALWAYS_PRUNE_CAND_SET_BOUND \
445 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
446
447 /* The list of trees for which the decl_rtl field must be reset is stored
448 here. */
449
450 static vec<tree> decl_rtl_to_reset;
451
452 static comp_cost force_expr_to_var_cost (tree, bool);
453
454 /* Number of uses recorded in DATA. */
455
456 static inline unsigned
457 n_iv_uses (struct ivopts_data *data)
458 {
459 return data->iv_uses.length ();
460 }
461
462 /* Ith use recorded in DATA. */
463
464 static inline struct iv_use *
465 iv_use (struct ivopts_data *data, unsigned i)
466 {
467 return data->iv_uses[i];
468 }
469
470 /* Number of candidates recorded in DATA. */
471
472 static inline unsigned
473 n_iv_cands (struct ivopts_data *data)
474 {
475 return data->iv_candidates.length ();
476 }
477
478 /* Ith candidate recorded in DATA. */
479
480 static inline struct iv_cand *
481 iv_cand (struct ivopts_data *data, unsigned i)
482 {
483 return data->iv_candidates[i];
484 }
485
486 /* The single loop exit if it dominates the latch, NULL otherwise. */
487
488 edge
489 single_dom_exit (struct loop *loop)
490 {
491 edge exit = single_exit (loop);
492
493 if (!exit)
494 return NULL;
495
496 if (!just_once_each_iteration_p (loop, exit->src))
497 return NULL;
498
499 return exit;
500 }
501
502 /* Dumps information about the induction variable IV to FILE. */
503
504 void
505 dump_iv (FILE *file, struct iv *iv, bool dump_name)
506 {
507 if (iv->ssa_name && dump_name)
508 {
509 fprintf (file, "ssa name ");
510 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
511 fprintf (file, "\n");
512 }
513
514 fprintf (file, " type ");
515 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
516 fprintf (file, "\n");
517
518 if (iv->step)
519 {
520 fprintf (file, " base ");
521 print_generic_expr (file, iv->base, TDF_SLIM);
522 fprintf (file, "\n");
523
524 fprintf (file, " step ");
525 print_generic_expr (file, iv->step, TDF_SLIM);
526 fprintf (file, "\n");
527 }
528 else
529 {
530 fprintf (file, " invariant ");
531 print_generic_expr (file, iv->base, TDF_SLIM);
532 fprintf (file, "\n");
533 }
534
535 if (iv->base_object)
536 {
537 fprintf (file, " base object ");
538 print_generic_expr (file, iv->base_object, TDF_SLIM);
539 fprintf (file, "\n");
540 }
541
542 if (iv->biv_p)
543 fprintf (file, " is a biv\n");
544 }
545
546 /* Dumps information about the USE to FILE. */
547
548 void
549 dump_use (FILE *file, struct iv_use *use)
550 {
551 fprintf (file, "use %d", use->id);
552 if (use->sub_id)
553 fprintf (file, ".%d", use->sub_id);
554
555 fprintf (file, "\n");
556
557 switch (use->type)
558 {
559 case USE_NONLINEAR_EXPR:
560 fprintf (file, " generic\n");
561 break;
562
563 case USE_ADDRESS:
564 fprintf (file, " address\n");
565 break;
566
567 case USE_COMPARE:
568 fprintf (file, " compare\n");
569 break;
570
571 default:
572 gcc_unreachable ();
573 }
574
575 fprintf (file, " in statement ");
576 print_gimple_stmt (file, use->stmt, 0, 0);
577 fprintf (file, "\n");
578
579 fprintf (file, " at position ");
580 if (use->op_p)
581 print_generic_expr (file, *use->op_p, TDF_SLIM);
582 fprintf (file, "\n");
583
584 dump_iv (file, use->iv, false);
585
586 if (use->related_cands)
587 {
588 fprintf (file, " related candidates ");
589 dump_bitmap (file, use->related_cands);
590 }
591 }
592
593 /* Dumps information about the uses to FILE. */
594
595 void
596 dump_uses (FILE *file, struct ivopts_data *data)
597 {
598 unsigned i;
599 struct iv_use *use;
600
601 for (i = 0; i < n_iv_uses (data); i++)
602 {
603 use = iv_use (data, i);
604 do
605 {
606 dump_use (file, use);
607 use = use->next;
608 }
609 while (use);
610 fprintf (file, "\n");
611 }
612 }
613
614 /* Dumps information about induction variable candidate CAND to FILE. */
615
616 void
617 dump_cand (FILE *file, struct iv_cand *cand)
618 {
619 struct iv *iv = cand->iv;
620
621 fprintf (file, "candidate %d%s\n",
622 cand->id, cand->important ? " (important)" : "");
623
624 if (cand->depends_on)
625 {
626 fprintf (file, " depends on ");
627 dump_bitmap (file, cand->depends_on);
628 }
629
630 if (!iv)
631 {
632 fprintf (file, " final value replacement\n");
633 return;
634 }
635
636 if (cand->var_before)
637 {
638 fprintf (file, " var_before ");
639 print_generic_expr (file, cand->var_before, TDF_SLIM);
640 fprintf (file, "\n");
641 }
642 if (cand->var_after)
643 {
644 fprintf (file, " var_after ");
645 print_generic_expr (file, cand->var_after, TDF_SLIM);
646 fprintf (file, "\n");
647 }
648
649 switch (cand->pos)
650 {
651 case IP_NORMAL:
652 fprintf (file, " incremented before exit test\n");
653 break;
654
655 case IP_BEFORE_USE:
656 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
657 break;
658
659 case IP_AFTER_USE:
660 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
661 break;
662
663 case IP_END:
664 fprintf (file, " incremented at end\n");
665 break;
666
667 case IP_ORIGINAL:
668 fprintf (file, " original biv\n");
669 break;
670 }
671
672 dump_iv (file, iv, false);
673 }
674
675 /* Returns the info for ssa version VER. */
676
677 static inline struct version_info *
678 ver_info (struct ivopts_data *data, unsigned ver)
679 {
680 return data->version_info + ver;
681 }
682
683 /* Returns the info for ssa name NAME. */
684
685 static inline struct version_info *
686 name_info (struct ivopts_data *data, tree name)
687 {
688 return ver_info (data, SSA_NAME_VERSION (name));
689 }
690
691 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
692 emitted in LOOP. */
693
694 static bool
695 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
696 {
697 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
698
699 gcc_assert (bb);
700
701 if (sbb == loop->latch)
702 return true;
703
704 if (sbb != bb)
705 return false;
706
707 return stmt == last_stmt (bb);
708 }
709
710 /* Returns true if STMT is after the place where the original induction
711 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
712 if the positions are identical. */
713
714 static bool
715 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
716 {
717 basic_block cand_bb = gimple_bb (cand->incremented_at);
718 basic_block stmt_bb = gimple_bb (stmt);
719
720 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
721 return false;
722
723 if (stmt_bb != cand_bb)
724 return true;
725
726 if (true_if_equal
727 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
728 return true;
729 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
730 }
731
732 /* Returns true if STMT is after the place where the induction variable
733 CAND is incremented in LOOP. */
734
735 static bool
736 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
737 {
738 switch (cand->pos)
739 {
740 case IP_END:
741 return false;
742
743 case IP_NORMAL:
744 return stmt_after_ip_normal_pos (loop, stmt);
745
746 case IP_ORIGINAL:
747 case IP_AFTER_USE:
748 return stmt_after_inc_pos (cand, stmt, false);
749
750 case IP_BEFORE_USE:
751 return stmt_after_inc_pos (cand, stmt, true);
752
753 default:
754 gcc_unreachable ();
755 }
756 }
757
758 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
759
760 static bool
761 abnormal_ssa_name_p (tree exp)
762 {
763 if (!exp)
764 return false;
765
766 if (TREE_CODE (exp) != SSA_NAME)
767 return false;
768
769 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
770 }
771
772 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
773 abnormal phi node. Callback for for_each_index. */
774
775 static bool
776 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
777 void *data ATTRIBUTE_UNUSED)
778 {
779 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
780 {
781 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
782 return false;
783 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
784 return false;
785 }
786
787 return !abnormal_ssa_name_p (*index);
788 }
789
790 /* Returns true if EXPR contains a ssa name that occurs in an
791 abnormal phi node. */
792
793 bool
794 contains_abnormal_ssa_name_p (tree expr)
795 {
796 enum tree_code code;
797 enum tree_code_class codeclass;
798
799 if (!expr)
800 return false;
801
802 code = TREE_CODE (expr);
803 codeclass = TREE_CODE_CLASS (code);
804
805 if (code == SSA_NAME)
806 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
807
808 if (code == INTEGER_CST
809 || is_gimple_min_invariant (expr))
810 return false;
811
812 if (code == ADDR_EXPR)
813 return !for_each_index (&TREE_OPERAND (expr, 0),
814 idx_contains_abnormal_ssa_name_p,
815 NULL);
816
817 if (code == COND_EXPR)
818 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
819 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
821
822 switch (codeclass)
823 {
824 case tcc_binary:
825 case tcc_comparison:
826 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
827 return true;
828
829 /* Fallthru. */
830 case tcc_unary:
831 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
832 return true;
833
834 break;
835
836 default:
837 gcc_unreachable ();
838 }
839
840 return false;
841 }
842
843 /* Returns the structure describing number of iterations determined from
844 EXIT of DATA->current_loop, or NULL if something goes wrong. */
845
846 static struct tree_niter_desc *
847 niter_for_exit (struct ivopts_data *data, edge exit)
848 {
849 struct tree_niter_desc *desc;
850 tree_niter_desc **slot;
851
852 if (!data->niters)
853 {
854 data->niters = new hash_map<edge, tree_niter_desc *>;
855 slot = NULL;
856 }
857 else
858 slot = data->niters->get (exit);
859
860 if (!slot)
861 {
862       /* Try to determine the number of iterations.  We cannot safely work with ssa
863          names that appear in phi nodes on abnormal edges, so as not to
864          create overlapping live ranges for them (PR 27283).  */
865 desc = XNEW (struct tree_niter_desc);
866 if (!number_of_iterations_exit (data->current_loop,
867 exit, desc, true)
868 || contains_abnormal_ssa_name_p (desc->niter))
869 {
870 XDELETE (desc);
871 desc = NULL;
872 }
873 data->niters->put (exit, desc);
874 }
875 else
876 desc = *slot;
877
878 return desc;
879 }
880
881 /* Returns the structure describing number of iterations determined from
882 single dominating exit of DATA->current_loop, or NULL if something
883 goes wrong. */
884
885 static struct tree_niter_desc *
886 niter_for_single_dom_exit (struct ivopts_data *data)
887 {
888 edge exit = single_dom_exit (data->current_loop);
889
890 if (!exit)
891 return NULL;
892
893 return niter_for_exit (data, exit);
894 }
895
896 /* Initializes data structures used by the iv optimization pass, stored
897 in DATA. */
898
899 static void
900 tree_ssa_iv_optimize_init (struct ivopts_data *data)
901 {
902 data->version_info_size = 2 * num_ssa_names;
903 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
904 data->relevant = BITMAP_ALLOC (NULL);
905 data->important_candidates = BITMAP_ALLOC (NULL);
906 data->max_inv_id = 0;
907 data->niters = NULL;
908 data->iv_uses.create (20);
909 data->iv_candidates.create (20);
910 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
911 data->inv_expr_id = 0;
912 data->name_expansion_cache = NULL;
913 decl_rtl_to_reset.create (20);
914 gcc_obstack_init (&data->iv_obstack);
915 }
916
917 /* Returns a memory object to which EXPR points.  If we are able to
918    determine that it does not point to any such object, NULL is returned.  */
919
920 static tree
921 determine_base_object (tree expr)
922 {
923 enum tree_code code = TREE_CODE (expr);
924 tree base, obj;
925
926 /* If this is a pointer casted to any type, we need to determine
927 the base object for the pointer; so handle conversions before
928 throwing away non-pointer expressions. */
929 if (CONVERT_EXPR_P (expr))
930 return determine_base_object (TREE_OPERAND (expr, 0));
931
932 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
933 return NULL_TREE;
934
935 switch (code)
936 {
937 case INTEGER_CST:
938 return NULL_TREE;
939
940 case ADDR_EXPR:
941 obj = TREE_OPERAND (expr, 0);
942 base = get_base_address (obj);
943
944 if (!base)
945 return expr;
946
947 if (TREE_CODE (base) == MEM_REF)
948 return determine_base_object (TREE_OPERAND (base, 0));
949
950 return fold_convert (ptr_type_node,
951 build_fold_addr_expr (base));
952
953 case POINTER_PLUS_EXPR:
954 return determine_base_object (TREE_OPERAND (expr, 0));
955
956 case PLUS_EXPR:
957 case MINUS_EXPR:
958 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
959 gcc_unreachable ();
960
961 default:
962 return fold_convert (ptr_type_node, expr);
963 }
964 }
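
/* For illustration only: for a hypothetical expression &a[i] (an ADDR_EXPR
   of an ARRAY_REF whose base address is the declaration 'a'), the function
   above returns, roughly, &a converted to ptr_type_node; for an integer
   constant it returns NULL_TREE; and for a POINTER_PLUS_EXPR such as
   p + 4 it recurses on the pointer operand p.  */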
965
966 /* Return true if an address expression with a non-DECL_P operand appears
967 in EXPR. */
968
969 static bool
970 contain_complex_addr_expr (tree expr)
971 {
972 bool res = false;
973
974 STRIP_NOPS (expr);
975 switch (TREE_CODE (expr))
976 {
977 case POINTER_PLUS_EXPR:
978 case PLUS_EXPR:
979 case MINUS_EXPR:
980 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
981 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
982 break;
983
984 case ADDR_EXPR:
985 return (!DECL_P (TREE_OPERAND (expr, 0)));
986
987 default:
988 return false;
989 }
990
991 return res;
992 }
993
994 /* Allocates an induction variable with given initial value BASE and step STEP
995    for the loop described by DATA.  NO_OVERFLOW implies the iv doesn't overflow.  */
996
997 static struct iv *
998 alloc_iv (struct ivopts_data *data, tree base, tree step,
999 bool no_overflow = false)
1000 {
1001 tree expr = base;
1002 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1003 sizeof (struct iv));
1004 gcc_assert (step != NULL_TREE);
1005
1006   /* Lower address expressions in base, except those with a DECL_P operand.
1007 By doing this:
1008 1) More accurate cost can be computed for address expressions;
1009 2) Duplicate candidates won't be created for bases in different
1010 forms, like &a[0] and &a. */
1011 STRIP_NOPS (expr);
1012 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1013 || contain_complex_addr_expr (expr))
1014 {
1015 aff_tree comb;
1016 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1017 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1018 }
1019
1020 iv->base = base;
1021 iv->base_object = determine_base_object (base);
1022 iv->step = step;
1023 iv->biv_p = false;
1024 iv->have_use_for = false;
1025 iv->use_id = 0;
1026 iv->ssa_name = NULL_TREE;
1027 iv->no_overflow = no_overflow;
1028
1029 return iv;
1030 }
1031
1032 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1033 doesn't overflow. */
1034
1035 static void
1036 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1037 bool no_overflow)
1038 {
1039 struct version_info *info = name_info (data, iv);
1040
1041 gcc_assert (!info->iv);
1042
1043 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1044 info->iv = alloc_iv (data, base, step, no_overflow);
1045 info->iv->ssa_name = iv;
1046 }
1047
1048 /* Finds induction variable declaration for VAR. */
1049
1050 static struct iv *
1051 get_iv (struct ivopts_data *data, tree var)
1052 {
1053 basic_block bb;
1054 tree type = TREE_TYPE (var);
1055
1056 if (!POINTER_TYPE_P (type)
1057 && !INTEGRAL_TYPE_P (type))
1058 return NULL;
1059
1060 if (!name_info (data, var)->iv)
1061 {
1062 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1063
1064 if (!bb
1065 || !flow_bb_inside_loop_p (data->current_loop, bb))
1066 set_iv (data, var, var, build_int_cst (type, 0), true);
1067 }
1068
1069 return name_info (data, var)->iv;
1070 }
1071
1072 /* Return the first non-invariant ssa var found in EXPR. */
1073
1074 static tree
1075 extract_single_var_from_expr (tree expr)
1076 {
1077 int i, n;
1078 tree tmp;
1079 enum tree_code code;
1080
1081 if (!expr || is_gimple_min_invariant (expr))
1082 return NULL;
1083
1084 code = TREE_CODE (expr);
1085 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1086 {
1087 n = TREE_OPERAND_LENGTH (expr);
1088 for (i = 0; i < n; i++)
1089 {
1090 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1091
1092 if (tmp)
1093 return tmp;
1094 }
1095 }
1096 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1097 }
1098
1099 /* Finds basic ivs. */
1100
1101 static bool
1102 find_bivs (struct ivopts_data *data)
1103 {
1104 gphi *phi;
1105 affine_iv iv;
1106 tree step, type, base, stop;
1107 bool found = false;
1108 struct loop *loop = data->current_loop;
1109 gphi_iterator psi;
1110
1111 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1112 {
1113 phi = psi.phi ();
1114
1115 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1116 continue;
1117
1118 if (virtual_operand_p (PHI_RESULT (phi)))
1119 continue;
1120
1121 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1122 continue;
1123
1124 if (integer_zerop (iv.step))
1125 continue;
1126
1127 step = iv.step;
1128 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1129       /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1130 	 Ideally we would stop at any ssa var, but since that is expensive
1131 	 and such cases are unusual, we just do it for the first one.
1132
1133 See PR64705 for the rationale. */
1134 stop = extract_single_var_from_expr (step);
1135 base = expand_simple_operations (base, stop);
1136 if (contains_abnormal_ssa_name_p (base)
1137 || contains_abnormal_ssa_name_p (step))
1138 continue;
1139
1140 type = TREE_TYPE (PHI_RESULT (phi));
1141 base = fold_convert (type, base);
1142 if (step)
1143 {
1144 if (POINTER_TYPE_P (type))
1145 step = convert_to_ptrofftype (step);
1146 else
1147 step = fold_convert (type, step);
1148 }
1149
1150 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1151 found = true;
1152 }
1153
1154 return found;
1155 }
1156
1157 /* Marks basic ivs. */
1158
1159 static void
1160 mark_bivs (struct ivopts_data *data)
1161 {
1162 gphi *phi;
1163 gimple def;
1164 tree var;
1165 struct iv *iv, *incr_iv;
1166 struct loop *loop = data->current_loop;
1167 basic_block incr_bb;
1168 gphi_iterator psi;
1169
1170 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1171 {
1172 phi = psi.phi ();
1173
1174 iv = get_iv (data, PHI_RESULT (phi));
1175 if (!iv)
1176 continue;
1177
1178 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1179 def = SSA_NAME_DEF_STMT (var);
1180       /* Don't mark an iv peeled from another one as a biv.  */
1181 if (def
1182 && gimple_code (def) == GIMPLE_PHI
1183 && gimple_bb (def) == loop->header)
1184 continue;
1185
1186 incr_iv = get_iv (data, var);
1187 if (!incr_iv)
1188 continue;
1189
1190 /* If the increment is in the subloop, ignore it. */
1191 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1192 if (incr_bb->loop_father != data->current_loop
1193 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1194 continue;
1195
1196 iv->biv_p = true;
1197 incr_iv->biv_p = true;
1198 }
1199 }
1200
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202 parameters to IV. */
1203
1204 static bool
1205 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1206 {
1207 tree lhs, stop;
1208 struct loop *loop = data->current_loop;
1209
1210 iv->base = NULL_TREE;
1211 iv->step = NULL_TREE;
1212
1213 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1214 return false;
1215
1216 lhs = gimple_assign_lhs (stmt);
1217 if (TREE_CODE (lhs) != SSA_NAME)
1218 return false;
1219
1220 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1221 return false;
1222
1223   /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1224      Ideally we would stop at any ssa var, but since that is expensive
1225      and such cases are unusual, we just do it for the first one.
1226
1227 See PR64705 for the rationale. */
1228 stop = extract_single_var_from_expr (iv->step);
1229 iv->base = expand_simple_operations (iv->base, stop);
1230 if (contains_abnormal_ssa_name_p (iv->base)
1231 || contains_abnormal_ssa_name_p (iv->step))
1232 return false;
1233
1234 /* If STMT could throw, then do not consider STMT as defining a GIV.
1235      While this will suppress optimizations, we cannot safely delete this
1236 GIV and associated statements, even if it appears it is not used. */
1237 if (stmt_could_throw_p (stmt))
1238 return false;
1239
1240 return true;
1241 }
1242
1243 /* Finds general ivs in statement STMT. */
1244
1245 static void
1246 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1247 {
1248 affine_iv iv;
1249
1250 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1251 return;
1252
1253 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1254 }
1255
1256 /* Finds general ivs in basic block BB. */
1257
1258 static void
1259 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1260 {
1261 gimple_stmt_iterator bsi;
1262
1263 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1264 find_givs_in_stmt (data, gsi_stmt (bsi));
1265 }
1266
1267 /* Finds general ivs. */
1268
1269 static void
1270 find_givs (struct ivopts_data *data)
1271 {
1272 struct loop *loop = data->current_loop;
1273 basic_block *body = get_loop_body_in_dom_order (loop);
1274 unsigned i;
1275
1276 for (i = 0; i < loop->num_nodes; i++)
1277 find_givs_in_bb (data, body[i]);
1278 free (body);
1279 }
1280
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282 variable and if so, its initial value and step. */
1283
1284 static bool
1285 find_induction_variables (struct ivopts_data *data)
1286 {
1287 unsigned i;
1288 bitmap_iterator bi;
1289
1290 if (!find_bivs (data))
1291 return false;
1292
1293 find_givs (data);
1294 mark_bivs (data);
1295
1296 if (dump_file && (dump_flags & TDF_DETAILS))
1297 {
1298 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1299
1300 if (niter)
1301 {
1302 fprintf (dump_file, " number of iterations ");
1303 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1304 if (!integer_zerop (niter->may_be_zero))
1305 {
1306 fprintf (dump_file, "; zero if ");
1307 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1308 }
1309 fprintf (dump_file, "\n\n");
1310 	}
1311
1312 fprintf (dump_file, "Induction variables:\n\n");
1313
1314 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1315 {
1316 if (ver_info (data, i)->iv)
1317 dump_iv (dump_file, ver_info (data, i)->iv, true);
1318 }
1319 }
1320
1321 return true;
1322 }
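
/* For illustration only: in a hypothetical loop in SSA form

     i_1 = PHI <0 (preheader), i_2 (latch)>
     p_3 = &a[i_1];
     i_2 = i_1 + 1;

   i_1 is a basic induction variable (found by find_bivs from the header
   phi and marked by mark_bivs together with i_2), while p_3 is a general
   induction variable found by find_givs, with base &a[0] and, assuming
   4-byte elements, step 4.  */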
1323
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326 is the const offset stripped from IV base. For uses of other types,
1327 ADDR_BASE and ADDR_OFFSET are zero by default. */
1328
1329 static struct iv_use *
1330 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1331 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1332 unsigned HOST_WIDE_INT addr_offset = 0)
1333 {
1334 struct iv_use *use = XCNEW (struct iv_use);
1335
1336 use->id = n_iv_uses (data);
1337 use->sub_id = 0;
1338 use->type = use_type;
1339 use->iv = iv;
1340 use->stmt = stmt;
1341 use->op_p = use_p;
1342 use->related_cands = BITMAP_ALLOC (NULL);
1343 use->next = NULL;
1344 use->addr_base = addr_base;
1345 use->addr_offset = addr_offset;
1346
1347 data->iv_uses.safe_push (use);
1348
1349 return use;
1350 }
1351
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353 The sub use is recorded under the one whose use id is ID_GROUP. */
1354
1355 static struct iv_use *
1356 record_sub_use (struct ivopts_data *data, tree *use_p,
1357 struct iv *iv, gimple stmt, enum use_type use_type,
1358 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1359 unsigned int id_group)
1360 {
1361 struct iv_use *use = XCNEW (struct iv_use);
1362 struct iv_use *group = iv_use (data, id_group);
1363
1364 use->id = group->id;
1365 use->sub_id = 0;
1366 use->type = use_type;
1367 use->iv = iv;
1368 use->stmt = stmt;
1369 use->op_p = use_p;
1370 use->related_cands = NULL;
1371 use->addr_base = addr_base;
1372 use->addr_offset = addr_offset;
1373
1374 /* Sub use list is maintained in offset ascending order. */
1375 if (addr_offset <= group->addr_offset)
1376 {
1377 use->related_cands = group->related_cands;
1378 group->related_cands = NULL;
1379 use->next = group;
1380 data->iv_uses[id_group] = use;
1381 }
1382 else
1383 {
1384 struct iv_use *pre;
1385 do
1386 {
1387 pre = group;
1388 group = group->next;
1389 }
1390 while (group && addr_offset > group->addr_offset);
1391 use->next = pre->next;
1392 pre->next = use;
1393 }
1394
1395 return use;
1396 }
1397
1398 /* Checks whether OP is a loop-level invariant and if so, records it.
1399 NONLINEAR_USE is true if the invariant is used in a way we do not
1400 handle specially. */
1401
1402 static void
1403 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1404 {
1405 basic_block bb;
1406 struct version_info *info;
1407
1408 if (TREE_CODE (op) != SSA_NAME
1409 || virtual_operand_p (op))
1410 return;
1411
1412 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1413 if (bb
1414 && flow_bb_inside_loop_p (data->current_loop, bb))
1415 return;
1416
1417 info = name_info (data, op);
1418 info->name = op;
1419 info->has_nonlin_use |= nonlinear_use;
1420 if (!info->inv_id)
1421 info->inv_id = ++data->max_inv_id;
1422 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1423 }
1424
1425 /* Checks whether the use OP is interesting and if so, records it. */
1426
1427 static struct iv_use *
1428 find_interesting_uses_op (struct ivopts_data *data, tree op)
1429 {
1430 struct iv *iv;
1431 gimple stmt;
1432 struct iv_use *use;
1433
1434 if (TREE_CODE (op) != SSA_NAME)
1435 return NULL;
1436
1437 iv = get_iv (data, op);
1438 if (!iv)
1439 return NULL;
1440
1441 if (iv->have_use_for)
1442 {
1443 use = iv_use (data, iv->use_id);
1444
1445 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1446 return use;
1447 }
1448
1449 if (integer_zerop (iv->step))
1450 {
1451 record_invariant (data, op, true);
1452 return NULL;
1453 }
1454 iv->have_use_for = true;
1455
1456 stmt = SSA_NAME_DEF_STMT (op);
1457 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1458 || is_gimple_assign (stmt));
1459
1460 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1461 iv->use_id = use->id;
1462
1463 return use;
1464 }
1465
1466 /* Given a condition in statement STMT, checks whether it is a compare
1467 of an induction variable and an invariant. If this is the case,
1468 CONTROL_VAR is set to location of the iv, BOUND to the location of
1469 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1470 induction variable descriptions, and true is returned. If this is not
1471 the case, CONTROL_VAR and BOUND are set to the arguments of the
1472 condition and false is returned. */
1473
1474 static bool
1475 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1476 tree **control_var, tree **bound,
1477 struct iv **iv_var, struct iv **iv_bound)
1478 {
1479 /* The objects returned when COND has constant operands. */
1480 static struct iv const_iv;
1481 static tree zero;
1482 tree *op0 = &zero, *op1 = &zero;
1483 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1484 bool ret = false;
1485
1486 if (gimple_code (stmt) == GIMPLE_COND)
1487 {
1488 gcond *cond_stmt = as_a <gcond *> (stmt);
1489 op0 = gimple_cond_lhs_ptr (cond_stmt);
1490 op1 = gimple_cond_rhs_ptr (cond_stmt);
1491 }
1492 else
1493 {
1494 op0 = gimple_assign_rhs1_ptr (stmt);
1495 op1 = gimple_assign_rhs2_ptr (stmt);
1496 }
1497
1498 zero = integer_zero_node;
1499 const_iv.step = integer_zero_node;
1500
1501 if (TREE_CODE (*op0) == SSA_NAME)
1502 iv0 = get_iv (data, *op0);
1503 if (TREE_CODE (*op1) == SSA_NAME)
1504 iv1 = get_iv (data, *op1);
1505
1506 /* Exactly one of the compared values must be an iv, and the other one must
1507 be an invariant. */
1508 if (!iv0 || !iv1)
1509 goto end;
1510
1511 if (integer_zerop (iv0->step))
1512 {
1513 /* Control variable may be on the other side. */
1514 std::swap (op0, op1);
1515 std::swap (iv0, iv1);
1516 }
1517 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1518
1519 end:
1520 if (control_var)
1521 *control_var = op0;
1522 if (iv_var)
1523 *iv_var = iv0;
1524 if (bound)
1525 *bound = op1;
1526 if (iv_bound)
1527 *iv_bound = iv1;
1528
1529 return ret;
1530 }
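
/* For illustration only: for a hypothetical exit test

     if (i_1 < n_2)

   where i_1 is an induction variable and n_2 is loop invariant, the
   function above returns true with *CONTROL_VAR pointing to the i_1
   operand and *BOUND pointing to the n_2 operand; the two sides are
   swapped first if the induction variable happens to be on the right.  */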
1531
1532 /* Checks whether the condition in STMT is interesting and if so,
1533 records it. */
1534
1535 static void
1536 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1537 {
1538 tree *var_p, *bound_p;
1539 struct iv *var_iv;
1540
1541 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1542 {
1543 find_interesting_uses_op (data, *var_p);
1544 find_interesting_uses_op (data, *bound_p);
1545 return;
1546 }
1547
1548 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1549 }
1550
1551 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR
1552    is obviously invariant, i.e. such that all of its operands are defined
1553    outside of the returned loop.  Returns NULL if EXPR is not
1554    even obviously invariant in LOOP.  */
1555
1556 struct loop *
1557 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1558 {
1559 basic_block def_bb;
1560 unsigned i, len;
1561
1562 if (is_gimple_min_invariant (expr))
1563 return current_loops->tree_root;
1564
1565 if (TREE_CODE (expr) == SSA_NAME)
1566 {
1567 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1568 if (def_bb)
1569 {
1570 if (flow_bb_inside_loop_p (loop, def_bb))
1571 return NULL;
1572 return superloop_at_depth (loop,
1573 loop_depth (def_bb->loop_father) + 1);
1574 }
1575
1576 return current_loops->tree_root;
1577 }
1578
1579 if (!EXPR_P (expr))
1580 return NULL;
1581
1582 unsigned maxdepth = 0;
1583 len = TREE_OPERAND_LENGTH (expr);
1584 for (i = 0; i < len; i++)
1585 {
1586 struct loop *ivloop;
1587 if (!TREE_OPERAND (expr, i))
1588 continue;
1589
1590 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1591 if (!ivloop)
1592 return NULL;
1593 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1594 }
1595
1596 return superloop_at_depth (loop, maxdepth);
1597 }
1598
1599 /* Returns true if expression EXPR is obviously invariant in LOOP,
1600 i.e. if all its operands are defined outside of the LOOP. LOOP
1601 should not be the function body. */
1602
1603 bool
1604 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1605 {
1606 basic_block def_bb;
1607 unsigned i, len;
1608
1609 gcc_assert (loop_depth (loop) > 0);
1610
1611 if (is_gimple_min_invariant (expr))
1612 return true;
1613
1614 if (TREE_CODE (expr) == SSA_NAME)
1615 {
1616 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1617 if (def_bb
1618 && flow_bb_inside_loop_p (loop, def_bb))
1619 return false;
1620
1621 return true;
1622 }
1623
1624 if (!EXPR_P (expr))
1625 return false;
1626
1627 len = TREE_OPERAND_LENGTH (expr);
1628 for (i = 0; i < len; i++)
1629 if (TREE_OPERAND (expr, i)
1630 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1631 return false;
1632
1633 return true;
1634 }
1635
1636 /* Accumulates the steps of indices into DATA and replaces their values with the
1637 initial ones. Returns false when the value of the index cannot be determined.
1638 Callback for for_each_index. */
1639
1640 struct ifs_ivopts_data
1641 {
1642 struct ivopts_data *ivopts_data;
1643 gimple stmt;
1644 tree step;
1645 };
1646
1647 static bool
1648 idx_find_step (tree base, tree *idx, void *data)
1649 {
1650 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1651 struct iv *iv;
1652 bool use_overflow_semantics = false;
1653 tree step, iv_base, iv_step, lbound, off;
1654 struct loop *loop = dta->ivopts_data->current_loop;
1655
1656 /* If base is a component ref, require that the offset of the reference
1657 be invariant. */
1658 if (TREE_CODE (base) == COMPONENT_REF)
1659 {
1660 off = component_ref_field_offset (base);
1661 return expr_invariant_in_loop_p (loop, off);
1662 }
1663
1664 /* If base is array, first check whether we will be able to move the
1665 reference out of the loop (in order to take its address in strength
1666 reduction). In order for this to work we need both lower bound
1667 and step to be loop invariants. */
1668 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1669 {
1670 /* Moreover, for a range, the size needs to be invariant as well. */
1671 if (TREE_CODE (base) == ARRAY_RANGE_REF
1672 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1673 return false;
1674
1675 step = array_ref_element_size (base);
1676 lbound = array_ref_low_bound (base);
1677
1678 if (!expr_invariant_in_loop_p (loop, step)
1679 || !expr_invariant_in_loop_p (loop, lbound))
1680 return false;
1681 }
1682
1683 if (TREE_CODE (*idx) != SSA_NAME)
1684 return true;
1685
1686 iv = get_iv (dta->ivopts_data, *idx);
1687 if (!iv)
1688 return false;
1689
1690   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1691 	  *&x[0], which is not folded and does not trigger the
1692 	  ARRAY_REF path below.  */
1693 *idx = iv->base;
1694
1695 if (integer_zerop (iv->step))
1696 return true;
1697
1698 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1699 {
1700 step = array_ref_element_size (base);
1701
1702 /* We only handle addresses whose step is an integer constant. */
1703 if (TREE_CODE (step) != INTEGER_CST)
1704 return false;
1705 }
1706 else
1707     /* The step for pointer arithmetic is already 1 byte.  */
1708 step = size_one_node;
1709
1710 iv_base = iv->base;
1711 iv_step = iv->step;
1712 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1713 use_overflow_semantics = true;
1714
1715 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1716 sizetype, &iv_base, &iv_step, dta->stmt,
1717 use_overflow_semantics))
1718 {
1719 /* The index might wrap. */
1720 return false;
1721 }
1722
1723 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1724 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1725
1726 return true;
1727 }
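
/* For illustration only: when the callback above visits a hypothetical
   reference a[i_1], where a has 4-byte elements and i_1 is an induction
   variable with step 1, it replaces the index with the initial value of
   i_1 and adds 4 * 1 = 4 to DTA->step, so the step of the whole address
   (in bytes) is accumulated over all indices.  */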
1728
1729 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1730 object is passed to it in DATA. */
1731
1732 static bool
1733 idx_record_use (tree base, tree *idx,
1734 void *vdata)
1735 {
1736 struct ivopts_data *data = (struct ivopts_data *) vdata;
1737 find_interesting_uses_op (data, *idx);
1738 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1739 {
1740 find_interesting_uses_op (data, array_ref_element_size (base));
1741 find_interesting_uses_op (data, array_ref_low_bound (base));
1742 }
1743 return true;
1744 }
1745
1746 /* If we can prove that TOP = cst * BOT for some constant cst,
1747 store cst to MUL and return true. Otherwise return false.
1748 The returned value is always sign-extended, regardless of the
1749 signedness of TOP and BOT. */
1750
1751 static bool
1752 constant_multiple_of (tree top, tree bot, widest_int *mul)
1753 {
1754 tree mby;
1755 enum tree_code code;
1756 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1757 widest_int res, p0, p1;
1758
1759 STRIP_NOPS (top);
1760 STRIP_NOPS (bot);
1761
1762 if (operand_equal_p (top, bot, 0))
1763 {
1764 *mul = 1;
1765 return true;
1766 }
1767
1768 code = TREE_CODE (top);
1769 switch (code)
1770 {
1771 case MULT_EXPR:
1772 mby = TREE_OPERAND (top, 1);
1773 if (TREE_CODE (mby) != INTEGER_CST)
1774 return false;
1775
1776 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1777 return false;
1778
1779 *mul = wi::sext (res * wi::to_widest (mby), precision);
1780 return true;
1781
1782 case PLUS_EXPR:
1783 case MINUS_EXPR:
1784 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1785 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1786 return false;
1787
1788 if (code == MINUS_EXPR)
1789 p1 = -p1;
1790 *mul = wi::sext (p0 + p1, precision);
1791 return true;
1792
1793 case INTEGER_CST:
1794 if (TREE_CODE (bot) != INTEGER_CST)
1795 return false;
1796
1797 p0 = widest_int::from (top, SIGNED);
1798 p1 = widest_int::from (bot, SIGNED);
1799 if (p1 == 0)
1800 return false;
1801 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1802 return res == 0;
1803
1804 default:
1805 return false;
1806 }
1807 }
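
/* For illustration only: with a hypothetical TOP of i_1 * 8 and BOT of
   i_1, the MULT_EXPR case above sets *MUL to 8; with TOP = 12 and
   BOT = 4 (both INTEGER_CSTs) it sets *MUL to 3; with TOP = 13 and
   BOT = 4 it returns false because the division leaves a remainder.  */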
1808
1809 /* Return true if memory reference REF with step STEP may be unaligned. */
1810
1811 static bool
1812 may_be_unaligned_p (tree ref, tree step)
1813 {
1814 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1815 thus they are not misaligned. */
1816 if (TREE_CODE (ref) == TARGET_MEM_REF)
1817 return false;
1818
1819 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1820 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1821 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1822
1823 unsigned HOST_WIDE_INT bitpos;
1824 unsigned int ref_align;
1825 get_object_alignment_1 (ref, &ref_align, &bitpos);
1826 if (ref_align < align
1827 || (bitpos % align) != 0
1828 || (bitpos % BITS_PER_UNIT) != 0)
1829 return true;
1830
1831 unsigned int trailing_zeros = tree_ctz (step);
1832 if (trailing_zeros < HOST_BITS_PER_INT
1833 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1834 return true;
1835
1836 return false;
1837 }
1838
1839 /* Return true if EXPR may be non-addressable. */
1840
1841 bool
1842 may_be_nonaddressable_p (tree expr)
1843 {
1844 switch (TREE_CODE (expr))
1845 {
1846 case TARGET_MEM_REF:
1847 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1848 target, thus they are always addressable. */
1849 return false;
1850
1851 case COMPONENT_REF:
1852 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1853 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1854
1855 case VIEW_CONVERT_EXPR:
1856       /* This kind of view-conversion may wrap non-addressable objects
1857 and make them look addressable. After some processing the
1858 non-addressability may be uncovered again, causing ADDR_EXPRs
1859 of inappropriate objects to be built. */
1860 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1861 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1862 return true;
1863
1864 /* ... fall through ... */
1865
1866 case ARRAY_REF:
1867 case ARRAY_RANGE_REF:
1868 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1869
1870 CASE_CONVERT:
1871 return true;
1872
1873 default:
1874 break;
1875 }
1876
1877 return false;
1878 }
1879
1880 static tree
1881 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1882
1883 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1884    If there is an existing use with the same stripped iv base and step,
1885 this function records this one as a sub use to that; otherwise records
1886 it as a normal one. */
1887
1888 static struct iv_use *
1889 record_group_use (struct ivopts_data *data, tree *use_p,
1890 struct iv *iv, gimple stmt, enum use_type use_type)
1891 {
1892 unsigned int i;
1893 struct iv_use *use;
1894 tree addr_base;
1895 unsigned HOST_WIDE_INT addr_offset;
1896
1897   /* Sub uses are only supported for address type uses, that is, those
1898      with a base object.  */
1899 if (!iv->base_object)
1900 return record_use (data, use_p, iv, stmt, use_type);
1901
1902 addr_base = strip_offset (iv->base, &addr_offset);
1903 for (i = 0; i < n_iv_uses (data); i++)
1904 {
1905 use = iv_use (data, i);
1906 if (use->type != USE_ADDRESS || !use->iv->base_object)
1907 continue;
1908
1909 /* Check if it has the same stripped base and step. */
1910 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1911 && operand_equal_p (iv->step, use->iv->step, 0)
1912 && operand_equal_p (addr_base, use->addr_base, 0))
1913 break;
1914 }
1915
1916 if (i == n_iv_uses (data))
1917 return record_use (data, use_p, iv, stmt,
1918 use_type, addr_base, addr_offset);
1919 else
1920 return record_sub_use (data, use_p, iv, stmt,
1921 use_type, addr_base, addr_offset, i);
1922 }
1923
1924 /* Finds addresses in *OP_P inside STMT. */
1925
1926 static void
1927 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1928 {
1929 tree base = *op_p, step = size_zero_node;
1930 struct iv *civ;
1931 struct ifs_ivopts_data ifs_ivopts_data;
1932
1933 /* Do not play with volatile memory references. A bit too conservative,
1934 perhaps, but safe. */
1935 if (gimple_has_volatile_ops (stmt))
1936 goto fail;
1937
1938 /* Ignore bitfields for now. Not really something terribly complicated
1939 to handle. TODO. */
1940 if (TREE_CODE (base) == BIT_FIELD_REF)
1941 goto fail;
1942
1943 base = unshare_expr (base);
1944
1945 if (TREE_CODE (base) == TARGET_MEM_REF)
1946 {
1947 tree type = build_pointer_type (TREE_TYPE (base));
1948 tree astep;
1949
1950 if (TMR_BASE (base)
1951 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1952 {
1953 civ = get_iv (data, TMR_BASE (base));
1954 if (!civ)
1955 goto fail;
1956
1957 TMR_BASE (base) = civ->base;
1958 step = civ->step;
1959 }
1960 if (TMR_INDEX2 (base)
1961 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1962 {
1963 civ = get_iv (data, TMR_INDEX2 (base));
1964 if (!civ)
1965 goto fail;
1966
1967 TMR_INDEX2 (base) = civ->base;
1968 step = civ->step;
1969 }
1970 if (TMR_INDEX (base)
1971 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1972 {
1973 civ = get_iv (data, TMR_INDEX (base));
1974 if (!civ)
1975 goto fail;
1976
1977 TMR_INDEX (base) = civ->base;
1978 astep = civ->step;
1979
1980 if (astep)
1981 {
1982 if (TMR_STEP (base))
1983 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1984
1985 step = fold_build2 (PLUS_EXPR, type, step, astep);
1986 }
1987 }
1988
1989 if (integer_zerop (step))
1990 goto fail;
1991 base = tree_mem_ref_addr (type, base);
1992 }
1993 else
1994 {
1995 ifs_ivopts_data.ivopts_data = data;
1996 ifs_ivopts_data.stmt = stmt;
1997 ifs_ivopts_data.step = size_zero_node;
1998 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1999 || integer_zerop (ifs_ivopts_data.step))
2000 goto fail;
2001 step = ifs_ivopts_data.step;
2002
2003 /* Check that the base expression is addressable. This needs
2004 to be done after substituting bases of IVs into it. */
2005 if (may_be_nonaddressable_p (base))
2006 goto fail;
2007
2008 /* Moreover, on strict alignment platforms, check that it is
2009 sufficiently aligned. */
2010 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2011 goto fail;
2012
2013 base = build_fold_addr_expr (base);
2014
2015 /* Substituting bases of IVs into the base expression might
2016 have caused folding opportunities. */
2017 if (TREE_CODE (base) == ADDR_EXPR)
2018 {
2019 tree *ref = &TREE_OPERAND (base, 0);
2020 while (handled_component_p (*ref))
2021 ref = &TREE_OPERAND (*ref, 0);
2022 if (TREE_CODE (*ref) == MEM_REF)
2023 {
2024 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2025 TREE_OPERAND (*ref, 0),
2026 TREE_OPERAND (*ref, 1));
2027 if (tem)
2028 *ref = tem;
2029 }
2030 }
2031 }
2032
2033 civ = alloc_iv (data, base, step);
2034 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2035 return;
2036
2037 fail:
2038 for_each_index (op_p, idx_record_use, data);
2039 }
2040
2041 /* Finds and records invariants used in STMT. */
2042
2043 static void
2044 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2045 {
2046 ssa_op_iter iter;
2047 use_operand_p use_p;
2048 tree op;
2049
2050 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2051 {
2052 op = USE_FROM_PTR (use_p);
2053 record_invariant (data, op, false);
2054 }
2055 }
2056
2057 /* Finds interesting uses of induction variables in the statement STMT. */
2058
2059 static void
2060 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2061 {
2062 struct iv *iv;
2063 tree op, *lhs, *rhs;
2064 ssa_op_iter iter;
2065 use_operand_p use_p;
2066 enum tree_code code;
2067
2068 find_invariants_stmt (data, stmt);
2069
2070 if (gimple_code (stmt) == GIMPLE_COND)
2071 {
2072 find_interesting_uses_cond (data, stmt);
2073 return;
2074 }
2075
2076 if (is_gimple_assign (stmt))
2077 {
2078 lhs = gimple_assign_lhs_ptr (stmt);
2079 rhs = gimple_assign_rhs1_ptr (stmt);
2080
2081 if (TREE_CODE (*lhs) == SSA_NAME)
2082 {
2083 /* If the statement defines an induction variable, the uses are not
2084 interesting by themselves. */
2085
2086 iv = get_iv (data, *lhs);
2087
2088 if (iv && !integer_zerop (iv->step))
2089 return;
2090 }
2091
2092 code = gimple_assign_rhs_code (stmt);
2093 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2094 && (REFERENCE_CLASS_P (*rhs)
2095 || is_gimple_val (*rhs)))
2096 {
2097 if (REFERENCE_CLASS_P (*rhs))
2098 find_interesting_uses_address (data, stmt, rhs);
2099 else
2100 find_interesting_uses_op (data, *rhs);
2101
2102 if (REFERENCE_CLASS_P (*lhs))
2103 find_interesting_uses_address (data, stmt, lhs);
2104 return;
2105 }
2106 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2107 {
2108 find_interesting_uses_cond (data, stmt);
2109 return;
2110 }
2111
2112 /* TODO -- we should also handle address uses of type
2113
2114 memory = call (whatever);
2115
2116 and
2117
2118 call (memory). */
2119 }
2120
2121 if (gimple_code (stmt) == GIMPLE_PHI
2122 && gimple_bb (stmt) == data->current_loop->header)
2123 {
2124 iv = get_iv (data, PHI_RESULT (stmt));
2125
2126 if (iv && !integer_zerop (iv->step))
2127 return;
2128 }
2129
2130 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2131 {
2132 op = USE_FROM_PTR (use_p);
2133
2134 if (TREE_CODE (op) != SSA_NAME)
2135 continue;
2136
2137 iv = get_iv (data, op);
2138 if (!iv)
2139 continue;
2140
2141 find_interesting_uses_op (data, op);
2142 }
2143 }
2144
2145 /* Finds interesting uses of induction variables outside of loops
2146 on loop exit edge EXIT. */
2147
2148 static void
2149 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2150 {
2151 gphi *phi;
2152 gphi_iterator psi;
2153 tree def;
2154
2155 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2156 {
2157 phi = psi.phi ();
2158 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2159 if (!virtual_operand_p (def))
2160 find_interesting_uses_op (data, def);
2161 }
2162 }
2163
2164 /* Finds uses of the induction variables that are interesting. */
2165
2166 static void
2167 find_interesting_uses (struct ivopts_data *data)
2168 {
2169 basic_block bb;
2170 gimple_stmt_iterator bsi;
2171 basic_block *body = get_loop_body (data->current_loop);
2172 unsigned i;
2173 struct version_info *info;
2174 edge e;
2175
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2177 fprintf (dump_file, "Uses:\n\n");
2178
2179 for (i = 0; i < data->current_loop->num_nodes; i++)
2180 {
2181 edge_iterator ei;
2182 bb = body[i];
2183
2184 FOR_EACH_EDGE (e, ei, bb->succs)
2185 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2186 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2187 find_interesting_uses_outside (data, e);
2188
2189 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2190 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2191 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2192 if (!is_gimple_debug (gsi_stmt (bsi)))
2193 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2194 }
2195
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 {
2198 bitmap_iterator bi;
2199
2200 fprintf (dump_file, "\n");
2201
2202 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2203 {
2204 info = ver_info (data, i);
2205 if (info->inv_id)
2206 {
2207 fprintf (dump_file, " ");
2208 print_generic_expr (dump_file, info->name, TDF_SLIM);
2209 fprintf (dump_file, " is invariant (%d)%s\n",
2210 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2211 }
2212 }
2213
2214 fprintf (dump_file, "\n");
2215 }
2216
2217 free (body);
2218 }
2219
2220 /* Compute the maximum offset of the [base + offset] addressing mode
2221 for the memory reference represented by USE. */
2222
2223 static HOST_WIDE_INT
2224 compute_max_addr_offset (struct iv_use *use)
2225 {
2226 int width;
2227 rtx reg, addr;
2228 HOST_WIDE_INT i, off;
2229 unsigned list_index, num;
2230 addr_space_t as;
2231 machine_mode mem_mode, addr_mode;
2232 static vec<HOST_WIDE_INT> max_offset_list;
2233
2234 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2235 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2236
2237 num = max_offset_list.length ();
2238 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2239 if (list_index >= num)
2240 {
2241 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2242 for (; num < max_offset_list.length (); num++)
2243 max_offset_list[num] = -1;
2244 }
2245
2246 off = max_offset_list[list_index];
2247 if (off != -1)
2248 return off;
2249
2250 addr_mode = targetm.addr_space.address_mode (as);
2251 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2252 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2253
2254 width = GET_MODE_BITSIZE (addr_mode) - 1;
2255 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2256 width = HOST_BITS_PER_WIDE_INT - 1;
2257
2258 for (i = width; i > 0; i--)
2259 {
2260 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2261 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2262 if (memory_address_addr_space_p (mem_mode, addr, as))
2263 break;
2264
2265 /* For some strict-alignment targets, the offset must be naturally
2266 aligned. Try an aligned offset if mem_mode is not QImode. */
2267 off = ((unsigned HOST_WIDE_INT) 1 << i);
2268 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2269 {
2270 off -= GET_MODE_SIZE (mem_mode);
2271 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2272 if (memory_address_addr_space_p (mem_mode, addr, as))
2273 break;
2274 }
2275 }
2276 if (i == 0)
2277 off = 0;
2278
2279 max_offset_list[list_index] = off;
2280 return off;
2281 }
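/* A minimal standalone sketch (not part of the pass) of the probing idea
   used above: to find the largest constant offset the target accepts in a
   [base + offset] address, try 2^i - 1 for decreasing i until a validity
   predicate accepts it.  In the real code the predicate is
   memory_address_addr_space_p on a synthesized PLUS rtx; the names below
   are hypothetical.  */

static long
example_probe_max_offset (int width, int (*offset_valid_p) (long))
{
  int i;

  for (i = width; i > 0; i--)
    {
      long off = (1L << i) - 1;

      /* The first accepted probe is the largest representable offset.  */
      if (offset_valid_p (off))
	return off;
    }
  return 0;
}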
2282
2283 /* Check if all small groups should be split. Return true if and
2284 only if:
2285
2286 1) At least one group contains two uses with different offsets.
2287 2) No group contains more than two uses with different offsets.
2288
2289 Return false otherwise. We want to split such groups because:
2290
2291 1) Small groups don't have much benefit and may interfere with
2292 general candidate selection.
2293 2) The problem size with only small groups is usually small and the
2294 general algorithm can handle it well.
2295
2296 TODO -- Above claim may not hold when auto increment is supported. */
2297
2298 static bool
2299 split_all_small_groups (struct ivopts_data *data)
2300 {
2301 bool split_p = false;
2302 unsigned int i, n, distinct;
2303 struct iv_use *pre, *use;
2304
2305 n = n_iv_uses (data);
2306 for (i = 0; i < n; i++)
2307 {
2308 use = iv_use (data, i);
2309 if (!use->next)
2310 continue;
2311
2312 distinct = 1;
2313 gcc_assert (use->type == USE_ADDRESS);
2314 for (pre = use, use = use->next; use; pre = use, use = use->next)
2315 {
2316 if (pre->addr_offset != use->addr_offset)
2317 distinct++;
2318
2319 if (distinct > 2)
2320 return false;
2321 }
2322 if (distinct == 2)
2323 split_p = true;
2324 }
2325
2326 return split_p;
2327 }
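/* A standalone sketch (not part of the pass) of the split criterion
   documented above, applied to an array holding the number of distinct
   offsets seen in each multi-use group.  The names are hypothetical.  */

static int
example_should_split_small_groups (const unsigned *distinct, unsigned n_groups)
{
  unsigned i;
  int any_pair = 0;

  for (i = 0; i < n_groups; i++)
    {
      /* A group with more than two distinct offsets is worth keeping.  */
      if (distinct[i] > 2)
	return 0;
      if (distinct[i] == 2)
	any_pair = 1;
    }

  /* Split only when every group is small and at least one has a pair.  */
  return any_pair;
}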
2328
2329 /* For each group of address type uses, this function further groups
2330 these uses according to the maximum offset supported by the target's
2331 [base + offset] addressing mode. */
2332
2333 static void
2334 group_address_uses (struct ivopts_data *data)
2335 {
2336 HOST_WIDE_INT max_offset = -1;
2337 unsigned int i, n, sub_id;
2338 struct iv_use *pre, *use;
2339 unsigned HOST_WIDE_INT addr_offset_first;
2340
2341 /* Reset max offset to split all small groups. */
2342 if (split_all_small_groups (data))
2343 max_offset = 0;
2344
2345 n = n_iv_uses (data);
2346 for (i = 0; i < n; i++)
2347 {
2348 use = iv_use (data, i);
2349 if (!use->next)
2350 continue;
2351
2352 gcc_assert (use->type == USE_ADDRESS);
2353 if (max_offset != 0)
2354 max_offset = compute_max_addr_offset (use);
2355
2356 while (use)
2357 {
2358 sub_id = 0;
2359 addr_offset_first = use->addr_offset;
2360 /* Only uses whose offset fits in the offset part when measured
2361 against the first use can be grouped together. */
2362 for (pre = use, use = use->next;
2363 use && (use->addr_offset - addr_offset_first
2364 <= (unsigned HOST_WIDE_INT) max_offset);
2365 pre = use, use = use->next)
2366 {
2367 use->id = pre->id;
2368 use->sub_id = ++sub_id;
2369 }
2370
2371 /* Break the list and create new group. */
2372 if (use)
2373 {
2374 pre->next = NULL;
2375 use->id = n_iv_uses (data);
2376 use->related_cands = BITMAP_ALLOC (NULL);
2377 data->iv_uses.safe_push (use);
2378 }
2379 }
2380 }
2381
2382 if (dump_file && (dump_flags & TDF_DETAILS))
2383 dump_uses (dump_file, data);
2384 }
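/* A standalone sketch (not part of the pass) of the regrouping done above:
   uses sorted by increasing offset are partitioned so that every member of
   a group stays within MAX_OFFSET of the group's first member.  The real
   code walks the iv_use sub-use list instead of an array; all names below
   are hypothetical.  */

static unsigned
example_group_by_offset (const unsigned long *offsets, unsigned n,
			 unsigned long max_offset, unsigned *group_id)
{
  unsigned i, n_groups = 0;
  unsigned long first = 0;

  for (i = 0; i < n; i++)
    {
      /* Start a new group when the offset no longer fits against the
	 first use of the current group.  */
      if (i == 0 || offsets[i] - first > max_offset)
	{
	  first = offsets[i];
	  n_groups++;
	}
      group_id[i] = n_groups - 1;
    }
  return n_groups;
}

/* With offsets {0, 4, 8, 1024} and max_offset 255 this yields two groups,
   {0, 4, 8} and {1024}, mirroring how group_address_uses breaks the list.  */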
2385
2386 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2387 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2388 we are at the top-level of the processed address. */
2389
2390 static tree
2391 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2392 HOST_WIDE_INT *offset)
2393 {
2394 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2395 enum tree_code code;
2396 tree type, orig_type = TREE_TYPE (expr);
2397 HOST_WIDE_INT off0, off1, st;
2398 tree orig_expr = expr;
2399
2400 STRIP_NOPS (expr);
2401
2402 type = TREE_TYPE (expr);
2403 code = TREE_CODE (expr);
2404 *offset = 0;
2405
2406 switch (code)
2407 {
2408 case INTEGER_CST:
2409 if (!cst_and_fits_in_hwi (expr)
2410 || integer_zerop (expr))
2411 return orig_expr;
2412
2413 *offset = int_cst_value (expr);
2414 return build_int_cst (orig_type, 0);
2415
2416 case POINTER_PLUS_EXPR:
2417 case PLUS_EXPR:
2418 case MINUS_EXPR:
2419 op0 = TREE_OPERAND (expr, 0);
2420 op1 = TREE_OPERAND (expr, 1);
2421
2422 op0 = strip_offset_1 (op0, false, false, &off0);
2423 op1 = strip_offset_1 (op1, false, false, &off1);
2424
2425 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2426 if (op0 == TREE_OPERAND (expr, 0)
2427 && op1 == TREE_OPERAND (expr, 1))
2428 return orig_expr;
2429
2430 if (integer_zerop (op1))
2431 expr = op0;
2432 else if (integer_zerop (op0))
2433 {
2434 if (code == MINUS_EXPR)
2435 expr = fold_build1 (NEGATE_EXPR, type, op1);
2436 else
2437 expr = op1;
2438 }
2439 else
2440 expr = fold_build2 (code, type, op0, op1);
2441
2442 return fold_convert (orig_type, expr);
2443
2444 case MULT_EXPR:
2445 op1 = TREE_OPERAND (expr, 1);
2446 if (!cst_and_fits_in_hwi (op1))
2447 return orig_expr;
2448
2449 op0 = TREE_OPERAND (expr, 0);
2450 op0 = strip_offset_1 (op0, false, false, &off0);
2451 if (op0 == TREE_OPERAND (expr, 0))
2452 return orig_expr;
2453
2454 *offset = off0 * int_cst_value (op1);
2455 if (integer_zerop (op0))
2456 expr = op0;
2457 else
2458 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2459
2460 return fold_convert (orig_type, expr);
2461
2462 case ARRAY_REF:
2463 case ARRAY_RANGE_REF:
2464 if (!inside_addr)
2465 return orig_expr;
2466
2467 step = array_ref_element_size (expr);
2468 if (!cst_and_fits_in_hwi (step))
2469 break;
2470
2471 st = int_cst_value (step);
2472 op1 = TREE_OPERAND (expr, 1);
2473 op1 = strip_offset_1 (op1, false, false, &off1);
2474 *offset = off1 * st;
2475
2476 if (top_compref
2477 && integer_zerop (op1))
2478 {
2479 /* Strip the component reference completely. */
2480 op0 = TREE_OPERAND (expr, 0);
2481 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2482 *offset += off0;
2483 return op0;
2484 }
2485 break;
2486
2487 case COMPONENT_REF:
2488 {
2489 tree field;
2490
2491 if (!inside_addr)
2492 return orig_expr;
2493
2494 tmp = component_ref_field_offset (expr);
2495 field = TREE_OPERAND (expr, 1);
2496 if (top_compref
2497 && cst_and_fits_in_hwi (tmp)
2498 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2499 {
2500 HOST_WIDE_INT boffset, abs_off;
2501
2502 /* Strip the component reference completely. */
2503 op0 = TREE_OPERAND (expr, 0);
2504 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2505 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2506 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2507 if (boffset < 0)
2508 abs_off = -abs_off;
2509
2510 *offset = off0 + int_cst_value (tmp) + abs_off;
2511 return op0;
2512 }
2513 }
2514 break;
2515
2516 case ADDR_EXPR:
2517 op0 = TREE_OPERAND (expr, 0);
2518 op0 = strip_offset_1 (op0, true, true, &off0);
2519 *offset += off0;
2520
2521 if (op0 == TREE_OPERAND (expr, 0))
2522 return orig_expr;
2523
2524 expr = build_fold_addr_expr (op0);
2525 return fold_convert (orig_type, expr);
2526
2527 case MEM_REF:
2528 /* ??? Offset operand? */
2529 inside_addr = false;
2530 break;
2531
2532 default:
2533 return orig_expr;
2534 }
2535
2536 /* Default handling of expressions for which we want to recurse into
2537 the first operand. */
2538 op0 = TREE_OPERAND (expr, 0);
2539 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2540 *offset += off0;
2541
2542 if (op0 == TREE_OPERAND (expr, 0)
2543 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2544 return orig_expr;
2545
2546 expr = copy_node (expr);
2547 TREE_OPERAND (expr, 0) = op0;
2548 if (op1)
2549 TREE_OPERAND (expr, 1) = op1;
2550
2551 /* Inside an address, we might strip the top-level component references,
2552 thus changing the type of the expression. Handling of ADDR_EXPR
2553 will fix that. */
2554 expr = fold_convert (orig_type, expr);
2555
2556 return expr;
2557 }
2558
2559 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2560
2561 static tree
2562 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2563 {
2564 HOST_WIDE_INT off;
2565 tree core = strip_offset_1 (expr, false, false, &off);
2566 *offset = off;
2567 return core;
2568 }
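/* A standalone sketch (not part of the pass) of the idea behind
   strip_offset_1 on a toy expression representation: constant summands are
   accumulated into *OFFSET and removed from the returned expression, so
   (i + 4) + 16 becomes i with *OFFSET set to 20.  All names below are
   hypothetical and unrelated to GCC trees.  */

enum toy_code { TOY_CST, TOY_VAR, TOY_PLUS };

struct toy_expr
{
  enum toy_code code;
  long cst;			/* Valid for TOY_CST.  */
  struct toy_expr *op0, *op1;	/* Valid for TOY_PLUS.  */
};

/* Strip constant summands from EXPR into *OFFSET; return the remaining
   expression, or NULL if EXPR was entirely constant.  */

static struct toy_expr *
toy_strip_offset (struct toy_expr *expr, long *offset)
{
  if (expr->code == TOY_CST)
    {
      *offset += expr->cst;
      return NULL;
    }
  if (expr->code == TOY_PLUS)
    {
      struct toy_expr *op0 = toy_strip_offset (expr->op0, offset);
      struct toy_expr *op1 = toy_strip_offset (expr->op1, offset);

      if (!op0)
	return op1;
      if (!op1)
	return op0;
      expr->op0 = op0;
      expr->op1 = op1;
    }
  return expr;
}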
2569
2570 /* Returns a variant of TYPE that can be used as a base for different uses.
2571 We return an unsigned type with the same precision, which avoids problems
2572 with overflows. */
2573
2574 static tree
2575 generic_type_for (tree type)
2576 {
2577 if (POINTER_TYPE_P (type))
2578 return unsigned_type_for (type);
2579
2580 if (TYPE_UNSIGNED (type))
2581 return type;
2582
2583 return unsigned_type_for (type);
2584 }
2585
2586 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2587 the bitmap in which we should store them. */
2588
2589 static struct ivopts_data *fd_ivopts_data;
2590 static tree
2591 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2592 {
2593 bitmap *depends_on = (bitmap *) data;
2594 struct version_info *info;
2595
2596 if (TREE_CODE (*expr_p) != SSA_NAME)
2597 return NULL_TREE;
2598 info = name_info (fd_ivopts_data, *expr_p);
2599
2600 if (!info->inv_id || info->has_nonlin_use)
2601 return NULL_TREE;
2602
2603 if (!*depends_on)
2604 *depends_on = BITMAP_ALLOC (NULL);
2605 bitmap_set_bit (*depends_on, info->inv_id);
2606
2607 return NULL_TREE;
2608 }
2609
2610 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2611 position to POS. If USE is not NULL, the candidate is set as related to
2612 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2613 replacement of the final value of the iv by a direct computation. */
2614
2615 static struct iv_cand *
2616 add_candidate_1 (struct ivopts_data *data,
2617 tree base, tree step, bool important, enum iv_position pos,
2618 struct iv_use *use, gimple incremented_at)
2619 {
2620 unsigned i;
2621 struct iv_cand *cand = NULL;
2622 tree type, orig_type;
2623
2624 /* For non-original variables, make sure their values are computed in a type
2625 that does not invoke undefined behavior on overflows (since in general,
2626 we cannot prove that these induction variables are non-wrapping). */
2627 if (pos != IP_ORIGINAL)
2628 {
2629 orig_type = TREE_TYPE (base);
2630 type = generic_type_for (orig_type);
2631 if (type != orig_type)
2632 {
2633 base = fold_convert (type, base);
2634 step = fold_convert (type, step);
2635 }
2636 }
2637
2638 for (i = 0; i < n_iv_cands (data); i++)
2639 {
2640 cand = iv_cand (data, i);
2641
2642 if (cand->pos != pos)
2643 continue;
2644
2645 if (cand->incremented_at != incremented_at
2646 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2647 && cand->ainc_use != use))
2648 continue;
2649
2650 if (!cand->iv)
2651 {
2652 if (!base && !step)
2653 break;
2654
2655 continue;
2656 }
2657
2658 if (!base && !step)
2659 continue;
2660
2661 if (operand_equal_p (base, cand->iv->base, 0)
2662 && operand_equal_p (step, cand->iv->step, 0)
2663 && (TYPE_PRECISION (TREE_TYPE (base))
2664 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2665 break;
2666 }
2667
2668 if (i == n_iv_cands (data))
2669 {
2670 cand = XCNEW (struct iv_cand);
2671 cand->id = i;
2672
2673 if (!base && !step)
2674 cand->iv = NULL;
2675 else
2676 cand->iv = alloc_iv (data, base, step);
2677
2678 cand->pos = pos;
2679 if (pos != IP_ORIGINAL && cand->iv)
2680 {
2681 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2682 cand->var_after = cand->var_before;
2683 }
2684 cand->important = important;
2685 cand->incremented_at = incremented_at;
2686 data->iv_candidates.safe_push (cand);
2687
2688 if (step
2689 && TREE_CODE (step) != INTEGER_CST)
2690 {
2691 fd_ivopts_data = data;
2692 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2693 }
2694
2695 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2696 cand->ainc_use = use;
2697 else
2698 cand->ainc_use = NULL;
2699
2700 if (dump_file && (dump_flags & TDF_DETAILS))
2701 dump_cand (dump_file, cand);
2702 }
2703
2704 if (important && !cand->important)
2705 {
2706 cand->important = true;
2707 if (dump_file && (dump_flags & TDF_DETAILS))
2708 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2709 }
2710
2711 if (use)
2712 {
2713 bitmap_set_bit (use->related_cands, i);
2714 if (dump_file && (dump_flags & TDF_DETAILS))
2715 fprintf (dump_file, "Candidate %d is related to use %d\n",
2716 cand->id, use->id);
2717 }
2718
2719 return cand;
2720 }
2721
2722 /* Returns true if incrementing the induction variable at the end of the LOOP
2723 is allowed.
2724
2725 The purpose is to avoid splitting the latch edge with a biv increment, thus
2726 creating a jump, possibly confusing other optimization passes and leaving
2727 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2728 is not available (so we do not have a better alternative), or if the latch
2729 edge is already nonempty. */
2730
2731 static bool
2732 allow_ip_end_pos_p (struct loop *loop)
2733 {
2734 if (!ip_normal_pos (loop))
2735 return true;
2736
2737 if (!empty_block_p (ip_end_pos (loop)))
2738 return true;
2739
2740 return false;
2741 }
2742
2743 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2744 Important field is set to IMPORTANT. */
2745
2746 static void
2747 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2748 bool important, struct iv_use *use)
2749 {
2750 basic_block use_bb = gimple_bb (use->stmt);
2751 machine_mode mem_mode;
2752 unsigned HOST_WIDE_INT cstepi;
2753
2754 /* If we insert the increment in any position other than the standard
2755 ones, we must ensure that it is incremented once per iteration.
2756 It must not be in an inner nested loop, or one side of an if
2757 statement. */
2758 if (use_bb->loop_father != data->current_loop
2759 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2760 || stmt_could_throw_p (use->stmt)
2761 || !cst_and_fits_in_hwi (step))
2762 return;
2763
2764 cstepi = int_cst_value (step);
2765
2766 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2767 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2768 || USE_STORE_PRE_INCREMENT (mem_mode))
2769 && GET_MODE_SIZE (mem_mode) == cstepi)
2770 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2771 || USE_STORE_PRE_DECREMENT (mem_mode))
2772 && GET_MODE_SIZE (mem_mode) == -cstepi))
2773 {
2774 enum tree_code code = MINUS_EXPR;
2775 tree new_base;
2776 tree new_step = step;
2777
2778 if (POINTER_TYPE_P (TREE_TYPE (base)))
2779 {
2780 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2781 code = POINTER_PLUS_EXPR;
2782 }
2783 else
2784 new_step = fold_convert (TREE_TYPE (base), new_step);
2785 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2786 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2787 use->stmt);
2788 }
2789 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2790 || USE_STORE_POST_INCREMENT (mem_mode))
2791 && GET_MODE_SIZE (mem_mode) == cstepi)
2792 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2793 || USE_STORE_POST_DECREMENT (mem_mode))
2794 && GET_MODE_SIZE (mem_mode) == -cstepi))
2795 {
2796 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2797 use->stmt);
2798 }
2799 }
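/* A standalone sketch (not part of the pass) of the applicability test
   used above: an auto-increment candidate only helps when the iv step
   equals the size of the access (or its negation for decrements).
   Hypothetical names.  */

static int
example_autoinc_applicable (long step, long access_size, int decrement_p)
{
  return decrement_p ? step == -access_size : step == access_size;
}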
2800
2801 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2802 position to POS. If USE is not NULL, the candidate is set as related to
2803 it. The candidate computation is scheduled on all available positions. */
2804
2805 static void
2806 add_candidate (struct ivopts_data *data,
2807 tree base, tree step, bool important, struct iv_use *use)
2808 {
2809 gcc_assert (use == NULL || use->sub_id == 0);
2810
2811 if (ip_normal_pos (data->current_loop))
2812 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2813 if (ip_end_pos (data->current_loop)
2814 && allow_ip_end_pos_p (data->current_loop))
2815 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2816
2817 if (use != NULL && use->type == USE_ADDRESS)
2818 add_autoinc_candidates (data, base, step, important, use);
2819 }
2820
2821 /* Adds standard iv candidates. */
2822
2823 static void
2824 add_standard_iv_candidates (struct ivopts_data *data)
2825 {
2826 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2827
2828 /* The same for a double-integer type if it is still fast enough. */
2829 if (TYPE_PRECISION
2830 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2831 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2832 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2833 build_int_cst (long_integer_type_node, 1), true, NULL);
2834
2835 /* The same for a double-integer type if it is still fast enough. */
2836 if (TYPE_PRECISION
2837 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2838 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2839 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2840 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2841 }
2842
2843
2844 /* Adds candidates based on the old induction variable IV. */
2845
2846 static void
2847 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2848 {
2849 gimple phi;
2850 tree def;
2851 struct iv_cand *cand;
2852
2853 add_candidate (data, iv->base, iv->step, true, NULL);
2854
2855 /* The same, but with initial value zero. */
2856 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2857 add_candidate (data, size_int (0), iv->step, true, NULL);
2858 else
2859 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2860 iv->step, true, NULL);
2861
2862 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2863 if (gimple_code (phi) == GIMPLE_PHI)
2864 {
2865 /* Additionally record the possibility of leaving the original iv
2866 untouched. */
2867 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2868 /* Don't add candidate if it's from another PHI node because
2869 it's an affine iv appearing in the form of PEELED_CHREC. */
2870 phi = SSA_NAME_DEF_STMT (def);
2871 if (gimple_code (phi) != GIMPLE_PHI)
2872 {
2873 cand = add_candidate_1 (data,
2874 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2875 SSA_NAME_DEF_STMT (def));
2876 cand->var_before = iv->ssa_name;
2877 cand->var_after = def;
2878 }
2879 else
2880 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2881 }
2882 }
2883
2884 /* Adds candidates based on the old induction variables. */
2885
2886 static void
2887 add_old_ivs_candidates (struct ivopts_data *data)
2888 {
2889 unsigned i;
2890 struct iv *iv;
2891 bitmap_iterator bi;
2892
2893 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2894 {
2895 iv = ver_info (data, i)->iv;
2896 if (iv && iv->biv_p && !integer_zerop (iv->step))
2897 add_old_iv_candidates (data, iv);
2898 }
2899 }
2900
2901 /* Adds candidates based on the value of the induction variable IV and USE. */
2902
2903 static void
2904 add_iv_value_candidates (struct ivopts_data *data,
2905 struct iv *iv, struct iv_use *use)
2906 {
2907 unsigned HOST_WIDE_INT offset;
2908 tree base;
2909 tree basetype;
2910
2911 add_candidate (data, iv->base, iv->step, false, use);
2912
2913 /* The same, but with initial value zero. Make such a variable important,
2914 since it is generic enough that possibly many uses may be based
2915 on it. */
2916 basetype = TREE_TYPE (iv->base);
2917 if (POINTER_TYPE_P (basetype))
2918 basetype = sizetype;
2919 add_candidate (data, build_int_cst (basetype, 0),
2920 iv->step, true, use);
2921
2922 /* Third, try removing the constant offset. Make sure to even
2923 add a candidate for &a[0] vs. (T *)&a. */
2924 base = strip_offset (iv->base, &offset);
2925 if (offset
2926 || base != iv->base)
2927 add_candidate (data, base, iv->step, false, use);
2928 }
2929
2930 /* Adds candidates based on the uses. */
2931
2932 static void
2933 add_derived_ivs_candidates (struct ivopts_data *data)
2934 {
2935 unsigned i;
2936
2937 for (i = 0; i < n_iv_uses (data); i++)
2938 {
2939 struct iv_use *use = iv_use (data, i);
2940
2941 if (!use)
2942 continue;
2943
2944 switch (use->type)
2945 {
2946 case USE_NONLINEAR_EXPR:
2947 case USE_COMPARE:
2948 case USE_ADDRESS:
2949 /* Just add the ivs based on the value of the iv used here. */
2950 add_iv_value_candidates (data, use->iv, use);
2951 break;
2952
2953 default:
2954 gcc_unreachable ();
2955 }
2956 }
2957 }
2958
2959 /* Record important candidates and add them to related_cands bitmaps
2960 if needed. */
2961
2962 static void
2963 record_important_candidates (struct ivopts_data *data)
2964 {
2965 unsigned i;
2966 struct iv_use *use;
2967
2968 for (i = 0; i < n_iv_cands (data); i++)
2969 {
2970 struct iv_cand *cand = iv_cand (data, i);
2971
2972 if (cand->important)
2973 bitmap_set_bit (data->important_candidates, i);
2974 }
2975
2976 data->consider_all_candidates = (n_iv_cands (data)
2977 <= CONSIDER_ALL_CANDIDATES_BOUND);
2978
2979 if (data->consider_all_candidates)
2980 {
2981 /* We will not need "related_cands" bitmaps in this case,
2982 so release them to decrease peak memory consumption. */
2983 for (i = 0; i < n_iv_uses (data); i++)
2984 {
2985 use = iv_use (data, i);
2986 BITMAP_FREE (use->related_cands);
2987 }
2988 }
2989 else
2990 {
2991 /* Add important candidates to the related_cands bitmaps. */
2992 for (i = 0; i < n_iv_uses (data); i++)
2993 bitmap_ior_into (iv_use (data, i)->related_cands,
2994 data->important_candidates);
2995 }
2996 }
2997
2998 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2999 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3000 we allocate a simple list to every use. */
3001
3002 static void
3003 alloc_use_cost_map (struct ivopts_data *data)
3004 {
3005 unsigned i, size, s;
3006
3007 for (i = 0; i < n_iv_uses (data); i++)
3008 {
3009 struct iv_use *use = iv_use (data, i);
3010
3011 if (data->consider_all_candidates)
3012 size = n_iv_cands (data);
3013 else
3014 {
3015 s = bitmap_count_bits (use->related_cands);
3016
3017 /* Round up to a power of two, so that computing the modulo is fast. */
3018 size = s ? (1 << ceil_log2 (s)) : 1;
3019 }
3020
3021 use->n_map_members = size;
3022 use->cost_map = XCNEWVEC (struct cost_pair, size);
3023 }
3024 }
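/* A standalone sketch (not part of the pass): because the per-use map size
   is rounded up to a power of two above, the start slot used by the
   open-addressed lookups in set_use_iv_cost/get_use_iv_cost reduces from a
   modulo to a bitwise AND.  Hypothetical names.  */

static unsigned
example_start_slot (unsigned cand_id, unsigned n_map_members)
{
  /* Assumes n_map_members is a power of two, so this equals
     cand_id % n_map_members; e.g. example_start_slot (13, 8) == 5.  */
  return cand_id & (n_map_members - 1);
}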
3025
3026 /* Returns description of computation cost of expression whose runtime
3027 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3028
3029 static comp_cost
3030 new_cost (unsigned runtime, unsigned complexity)
3031 {
3032 comp_cost cost;
3033
3034 cost.cost = runtime;
3035 cost.complexity = complexity;
3036
3037 return cost;
3038 }
3039
3040 /* Returns true if COST is infinite. */
3041
3042 static bool
3043 infinite_cost_p (comp_cost cost)
3044 {
3045 return cost.cost == INFTY;
3046 }
3047
3048 /* Adds costs COST1 and COST2. */
3049
3050 static comp_cost
3051 add_costs (comp_cost cost1, comp_cost cost2)
3052 {
3053 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3054 return infinite_cost;
3055
3056 cost1.cost += cost2.cost;
3057 cost1.complexity += cost2.complexity;
3058
3059 return cost1;
3060 }
3061 /* Subtracts costs COST1 and COST2. */
3062
3063 static comp_cost
3064 sub_costs (comp_cost cost1, comp_cost cost2)
3065 {
3066 cost1.cost -= cost2.cost;
3067 cost1.complexity -= cost2.complexity;
3068
3069 return cost1;
3070 }
3071
3072 /* Returns a negative number if COST1 < COST2, a positive number if
3073 COST1 > COST2, and 0 if COST1 = COST2. */
3074
3075 static int
3076 compare_costs (comp_cost cost1, comp_cost cost2)
3077 {
3078 if (cost1.cost == cost2.cost)
3079 return cost1.complexity - cost2.complexity;
3080
3081 return cost1.cost - cost2.cost;
3082 }
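/* A minimal usage sketch (not part of the pass): comp_cost ordering is
   lexicographic, runtime cost first, complexity as the tie-breaker, so two
   costs with equal runtime compare by complexity.  Hypothetical values.  */

static int
example_cost_ordering (void)
{
  comp_cost a = new_cost (4, 2);
  comp_cost b = new_cost (4, 3);

  /* Equal runtime cost; A wins because it has lower complexity.  */
  return compare_costs (a, b) < 0;
}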
3083
3084 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3085 depends on invariants DEPENDS_ON, that the value used in expressing it
3086 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3087
3088 static void
3089 set_use_iv_cost (struct ivopts_data *data,
3090 struct iv_use *use, struct iv_cand *cand,
3091 comp_cost cost, bitmap depends_on, tree value,
3092 enum tree_code comp, int inv_expr_id)
3093 {
3094 unsigned i, s;
3095
3096 if (infinite_cost_p (cost))
3097 {
3098 BITMAP_FREE (depends_on);
3099 return;
3100 }
3101
3102 if (data->consider_all_candidates)
3103 {
3104 use->cost_map[cand->id].cand = cand;
3105 use->cost_map[cand->id].cost = cost;
3106 use->cost_map[cand->id].depends_on = depends_on;
3107 use->cost_map[cand->id].value = value;
3108 use->cost_map[cand->id].comp = comp;
3109 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3110 return;
3111 }
3112
3113 /* n_map_members is a power of two, so this computes modulo. */
3114 s = cand->id & (use->n_map_members - 1);
3115 for (i = s; i < use->n_map_members; i++)
3116 if (!use->cost_map[i].cand)
3117 goto found;
3118 for (i = 0; i < s; i++)
3119 if (!use->cost_map[i].cand)
3120 goto found;
3121
3122 gcc_unreachable ();
3123
3124 found:
3125 use->cost_map[i].cand = cand;
3126 use->cost_map[i].cost = cost;
3127 use->cost_map[i].depends_on = depends_on;
3128 use->cost_map[i].value = value;
3129 use->cost_map[i].comp = comp;
3130 use->cost_map[i].inv_expr_id = inv_expr_id;
3131 }
3132
3133 /* Gets cost of (USE, CANDIDATE) pair. */
3134
3135 static struct cost_pair *
3136 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3137 struct iv_cand *cand)
3138 {
3139 unsigned i, s;
3140 struct cost_pair *ret;
3141
3142 if (!cand)
3143 return NULL;
3144
3145 if (data->consider_all_candidates)
3146 {
3147 ret = use->cost_map + cand->id;
3148 if (!ret->cand)
3149 return NULL;
3150
3151 return ret;
3152 }
3153
3154 /* n_map_members is a power of two, so this computes modulo. */
3155 s = cand->id & (use->n_map_members - 1);
3156 for (i = s; i < use->n_map_members; i++)
3157 if (use->cost_map[i].cand == cand)
3158 return use->cost_map + i;
3159 else if (use->cost_map[i].cand == NULL)
3160 return NULL;
3161 for (i = 0; i < s; i++)
3162 if (use->cost_map[i].cand == cand)
3163 return use->cost_map + i;
3164 else if (use->cost_map[i].cand == NULL)
3165 return NULL;
3166
3167 return NULL;
3168 }
3169
3170 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3171 static rtx
3172 produce_memory_decl_rtl (tree obj, int *regno)
3173 {
3174 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3175 machine_mode address_mode = targetm.addr_space.address_mode (as);
3176 rtx x;
3177
3178 gcc_assert (obj);
3179 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3180 {
3181 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3182 x = gen_rtx_SYMBOL_REF (address_mode, name);
3183 SET_SYMBOL_REF_DECL (x, obj);
3184 x = gen_rtx_MEM (DECL_MODE (obj), x);
3185 set_mem_addr_space (x, as);
3186 targetm.encode_section_info (obj, x, true);
3187 }
3188 else
3189 {
3190 x = gen_raw_REG (address_mode, (*regno)++);
3191 x = gen_rtx_MEM (DECL_MODE (obj), x);
3192 set_mem_addr_space (x, as);
3193 }
3194
3195 return x;
3196 }
3197
3198 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3199 walk_tree. DATA contains the actual fake register number. */
3200
3201 static tree
3202 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3203 {
3204 tree obj = NULL_TREE;
3205 rtx x = NULL_RTX;
3206 int *regno = (int *) data;
3207
3208 switch (TREE_CODE (*expr_p))
3209 {
3210 case ADDR_EXPR:
3211 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3212 handled_component_p (*expr_p);
3213 expr_p = &TREE_OPERAND (*expr_p, 0))
3214 continue;
3215 obj = *expr_p;
3216 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3217 x = produce_memory_decl_rtl (obj, regno);
3218 break;
3219
3220 case SSA_NAME:
3221 *ws = 0;
3222 obj = SSA_NAME_VAR (*expr_p);
3223 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3224 if (!obj)
3225 return NULL_TREE;
3226 if (!DECL_RTL_SET_P (obj))
3227 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3228 break;
3229
3230 case VAR_DECL:
3231 case PARM_DECL:
3232 case RESULT_DECL:
3233 *ws = 0;
3234 obj = *expr_p;
3235
3236 if (DECL_RTL_SET_P (obj))
3237 break;
3238
3239 if (DECL_MODE (obj) == BLKmode)
3240 x = produce_memory_decl_rtl (obj, regno);
3241 else
3242 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3243
3244 break;
3245
3246 default:
3247 break;
3248 }
3249
3250 if (x)
3251 {
3252 decl_rtl_to_reset.safe_push (obj);
3253 SET_DECL_RTL (obj, x);
3254 }
3255
3256 return NULL_TREE;
3257 }
3258
3259 /* Determines cost of the computation of EXPR. */
3260
3261 static unsigned
3262 computation_cost (tree expr, bool speed)
3263 {
3264 rtx_insn *seq;
3265 rtx rslt;
3266 tree type = TREE_TYPE (expr);
3267 unsigned cost;
3268 /* Avoid using hard regs in ways which may be unsupported. */
3269 int regno = LAST_VIRTUAL_REGISTER + 1;
3270 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3271 enum node_frequency real_frequency = node->frequency;
3272
3273 node->frequency = NODE_FREQUENCY_NORMAL;
3274 crtl->maybe_hot_insn_p = speed;
3275 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3276 start_sequence ();
3277 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3278 seq = get_insns ();
3279 end_sequence ();
3280 default_rtl_profile ();
3281 node->frequency = real_frequency;
3282
3283 cost = seq_cost (seq, speed);
3284 if (MEM_P (rslt))
3285 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3286 TYPE_ADDR_SPACE (type), speed);
3287 else if (!REG_P (rslt))
3288 cost += set_src_cost (rslt, speed);
3289
3290 return cost;
3291 }
3292
3293 /* Returns variable containing the value of candidate CAND at statement AT. */
3294
3295 static tree
3296 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3297 {
3298 if (stmt_after_increment (loop, cand, stmt))
3299 return cand->var_after;
3300 else
3301 return cand->var_before;
3302 }
3303
3304 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3305 same precision that is at least as wide as the precision of TYPE, stores
3306 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3307 type of A and B. */
3308
3309 static tree
3310 determine_common_wider_type (tree *a, tree *b)
3311 {
3312 tree wider_type = NULL;
3313 tree suba, subb;
3314 tree atype = TREE_TYPE (*a);
3315
3316 if (CONVERT_EXPR_P (*a))
3317 {
3318 suba = TREE_OPERAND (*a, 0);
3319 wider_type = TREE_TYPE (suba);
3320 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3321 return atype;
3322 }
3323 else
3324 return atype;
3325
3326 if (CONVERT_EXPR_P (*b))
3327 {
3328 subb = TREE_OPERAND (*b, 0);
3329 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3330 return atype;
3331 }
3332 else
3333 return atype;
3334
3335 *a = suba;
3336 *b = subb;
3337 return wider_type;
3338 }
3339
3340 /* Determines the expression by which USE is expressed from induction variable
3341 CAND at statement AT in LOOP. The expression is stored in a decomposed
3342 form into AFF. Returns false if USE cannot be expressed using CAND. */
3343
3344 static bool
3345 get_computation_aff (struct loop *loop,
3346 struct iv_use *use, struct iv_cand *cand, gimple at,
3347 struct aff_tree *aff)
3348 {
3349 tree ubase = use->iv->base;
3350 tree ustep = use->iv->step;
3351 tree cbase = cand->iv->base;
3352 tree cstep = cand->iv->step, cstep_common;
3353 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3354 tree common_type, var;
3355 tree uutype;
3356 aff_tree cbase_aff, var_aff;
3357 widest_int rat;
3358
3359 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3360 {
3361 /* We do not have enough precision to express the values of the use. */
3362 return false;
3363 }
3364
3365 var = var_at_stmt (loop, cand, at);
3366 uutype = unsigned_type_for (utype);
3367
3368 /* If the conversion is not noop, perform it. */
3369 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3370 {
3371 cstep = fold_convert (uutype, cstep);
3372 cbase = fold_convert (uutype, cbase);
3373 var = fold_convert (uutype, var);
3374 }
3375
3376 if (!constant_multiple_of (ustep, cstep, &rat))
3377 return false;
3378
3379 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3380 type, we achieve better folding by computing their difference in this
3381 wider type, and cast the result to UUTYPE. We do not need to worry about
3382 overflows, as all the arithmetic will in the end be performed in UUTYPE
3383 anyway. */
3384 common_type = determine_common_wider_type (&ubase, &cbase);
3385
3386 /* use = ubase - ratio * cbase + ratio * var. */
3387 tree_to_aff_combination (ubase, common_type, aff);
3388 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3389 tree_to_aff_combination (var, uutype, &var_aff);
3390
3391 /* We need to shift the value if we are after the increment. */
3392 if (stmt_after_increment (loop, cand, at))
3393 {
3394 aff_tree cstep_aff;
3395
3396 if (common_type != uutype)
3397 cstep_common = fold_convert (common_type, cstep);
3398 else
3399 cstep_common = cstep;
3400
3401 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3402 aff_combination_add (&cbase_aff, &cstep_aff);
3403 }
3404
3405 aff_combination_scale (&cbase_aff, -rat);
3406 aff_combination_add (aff, &cbase_aff);
3407 if (common_type != uutype)
3408 aff_combination_convert (aff, uutype);
3409
3410 aff_combination_scale (&var_aff, rat);
3411 aff_combination_add (aff, &var_aff);
3412
3413 return true;
3414 }
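/* A standalone sketch (not part of the pass) of the affine rewrite used
   above on plain integers: use = ubase - ratio * cbase + ratio * var,
   where RATIO is ustep / cstep.  For instance, with ubase = 0, ustep = 4
   and a candidate with cbase = 0, cstep = 1, the use value 4 * i is
   recovered from the candidate value VAR = i as 0 - 4 * 0 + 4 * i.
   Hypothetical names; the real code works on aff_tree combinations.  */

static long
example_express_use_from_cand (long ubase, long cbase, long ratio, long var)
{
  return ubase - ratio * cbase + ratio * var;
}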
3415
3416 /* Return the type of USE. */
3417
3418 static tree
3419 get_use_type (struct iv_use *use)
3420 {
3421 tree base_type = TREE_TYPE (use->iv->base);
3422 tree type;
3423
3424 if (use->type == USE_ADDRESS)
3425 {
3426 /* The base_type may be a void pointer. Create a pointer type based on
3427 the mem_ref instead. */
3428 type = build_pointer_type (TREE_TYPE (*use->op_p));
3429 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3430 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3431 }
3432 else
3433 type = base_type;
3434
3435 return type;
3436 }
3437
3438 /* Determines the expression by which USE is expressed from induction variable
3439 CAND at statement AT in LOOP. The computation is unshared. */
3440
3441 static tree
3442 get_computation_at (struct loop *loop,
3443 struct iv_use *use, struct iv_cand *cand, gimple at)
3444 {
3445 aff_tree aff;
3446 tree type = get_use_type (use);
3447
3448 if (!get_computation_aff (loop, use, cand, at, &aff))
3449 return NULL_TREE;
3450 unshare_aff_combination (&aff);
3451 return fold_convert (type, aff_combination_to_tree (&aff));
3452 }
3453
3454 /* Determines the expression by which USE is expressed from induction variable
3455 CAND in LOOP. The computation is unshared. */
3456
3457 static tree
3458 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3459 {
3460 return get_computation_at (loop, use, cand, use->stmt);
3461 }
3462
3463 /* Adjust the cost COST for being in loop setup rather than loop body.
3464 If we're optimizing for space, the loop setup overhead is constant;
3465 if we're optimizing for speed, amortize it over the per-iteration cost. */
3466 static unsigned
3467 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3468 {
3469 if (cost == INFTY)
3470 return cost;
3471 else if (optimize_loop_for_speed_p (data->current_loop))
3472 return cost / avg_loop_niter (data->current_loop);
3473 else
3474 return cost;
3475 }
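/* A standalone sketch (not part of the pass) of the amortization above:
   when optimizing for speed, a one-off setup cost is divided by the
   expected iteration count, so a setup of cost 20 in a loop averaging 10
   iterations contributes 2 per iteration; when optimizing for size it is
   charged in full.  Hypothetical names.  */

static unsigned
example_amortize_setup_cost (unsigned setup_cost, unsigned avg_niter,
			     int optimize_for_speed_p)
{
  return optimize_for_speed_p ? setup_cost / avg_niter : setup_cost;
}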
3476
3477 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3478 validity for a memory reference accessing memory of mode MODE in
3479 address space AS. */
3480
3481
3482 bool
3483 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3484 addr_space_t as)
3485 {
3486 #define MAX_RATIO 128
3487 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3488 static vec<sbitmap> valid_mult_list;
3489 sbitmap valid_mult;
3490
3491 if (data_index >= valid_mult_list.length ())
3492 valid_mult_list.safe_grow_cleared (data_index + 1);
3493
3494 valid_mult = valid_mult_list[data_index];
3495 if (!valid_mult)
3496 {
3497 machine_mode address_mode = targetm.addr_space.address_mode (as);
3498 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3499 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3500 rtx addr, scaled;
3501 HOST_WIDE_INT i;
3502
3503 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3504 bitmap_clear (valid_mult);
3505 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3506 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3507 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3508 {
3509 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3510 if (memory_address_addr_space_p (mode, addr, as)
3511 || memory_address_addr_space_p (mode, scaled, as))
3512 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3513 }
3514
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3516 {
3517 fprintf (dump_file, " allowed multipliers:");
3518 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3519 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3520 fprintf (dump_file, " %d", (int) i);
3521 fprintf (dump_file, "\n");
3522 fprintf (dump_file, "\n");
3523 }
3524
3525 valid_mult_list[data_index] = valid_mult;
3526 }
3527
3528 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3529 return false;
3530
3531 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3532 }
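/* A standalone sketch (not part of the pass): ratios in the range
   [-MAX_RATIO, MAX_RATIO] are cached in a per-(mode, address-space) bitmap
   by biasing the ratio with +MAX_RATIO so it becomes a non-negative bit
   index, which is the indexing used by the lookups above.  Hypothetical
   names.  */

static int
example_ratio_bit_index (int ratio, int max_ratio)
{
  /* -max_ratio maps to bit 0, 0 to bit max_ratio, and +max_ratio to bit
     2 * max_ratio.  */
  return ratio + max_ratio;
}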
3533
3534 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3535 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3536 variable is omitted. Compute the cost for a memory reference that accesses
3537 a memory location of mode MEM_MODE in address space AS.
3538
3539 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3540 size of MEM_MODE / RATIO) is available. To make this determination, we
3541 look at the size of the increment to be made, which is given in CSTEP.
3542 CSTEP may be zero if the step is unknown.
3543 STMT_AFTER_INC is true iff the statement we're looking at is after the
3544 increment of the original biv.
3545
3546 TODO -- there must be some better way. This all is quite crude. */
3547
3548 enum ainc_type
3549 {
3550 AINC_PRE_INC, /* Pre increment. */
3551 AINC_PRE_DEC, /* Pre decrement. */
3552 AINC_POST_INC, /* Post increment. */
3553 AINC_POST_DEC, /* Post decrement. */
3554 AINC_NONE /* Also the number of auto increment types. */
3555 };
3556
3557 typedef struct address_cost_data_s
3558 {
3559 HOST_WIDE_INT min_offset, max_offset;
3560 unsigned costs[2][2][2][2];
3561 unsigned ainc_costs[AINC_NONE];
3562 } *address_cost_data;
3563
3564
3565 static comp_cost
3566 get_address_cost (bool symbol_present, bool var_present,
3567 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3568 HOST_WIDE_INT cstep, machine_mode mem_mode,
3569 addr_space_t as, bool speed,
3570 bool stmt_after_inc, bool *may_autoinc)
3571 {
3572 machine_mode address_mode = targetm.addr_space.address_mode (as);
3573 static vec<address_cost_data> address_cost_data_list;
3574 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3575 address_cost_data data;
3576 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3577 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3578 unsigned cost, acost, complexity;
3579 enum ainc_type autoinc_type;
3580 bool offset_p, ratio_p, autoinc;
3581 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3582 unsigned HOST_WIDE_INT mask;
3583 unsigned bits;
3584
3585 if (data_index >= address_cost_data_list.length ())
3586 address_cost_data_list.safe_grow_cleared (data_index + 1);
3587
3588 data = address_cost_data_list[data_index];
3589 if (!data)
3590 {
3591 HOST_WIDE_INT i;
3592 HOST_WIDE_INT rat, off = 0;
3593 int old_cse_not_expected, width;
3594 unsigned sym_p, var_p, off_p, rat_p, add_c;
3595 rtx_insn *seq;
3596 rtx addr, base;
3597 rtx reg0, reg1;
3598
3599 data = (address_cost_data) xcalloc (1, sizeof (*data));
3600
3601 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3602
3603 width = GET_MODE_BITSIZE (address_mode) - 1;
3604 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3605 width = HOST_BITS_PER_WIDE_INT - 1;
3606 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3607
3608 for (i = width; i >= 0; i--)
3609 {
3610 off = -((unsigned HOST_WIDE_INT) 1 << i);
3611 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3612 if (memory_address_addr_space_p (mem_mode, addr, as))
3613 break;
3614 }
3615 data->min_offset = (i == -1? 0 : off);
3616
3617 for (i = width; i >= 0; i--)
3618 {
3619 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3620 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3621 if (memory_address_addr_space_p (mem_mode, addr, as))
3622 break;
3623 /* For some strict-alignment targets, the offset must be naturally
3624 aligned. Try an aligned offset if mem_mode is not QImode. */
3625 off = mem_mode != QImode
3626 ? ((unsigned HOST_WIDE_INT) 1 << i)
3627 - GET_MODE_SIZE (mem_mode)
3628 : 0;
3629 if (off > 0)
3630 {
3631 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3632 if (memory_address_addr_space_p (mem_mode, addr, as))
3633 break;
3634 }
3635 }
3636 if (i == -1)
3637 off = 0;
3638 data->max_offset = off;
3639
3640 if (dump_file && (dump_flags & TDF_DETAILS))
3641 {
3642 fprintf (dump_file, "get_address_cost:\n");
3643 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3644 GET_MODE_NAME (mem_mode),
3645 data->min_offset);
3646 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3647 GET_MODE_NAME (mem_mode),
3648 data->max_offset);
3649 }
3650
3651 rat = 1;
3652 for (i = 2; i <= MAX_RATIO; i++)
3653 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3654 {
3655 rat = i;
3656 break;
3657 }
3658
3659 /* Compute the cost of various addressing modes. */
3660 acost = 0;
3661 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3662 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3663
3664 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3665 || USE_STORE_PRE_DECREMENT (mem_mode))
3666 {
3667 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3668 has_predec[mem_mode]
3669 = memory_address_addr_space_p (mem_mode, addr, as);
3670
3671 if (has_predec[mem_mode])
3672 data->ainc_costs[AINC_PRE_DEC]
3673 = address_cost (addr, mem_mode, as, speed);
3674 }
3675 if (USE_LOAD_POST_DECREMENT (mem_mode)
3676 || USE_STORE_POST_DECREMENT (mem_mode))
3677 {
3678 addr = gen_rtx_POST_DEC (address_mode, reg0);
3679 has_postdec[mem_mode]
3680 = memory_address_addr_space_p (mem_mode, addr, as);
3681
3682 if (has_postdec[mem_mode])
3683 data->ainc_costs[AINC_POST_DEC]
3684 = address_cost (addr, mem_mode, as, speed);
3685 }
3686 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3687 || USE_STORE_PRE_INCREMENT (mem_mode))
3688 {
3689 addr = gen_rtx_PRE_INC (address_mode, reg0);
3690 has_preinc[mem_mode]
3691 = memory_address_addr_space_p (mem_mode, addr, as);
3692
3693 if (has_preinc[mem_mode])
3694 data->ainc_costs[AINC_PRE_INC]
3695 = address_cost (addr, mem_mode, as, speed);
3696 }
3697 if (USE_LOAD_POST_INCREMENT (mem_mode)
3698 || USE_STORE_POST_INCREMENT (mem_mode))
3699 {
3700 addr = gen_rtx_POST_INC (address_mode, reg0);
3701 has_postinc[mem_mode]
3702 = memory_address_addr_space_p (mem_mode, addr, as);
3703
3704 if (has_postinc[mem_mode])
3705 data->ainc_costs[AINC_POST_INC]
3706 = address_cost (addr, mem_mode, as, speed);
3707 }
3708 for (i = 0; i < 16; i++)
3709 {
3710 sym_p = i & 1;
3711 var_p = (i >> 1) & 1;
3712 off_p = (i >> 2) & 1;
3713 rat_p = (i >> 3) & 1;
3714
3715 addr = reg0;
3716 if (rat_p)
3717 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3718 gen_int_mode (rat, address_mode));
3719
3720 if (var_p)
3721 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3722
3723 if (sym_p)
3724 {
3725 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3726 /* ??? We can run into trouble with some backends by presenting
3727 it with symbols which haven't been properly passed through
3728 targetm.encode_section_info. By setting the local bit, we
3729 enhance the probability of things working. */
3730 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3731
3732 if (off_p)
3733 base = gen_rtx_fmt_e (CONST, address_mode,
3734 gen_rtx_fmt_ee
3735 (PLUS, address_mode, base,
3736 gen_int_mode (off, address_mode)));
3737 }
3738 else if (off_p)
3739 base = gen_int_mode (off, address_mode);
3740 else
3741 base = NULL_RTX;
3742
3743 if (base)
3744 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3745
3746 start_sequence ();
3747 /* To avoid splitting addressing modes, pretend that no cse will
3748 follow. */
3749 old_cse_not_expected = cse_not_expected;
3750 cse_not_expected = true;
3751 addr = memory_address_addr_space (mem_mode, addr, as);
3752 cse_not_expected = old_cse_not_expected;
3753 seq = get_insns ();
3754 end_sequence ();
3755
3756 acost = seq_cost (seq, speed);
3757 acost += address_cost (addr, mem_mode, as, speed);
3758
3759 if (!acost)
3760 acost = 1;
3761 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3762 }
3763
3764 /* On some targets, it is quite expensive to load a symbol into a register,
3765 which makes addresses that contain symbols look much more expensive.
3766 However, the symbol will have to be loaded in any case before the
3767 loop (and quite likely we have it in a register already), so it does not
3768 make much sense to penalize them too heavily. So make some final
3769 tweaks for the SYMBOL_PRESENT modes:
3770
3771 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3772 var is cheaper, use this mode with small penalty.
3773 If VAR_PRESENT is true, try whether the mode with
3774 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3775 if this is the case, use it. */
3776 add_c = add_cost (speed, address_mode);
3777 for (i = 0; i < 8; i++)
3778 {
3779 var_p = i & 1;
3780 off_p = (i >> 1) & 1;
3781 rat_p = (i >> 2) & 1;
3782
3783 acost = data->costs[0][1][off_p][rat_p] + 1;
3784 if (var_p)
3785 acost += add_c;
3786
3787 if (acost < data->costs[1][var_p][off_p][rat_p])
3788 data->costs[1][var_p][off_p][rat_p] = acost;
3789 }
3790
3791 if (dump_file && (dump_flags & TDF_DETAILS))
3792 {
3793 fprintf (dump_file, "Address costs:\n");
3794
3795 for (i = 0; i < 16; i++)
3796 {
3797 sym_p = i & 1;
3798 var_p = (i >> 1) & 1;
3799 off_p = (i >> 2) & 1;
3800 rat_p = (i >> 3) & 1;
3801
3802 fprintf (dump_file, " ");
3803 if (sym_p)
3804 fprintf (dump_file, "sym + ");
3805 if (var_p)
3806 fprintf (dump_file, "var + ");
3807 if (off_p)
3808 fprintf (dump_file, "cst + ");
3809 if (rat_p)
3810 fprintf (dump_file, "rat * ");
3811
3812 acost = data->costs[sym_p][var_p][off_p][rat_p];
3813 fprintf (dump_file, "index costs %d\n", acost);
3814 }
3815 if (has_predec[mem_mode] || has_postdec[mem_mode]
3816 || has_preinc[mem_mode] || has_postinc[mem_mode])
3817 fprintf (dump_file, " May include autoinc/dec\n");
3818 fprintf (dump_file, "\n");
3819 }
3820
3821 address_cost_data_list[data_index] = data;
3822 }
3823
3824 bits = GET_MODE_BITSIZE (address_mode);
3825 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3826 offset &= mask;
3827 if ((offset >> (bits - 1) & 1))
3828 offset |= ~mask;
3829 s_offset = offset;
3830
3831 autoinc = false;
3832 autoinc_type = AINC_NONE;
3833 msize = GET_MODE_SIZE (mem_mode);
3834 autoinc_offset = offset;
3835 if (stmt_after_inc)
3836 autoinc_offset += ratio * cstep;
3837 if (symbol_present || var_present || ratio != 1)
3838 autoinc = false;
3839 else
3840 {
3841 if (has_postinc[mem_mode] && autoinc_offset == 0
3842 && msize == cstep)
3843 autoinc_type = AINC_POST_INC;
3844 else if (has_postdec[mem_mode] && autoinc_offset == 0
3845 && msize == -cstep)
3846 autoinc_type = AINC_POST_DEC;
3847 else if (has_preinc[mem_mode] && autoinc_offset == msize
3848 && msize == cstep)
3849 autoinc_type = AINC_PRE_INC;
3850 else if (has_predec[mem_mode] && autoinc_offset == -msize
3851 && msize == -cstep)
3852 autoinc_type = AINC_PRE_DEC;
3853
3854 if (autoinc_type != AINC_NONE)
3855 autoinc = true;
3856 }
3857
3858 cost = 0;
3859 offset_p = (s_offset != 0
3860 && data->min_offset <= s_offset
3861 && s_offset <= data->max_offset);
3862 ratio_p = (ratio != 1
3863 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3864
3865 if (ratio != 1 && !ratio_p)
3866 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3867
3868 if (s_offset && !offset_p && !symbol_present)
3869 cost += add_cost (speed, address_mode);
3870
3871 if (may_autoinc)
3872 *may_autoinc = autoinc;
3873 if (autoinc)
3874 acost = data->ainc_costs[autoinc_type];
3875 else
3876 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3877 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3878 return new_cost (cost + acost, complexity);
3879 }
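/* A standalone sketch (not part of the pass) of the offset normalization
   performed above before the offset is classified against the precomputed
   min_offset/max_offset range: the offset is reduced modulo the address
   width and sign-extended.  Hypothetical names; BITS stands in for the
   address-mode bit width and is assumed to be at most 64.  */

static long long
example_sign_extend_offset (unsigned long long offset, unsigned bits)
{
  unsigned long long mask = (bits >= 64
			     ? ~(unsigned long long) 0
			     : ((unsigned long long) 1 << bits) - 1);

  offset &= mask;
  if (bits < 64 && ((offset >> (bits - 1)) & 1))
    offset |= ~mask;		/* Propagate the sign bit.  */
  return (long long) offset;
}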
3880
3881 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3882 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3883 calculating the operands of EXPR. Returns true if successful, and returns
3884 the cost in COST. */
3885
3886 static bool
3887 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3888 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3889 {
3890 comp_cost res;
3891 tree op1 = TREE_OPERAND (expr, 1);
3892 tree cst = TREE_OPERAND (mult, 1);
3893 tree multop = TREE_OPERAND (mult, 0);
3894 int m = exact_log2 (int_cst_value (cst));
3895 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3896 int as_cost, sa_cost;
3897 bool mult_in_op1;
3898
3899 if (!(m >= 0 && m < maxm))
3900 return false;
3901
3902 mult_in_op1 = operand_equal_p (op1, mult, 0);
3903
3904 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3905
3906 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3907 use that in preference to a shift insn followed by an add insn. */
3908 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3909 ? shiftadd_cost (speed, mode, m)
3910 : (mult_in_op1
3911 ? shiftsub1_cost (speed, mode, m)
3912 : shiftsub0_cost (speed, mode, m)));
3913
3914 res = new_cost (MIN (as_cost, sa_cost), 0);
3915 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3916
3917 STRIP_NOPS (multop);
3918 if (!is_gimple_val (multop))
3919 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3920
3921 *cost = res;
3922 return true;
3923 }
3924
3925 /* Estimates cost of forcing expression EXPR into a variable. */
3926
3927 static comp_cost
3928 force_expr_to_var_cost (tree expr, bool speed)
3929 {
3930 static bool costs_initialized = false;
3931 static unsigned integer_cost [2];
3932 static unsigned symbol_cost [2];
3933 static unsigned address_cost [2];
3934 tree op0, op1;
3935 comp_cost cost0, cost1, cost;
3936 machine_mode mode;
3937
3938 if (!costs_initialized)
3939 {
3940 tree type = build_pointer_type (integer_type_node);
3941 tree var, addr;
3942 rtx x;
3943 int i;
3944
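/* Measure once, for size (i == 0) and for speed (i == 1), the cost of
   materializing an integer constant, the address of a static variable,
   and such an address plus a constant offset; the results are cached and
   reused by all later queries. */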
3945 var = create_tmp_var_raw (integer_type_node, "test_var");
3946 TREE_STATIC (var) = 1;
3947 x = produce_memory_decl_rtl (var, NULL);
3948 SET_DECL_RTL (var, x);
3949
3950 addr = build1 (ADDR_EXPR, type, var);
3951
3952
3953 for (i = 0; i < 2; i++)
3954 {
3955 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3956 2000), i);
3957
3958 symbol_cost[i] = computation_cost (addr, i) + 1;
3959
3960 address_cost[i]
3961 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3962 if (dump_file && (dump_flags & TDF_DETAILS))
3963 {
3964 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3965 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3966 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3967 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3968 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3969 fprintf (dump_file, "\n");
3970 }
3971 }
3972
3973 costs_initialized = true;
3974 }
3975
3976 STRIP_NOPS (expr);
3977
3978 if (SSA_VAR_P (expr))
3979 return no_cost;
3980
3981 if (is_gimple_min_invariant (expr))
3982 {
3983 if (TREE_CODE (expr) == INTEGER_CST)
3984 return new_cost (integer_cost [speed], 0);
3985
3986 if (TREE_CODE (expr) == ADDR_EXPR)
3987 {
3988 tree obj = TREE_OPERAND (expr, 0);
3989
3990 if (TREE_CODE (obj) == VAR_DECL
3991 || TREE_CODE (obj) == PARM_DECL
3992 || TREE_CODE (obj) == RESULT_DECL)
3993 return new_cost (symbol_cost [speed], 0);
3994 }
3995
3996 return new_cost (address_cost [speed], 0);
3997 }
3998
3999 switch (TREE_CODE (expr))
4000 {
4001 case POINTER_PLUS_EXPR:
4002 case PLUS_EXPR:
4003 case MINUS_EXPR:
4004 case MULT_EXPR:
4005 op0 = TREE_OPERAND (expr, 0);
4006 op1 = TREE_OPERAND (expr, 1);
4007 STRIP_NOPS (op0);
4008 STRIP_NOPS (op1);
4009 break;
4010
4011 CASE_CONVERT:
4012 case NEGATE_EXPR:
4013 op0 = TREE_OPERAND (expr, 0);
4014 STRIP_NOPS (op0);
4015 op1 = NULL_TREE;
4016 break;
4017
4018 default:
4019 /* Just an arbitrary value, FIXME. */
4020 return new_cost (target_spill_cost[speed], 0);
4021 }
4022
4023 if (op0 == NULL_TREE
4024 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4025 cost0 = no_cost;
4026 else
4027 cost0 = force_expr_to_var_cost (op0, speed);
4028
4029 if (op1 == NULL_TREE
4030 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4031 cost1 = no_cost;
4032 else
4033 cost1 = force_expr_to_var_cost (op1, speed);
4034
4035 mode = TYPE_MODE (TREE_TYPE (expr));
4036 switch (TREE_CODE (expr))
4037 {
4038 case POINTER_PLUS_EXPR:
4039 case PLUS_EXPR:
4040 case MINUS_EXPR:
4041 case NEGATE_EXPR:
4042 cost = new_cost (add_cost (speed, mode), 0);
4043 if (TREE_CODE (expr) != NEGATE_EXPR)
4044 {
4045 tree mult = NULL_TREE;
4046 comp_cost sa_cost;
4047 if (TREE_CODE (op1) == MULT_EXPR)
4048 mult = op1;
4049 else if (TREE_CODE (op0) == MULT_EXPR)
4050 mult = op0;
4051
4052 if (mult != NULL_TREE
4053 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4054 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4055 speed, &sa_cost))
4056 return sa_cost;
4057 }
4058 break;
4059
4060 CASE_CONVERT:
4061 {
4062 tree inner_mode, outer_mode;
4063 outer_mode = TREE_TYPE (expr);
4064 inner_mode = TREE_TYPE (op0);
4065 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4066 TYPE_MODE (inner_mode), speed), 0);
4067 }
4068 break;
4069
4070 case MULT_EXPR:
4071 if (cst_and_fits_in_hwi (op0))
4072 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4073 mode, speed), 0);
4074 else if (cst_and_fits_in_hwi (op1))
4075 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4076 mode, speed), 0);
4077 else
4078 return new_cost (target_spill_cost [speed], 0);
4079 break;
4080
4081 default:
4082 gcc_unreachable ();
4083 }
4084
4085 cost = add_costs (cost, cost0);
4086 cost = add_costs (cost, cost1);
4087
4088 /* Bound the cost by target_spill_cost. The parts of complicated
4089 computations often are either loop invariant or at least can
4090 be shared between several iv uses, so letting this grow without
4091 limits would not give reasonable results. */
4092 if (cost.cost > (int) target_spill_cost [speed])
4093 cost.cost = target_spill_cost [speed];
4094
4095 return cost;
4096 }
4097
4098 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4099 invariants the computation depends on. */
4100
4101 static comp_cost
4102 force_var_cost (struct ivopts_data *data,
4103 tree expr, bitmap *depends_on)
4104 {
4105 if (depends_on)
4106 {
4107 fd_ivopts_data = data;
4108 walk_tree (&expr, find_depends, depends_on, NULL);
4109 }
4110
4111 return force_expr_to_var_cost (expr, data->speed);
4112 }
4113
4114 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4115 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
4116 set to false if the corresponding part is missing. DEPENDS_ON is a set of
4117 the invariants the computation depends on. */
4118
4119 static comp_cost
4120 split_address_cost (struct ivopts_data *data,
4121 tree addr, bool *symbol_present, bool *var_present,
4122 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4123 {
4124 tree core;
4125 HOST_WIDE_INT bitsize;
4126 HOST_WIDE_INT bitpos;
4127 tree toffset;
4128 machine_mode mode;
4129 int unsignedp, volatilep;
4130
4131 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4132 &unsignedp, &volatilep, false);
4133
4134 if (toffset != 0
4135 || bitpos % BITS_PER_UNIT != 0
4136 || TREE_CODE (core) != VAR_DECL)
4137 {
4138 *symbol_present = false;
4139 *var_present = true;
4140 fd_ivopts_data = data;
4141 walk_tree (&addr, find_depends, depends_on, NULL);
4142 return new_cost (target_spill_cost[data->speed], 0);
4143 }
4144
4145 *offset += bitpos / BITS_PER_UNIT;
4146 if (TREE_STATIC (core)
4147 || DECL_EXTERNAL (core))
4148 {
4149 *symbol_present = true;
4150 *var_present = false;
4151 return no_cost;
4152 }
4153
4154 *symbol_present = false;
4155 *var_present = true;
4156 return no_cost;
4157 }
4158
4159 /* Estimates cost of expressing the difference of addresses E1 - E2 as
4160 var + symbol + offset. The value of the offset is added to OFFSET;
4161 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4162 part is missing. DEPENDS_ON is a set of the invariants the computation
4163 depends on. */
4164
4165 static comp_cost
4166 ptr_difference_cost (struct ivopts_data *data,
4167 tree e1, tree e2, bool *symbol_present, bool *var_present,
4168 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4169 {
4170 HOST_WIDE_INT diff = 0;
4171 aff_tree aff_e1, aff_e2;
4172 tree type;
4173
4174 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4175
4176 if (ptr_difference_const (e1, e2, &diff))
4177 {
4178 *offset += diff;
4179 *symbol_present = false;
4180 *var_present = false;
4181 return no_cost;
4182 }
4183
4184 if (integer_zerop (e2))
4185 return split_address_cost (data, TREE_OPERAND (e1, 0),
4186 symbol_present, var_present, offset, depends_on);
4187
4188 *symbol_present = false;
4189 *var_present = true;
4190
4191 type = signed_type_for (TREE_TYPE (e1));
4192 tree_to_aff_combination (e1, type, &aff_e1);
4193 tree_to_aff_combination (e2, type, &aff_e2);
4194 aff_combination_scale (&aff_e2, -1);
4195 aff_combination_add (&aff_e1, &aff_e2);
4196
4197 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4198 }
4199
4200 /* Estimates cost of expressing the difference E1 - E2 as
4201 var + symbol + offset. The value of the offset is added to OFFSET;
4202 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4203 part is missing. DEPENDS_ON is a set of the invariants the computation
4204 depends on. */
4205
4206 static comp_cost
4207 difference_cost (struct ivopts_data *data,
4208 tree e1, tree e2, bool *symbol_present, bool *var_present,
4209 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4210 {
4211 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4212 unsigned HOST_WIDE_INT off1, off2;
4213 aff_tree aff_e1, aff_e2;
4214 tree type;
4215
4216 e1 = strip_offset (e1, &off1);
4217 e2 = strip_offset (e2, &off2);
4218 *offset += off1 - off2;
4219
4220 STRIP_NOPS (e1);
4221 STRIP_NOPS (e2);
4222
4223 if (TREE_CODE (e1) == ADDR_EXPR)
4224 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4225 offset, depends_on);
4226 *symbol_present = false;
4227
4228 if (operand_equal_p (e1, e2, 0))
4229 {
4230 *var_present = false;
4231 return no_cost;
4232 }
4233
4234 *var_present = true;
4235
4236 if (integer_zerop (e2))
4237 return force_var_cost (data, e1, depends_on);
4238
4239 if (integer_zerop (e1))
4240 {
4241 comp_cost cost = force_var_cost (data, e2, depends_on);
4242 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4243 return cost;
4244 }
4245
4246 type = signed_type_for (TREE_TYPE (e1));
4247 tree_to_aff_combination (e1, type, &aff_e1);
4248 tree_to_aff_combination (e2, type, &aff_e2);
4249 aff_combination_scale (&aff_e2, -1);
4250 aff_combination_add (&aff_e1, &aff_e2);
4251
4252 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4253 }
4254
4255 /* Returns true if AFF1 and AFF2 are identical. */
4256
4257 static bool
4258 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4259 {
4260 unsigned i;
4261
4262 if (aff1->n != aff2->n)
4263 return false;
4264
4265 for (i = 0; i < aff1->n; i++)
4266 {
4267 if (aff1->elts[i].coef != aff2->elts[i].coef)
4268 return false;
4269
4270 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4271 return false;
4272 }
4273 return true;
4274 }
4275
4276 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4277
4278 static int
4279 get_expr_id (struct ivopts_data *data, tree expr)
4280 {
4281 struct iv_inv_expr_ent ent;
4282 struct iv_inv_expr_ent **slot;
4283
4284 ent.expr = expr;
4285 ent.hash = iterative_hash_expr (expr, 0);
4286 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4287 if (*slot)
4288 return (*slot)->id;
4289
4290 *slot = XNEW (struct iv_inv_expr_ent);
4291 (*slot)->expr = expr;
4292 (*slot)->hash = ent.hash;
4293 (*slot)->id = data->inv_expr_id++;
4294 return (*slot)->id;
4295 }
4296
4297 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4298 requires a new compiler generated temporary. Returns -1 otherwise.
4299 ADDRESS_P is a flag indicating if the expression is for address
4300 computation. */
4301
4302 static int
4303 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4304 tree cbase, HOST_WIDE_INT ratio,
4305 bool address_p)
4306 {
4307 aff_tree ubase_aff, cbase_aff;
4308 tree expr, ub, cb;
4309
4310 STRIP_NOPS (ubase);
4311 STRIP_NOPS (cbase);
4312 ub = ubase;
4313 cb = cbase;
4314
4315 if ((TREE_CODE (ubase) == INTEGER_CST)
4316 && (TREE_CODE (cbase) == INTEGER_CST))
4317 return -1;
4318
4319 /* Strips the constant part. */
4320 if (TREE_CODE (ubase) == PLUS_EXPR
4321 || TREE_CODE (ubase) == MINUS_EXPR
4322 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4323 {
4324 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4325 ubase = TREE_OPERAND (ubase, 0);
4326 }
4327
4328 /* Strips the constant part. */
4329 if (TREE_CODE (cbase) == PLUS_EXPR
4330 || TREE_CODE (cbase) == MINUS_EXPR
4331 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4332 {
4333 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4334 cbase = TREE_OPERAND (cbase, 0);
4335 }
4336
4337 if (address_p)
4338 {
4339 if (((TREE_CODE (ubase) == SSA_NAME)
4340 || (TREE_CODE (ubase) == ADDR_EXPR
4341 && is_gimple_min_invariant (ubase)))
4342 && (TREE_CODE (cbase) == INTEGER_CST))
4343 return -1;
4344
4345 if (((TREE_CODE (cbase) == SSA_NAME)
4346 || (TREE_CODE (cbase) == ADDR_EXPR
4347 && is_gimple_min_invariant (cbase)))
4348 && (TREE_CODE (ubase) == INTEGER_CST))
4349 return -1;
4350 }
4351
4352 if (ratio == 1)
4353 {
4354 if (operand_equal_p (ubase, cbase, 0))
4355 return -1;
4356
4357 if (TREE_CODE (ubase) == ADDR_EXPR
4358 && TREE_CODE (cbase) == ADDR_EXPR)
4359 {
4360 tree usym, csym;
4361
4362 usym = TREE_OPERAND (ubase, 0);
4363 csym = TREE_OPERAND (cbase, 0);
4364 if (TREE_CODE (usym) == ARRAY_REF)
4365 {
4366 tree ind = TREE_OPERAND (usym, 1);
4367 if (TREE_CODE (ind) == INTEGER_CST
4368 && tree_fits_shwi_p (ind)
4369 && tree_to_shwi (ind) == 0)
4370 usym = TREE_OPERAND (usym, 0);
4371 }
4372 if (TREE_CODE (csym) == ARRAY_REF)
4373 {
4374 tree ind = TREE_OPERAND (csym, 1);
4375 if (TREE_CODE (ind) == INTEGER_CST
4376 && tree_fits_shwi_p (ind)
4377 && tree_to_shwi (ind) == 0)
4378 csym = TREE_OPERAND (csym, 0);
4379 }
4380 if (operand_equal_p (usym, csym, 0))
4381 return -1;
4382 }
4383 /* Now do a more complex comparison. */
4384 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4385 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4386 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4387 return -1;
4388 }
4389
4390 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4391 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4392
4393 aff_combination_scale (&cbase_aff, -1 * ratio);
4394 aff_combination_add (&ubase_aff, &cbase_aff);
4395 expr = aff_combination_to_tree (&ubase_aff);
4396 return get_expr_id (data, expr);
4397 }
4398
4399
4400
4401 /* Determines the cost of the computation by which USE is expressed
4402 from induction variable CAND. If ADDRESS_P is true, we just need
4403 to create an address from it, otherwise we want to get it into a
4404 register. A set of invariants we depend on is stored in
4405 DEPENDS_ON. AT is the statement at which the value is computed.
4406 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4407 addressing is likely. */
4408
4409 static comp_cost
4410 get_computation_cost_at (struct ivopts_data *data,
4411 struct iv_use *use, struct iv_cand *cand,
4412 bool address_p, bitmap *depends_on, gimple at,
4413 bool *can_autoinc,
4414 int *inv_expr_id)
4415 {
4416 tree ubase = use->iv->base, ustep = use->iv->step;
4417 tree cbase, cstep;
4418 tree utype = TREE_TYPE (ubase), ctype;
4419 unsigned HOST_WIDE_INT cstepi, offset = 0;
4420 HOST_WIDE_INT ratio, aratio;
4421 bool var_present, symbol_present, stmt_is_after_inc;
4422 comp_cost cost;
4423 widest_int rat;
4424 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4425 machine_mode mem_mode = (address_p
4426 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4427 : VOIDmode);
4428
4429 *depends_on = NULL;
4430
4431 /* Only consider real candidates. */
4432 if (!cand->iv)
4433 return infinite_cost;
4434
4435 cbase = cand->iv->base;
4436 cstep = cand->iv->step;
4437 ctype = TREE_TYPE (cbase);
4438
4439 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4440 {
4441 /* We do not have a precision to express the values of the use. */
4442 return infinite_cost;
4443 }
4444
4445 if (address_p
4446 || (use->iv->base_object
4447 && cand->iv->base_object
4448 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4449 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4450 {
4451 /* Do not try to express the address of an object with a computation based
4452 on the address of a different object. This may cause problems in
4453 RTL-level alias analysis (which does not expect this to happen,
4454 as it is illegal in C), and would be unlikely to be useful
4455 anyway. */
4456 if (use->iv->base_object
4457 && cand->iv->base_object
4458 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4459 return infinite_cost;
4460 }
4461
4462 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4463 {
4464 /* TODO -- add direct handling of this case. */
4465 goto fallback;
4466 }
4467
4468 /* CSTEPI is removed from the offset in case the statement is after the
4469 increment. If the step is not constant, we use zero instead.
4470 This is a bit imprecise (there is the extra addition), but
4471 redundancy elimination is likely to transform the code so that
4472 it uses the value of the variable before the increment anyway,
4473 so this is not too unrealistic. */
4474 if (cst_and_fits_in_hwi (cstep))
4475 cstepi = int_cst_value (cstep);
4476 else
4477 cstepi = 0;
4478
4479 if (!constant_multiple_of (ustep, cstep, &rat))
4480 return infinite_cost;
4481
4482 if (wi::fits_shwi_p (rat))
4483 ratio = rat.to_shwi ();
4484 else
4485 return infinite_cost;
4486
4487 STRIP_NOPS (cbase);
4488 ctype = TREE_TYPE (cbase);
4489
4490 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4491
4492 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4493 or ratio == 1, it is better to handle this like
4494
4495 ubase - ratio * cbase + ratio * var
4496
4497 (this also holds in the case ratio == -1, TODO). */
4498
4499 if (cst_and_fits_in_hwi (cbase))
4500 {
4501 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4502 cost = difference_cost (data,
4503 ubase, build_int_cst (utype, 0),
4504 &symbol_present, &var_present, &offset,
4505 depends_on);
4506 cost.cost /= avg_loop_niter (data->current_loop);
4507 }
4508 else if (ratio == 1)
4509 {
4510 tree real_cbase = cbase;
4511
4512 /* Check to see if any adjustment is needed. */
4513 if (cstepi == 0 && stmt_is_after_inc)
4514 {
4515 aff_tree real_cbase_aff;
4516 aff_tree cstep_aff;
4517
4518 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4519 &real_cbase_aff);
4520 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4521
4522 aff_combination_add (&real_cbase_aff, &cstep_aff);
4523 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4524 }
4525
4526 cost = difference_cost (data,
4527 ubase, real_cbase,
4528 &symbol_present, &var_present, &offset,
4529 depends_on);
4530 cost.cost /= avg_loop_niter (data->current_loop);
4531 }
4532 else if (address_p
4533 && !POINTER_TYPE_P (ctype)
4534 && multiplier_allowed_in_address_p
4535 (ratio, mem_mode,
4536 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4537 {
4538 cbase
4539 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4540 cost = difference_cost (data,
4541 ubase, cbase,
4542 &symbol_present, &var_present, &offset,
4543 depends_on);
4544 cost.cost /= avg_loop_niter (data->current_loop);
4545 }
4546 else
4547 {
4548 cost = force_var_cost (data, cbase, depends_on);
4549 cost = add_costs (cost,
4550 difference_cost (data,
4551 ubase, build_int_cst (utype, 0),
4552 &symbol_present, &var_present,
4553 &offset, depends_on));
4554 cost.cost /= avg_loop_niter (data->current_loop);
4555 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4556 }
4557
4558 /* The set of invariants depended on by the sub-use has already been computed
4559 for the first use in the group. */
4560 if (use->sub_id)
4561 {
4562 cost.cost = 0;
4563 if (depends_on && *depends_on)
4564 bitmap_clear (*depends_on);
4565 }
4566 else if (inv_expr_id)
4567 {
4568 *inv_expr_id =
4569 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4570 /* Clear depends on. */
4571 if (*inv_expr_id != -1 && depends_on && *depends_on)
4572 bitmap_clear (*depends_on);
4573 }
4574
4575 /* If we are after the increment, the value of the candidate is higher by
4576 one iteration. */
4577 if (stmt_is_after_inc)
4578 offset -= ratio * cstepi;
4579
4580 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4581 (symbol/var1/const parts may be omitted). If we are looking for an
4582 address, find the cost of addressing this. */
4583 if (address_p)
4584 return add_costs (cost,
4585 get_address_cost (symbol_present, var_present,
4586 offset, ratio, cstepi,
4587 mem_mode,
4588 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4589 speed, stmt_is_after_inc,
4590 can_autoinc));
4591
4592 /* Otherwise estimate the costs for computing the expression. */
4593 if (!symbol_present && !var_present && !offset)
4594 {
4595 if (ratio != 1)
4596 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4597 return cost;
4598 }
4599
4600 /* Symbol + offset should be compile-time computable so consider that they
4601 are added once to the variable, if present. */
4602 if (var_present && (symbol_present || offset))
4603 cost.cost += adjust_setup_cost (data,
4604 add_cost (speed, TYPE_MODE (ctype)));
4605
4606 /* Having offset does not affect runtime cost in case it is added to
4607 symbol, but it increases complexity. */
4608 if (offset)
4609 cost.complexity++;
4610
4611 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4612
4613 aratio = ratio > 0 ? ratio : -ratio;
4614 if (aratio != 1)
4615 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4616 return cost;
4617
4618 fallback:
4619 if (can_autoinc)
4620 *can_autoinc = false;
4621
4622 {
4623 /* Just get the expression, expand it and measure the cost. */
4624 tree comp = get_computation_at (data->current_loop, use, cand, at);
4625
4626 if (!comp)
4627 return infinite_cost;
4628
4629 if (address_p)
4630 comp = build_simple_mem_ref (comp);
4631
4632 return new_cost (computation_cost (comp, speed), 0);
4633 }
4634 }
4635
4636 /* Determines the cost of the computation by which USE is expressed
4637 from induction variable CAND. If ADDRESS_P is true, we just need
4638 to create an address from it, otherwise we want to get it into a
4639 register. A set of invariants we depend on is stored in
4640 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4641 autoinc addressing is likely. */
4642
4643 static comp_cost
4644 get_computation_cost (struct ivopts_data *data,
4645 struct iv_use *use, struct iv_cand *cand,
4646 bool address_p, bitmap *depends_on,
4647 bool *can_autoinc, int *inv_expr_id)
4648 {
4649 return get_computation_cost_at (data,
4650 use, cand, address_p, depends_on, use->stmt,
4651 can_autoinc, inv_expr_id);
4652 }
4653
4654 /* Determines cost of basing replacement of USE on CAND in a generic
4655 expression. */
4656
4657 static bool
4658 determine_use_iv_cost_generic (struct ivopts_data *data,
4659 struct iv_use *use, struct iv_cand *cand)
4660 {
4661 bitmap depends_on;
4662 comp_cost cost;
4663 int inv_expr_id = -1;
4664
4665 /* The simple case first -- if we need to express value of the preserved
4666 original biv, the cost is 0. This also prevents us from counting the
4667 cost of increment twice -- once at this use and once in the cost of
4668 the candidate. */
4669 if (cand->pos == IP_ORIGINAL
4670 && cand->incremented_at == use->stmt)
4671 {
4672 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4673 ERROR_MARK, -1);
4674 return true;
4675 }
4676
4677 cost = get_computation_cost (data, use, cand, false, &depends_on,
4678 NULL, &inv_expr_id);
4679
4680 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4681 inv_expr_id);
4682
4683 return !infinite_cost_p (cost);
4684 }
4685
4686 /* Determines cost of basing replacement of USE on CAND in an address. */
4687
4688 static bool
4689 determine_use_iv_cost_address (struct ivopts_data *data,
4690 struct iv_use *use, struct iv_cand *cand)
4691 {
4692 bitmap depends_on;
4693 bool can_autoinc;
4694 int inv_expr_id = -1;
4695 struct iv_use *sub_use;
4696 comp_cost sub_cost;
4697 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4698 &can_autoinc, &inv_expr_id);
4699
4700 if (cand->ainc_use == use)
4701 {
4702 if (can_autoinc)
4703 cost.cost -= cand->cost_step;
4704 /* If we generated the candidate solely for exploiting autoincrement
4705 opportunities, and it turns out it can't be used, set the cost to
4706 infinity to make sure we ignore it. */
4707 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4708 cost = infinite_cost;
4709 }
4710 for (sub_use = use->next;
4711 sub_use && !infinite_cost_p (cost);
4712 sub_use = sub_use->next)
4713 {
4714 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4715 &can_autoinc, &inv_expr_id);
4716 cost = add_costs (cost, sub_cost);
4717 }
4718
4719 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4720 inv_expr_id);
4721
4722 return !infinite_cost_p (cost);
4723 }
4724
4725 /* Computes value of candidate CAND at position AT in iteration NITER, and
4726 stores it to VAL. */
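/* The value computed below is BASE + STEP * NITER, with one extra STEP
   added when AT is after the increment of CAND; the step and iteration
   count are converted to an unsigned type so that the multiplication and
   addition cannot overflow. */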
4727
4728 static void
4729 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4730 aff_tree *val)
4731 {
4732 aff_tree step, delta, nit;
4733 struct iv *iv = cand->iv;
4734 tree type = TREE_TYPE (iv->base);
4735 tree steptype = type;
4736 if (POINTER_TYPE_P (type))
4737 steptype = sizetype;
4738 steptype = unsigned_type_for (type);
4739
4740 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4741 aff_combination_convert (&step, steptype);
4742 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4743 aff_combination_convert (&nit, steptype);
4744 aff_combination_mult (&nit, &step, &delta);
4745 if (stmt_after_increment (loop, cand, at))
4746 aff_combination_add (&delta, &step);
4747
4748 tree_to_aff_combination (iv->base, type, val);
4749 if (!POINTER_TYPE_P (type))
4750 aff_combination_convert (val, steptype);
4751 aff_combination_add (val, &delta);
4752 }
4753
4754 /* Returns period of induction variable iv. */
4755
4756 static tree
4757 iv_period (struct iv *iv)
4758 {
4759 tree step = iv->step, period, type;
4760 tree pow2div;
4761
4762 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4763
4764 type = unsigned_type_for (TREE_TYPE (step));
4765 /* The period of the iv is lcm (step, type_range)/step - 1,
4766 i.e., N*type_range/step - 1. Since the type range is a power
4767 of two, N == (step >> num_of_ending_zeros_binary (step)),
4768 so the final result is
4769
4770 (type_range >> num_of_ending_zeros_binary (step)) - 1
4771
4772 */
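/* For example, with a 32-bit unsigned type and step == 4 (two trailing
   zero bits), the period is (2^32 >> 2) - 1 == 0x3fffffff. */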
4773 pow2div = num_ending_zeros (step);
4774
4775 period = build_low_bits_mask (type,
4776 (TYPE_PRECISION (type)
4777 - tree_to_uhwi (pow2div)));
4778
4779 return period;
4780 }
4781
4782 /* Returns the comparison operator used when eliminating the iv USE. */
4783
4784 static enum tree_code
4785 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4786 {
4787 struct loop *loop = data->current_loop;
4788 basic_block ex_bb;
4789 edge exit;
4790
4791 ex_bb = gimple_bb (use->stmt);
4792 exit = EDGE_SUCC (ex_bb, 0);
4793 if (flow_bb_inside_loop_p (loop, exit->dest))
4794 exit = EDGE_SUCC (ex_bb, 1);
4795
4796 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4797 }
4798
4799 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4800 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4801 calculation is performed in a non-wrapping type.
4802
4803 TODO: More generally, we could test for the situation that
4804 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4805 This would require knowing the sign of OFFSET. */
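/* For example, if BASE is p == q + n and OFFSET is n, with the addition
   done in a non-wrapping type, then BASE - OFFSET is simply q and the
   subtraction cannot overflow. */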
4806
4807 static bool
4808 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4809 {
4810 enum tree_code code;
4811 tree e1, e2;
4812 aff_tree aff_e1, aff_e2, aff_offset;
4813
4814 if (!nowrap_type_p (TREE_TYPE (base)))
4815 return false;
4816
4817 base = expand_simple_operations (base);
4818
4819 if (TREE_CODE (base) == SSA_NAME)
4820 {
4821 gimple stmt = SSA_NAME_DEF_STMT (base);
4822
4823 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4824 return false;
4825
4826 code = gimple_assign_rhs_code (stmt);
4827 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4828 return false;
4829
4830 e1 = gimple_assign_rhs1 (stmt);
4831 e2 = gimple_assign_rhs2 (stmt);
4832 }
4833 else
4834 {
4835 code = TREE_CODE (base);
4836 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4837 return false;
4838 e1 = TREE_OPERAND (base, 0);
4839 e2 = TREE_OPERAND (base, 1);
4840 }
4841
4842 /* Use affine expansion for a deeper inspection to prove the equality. */
4843 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4844 &aff_e2, &data->name_expansion_cache);
4845 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4846 &aff_offset, &data->name_expansion_cache);
4847 aff_combination_scale (&aff_offset, -1);
4848 switch (code)
4849 {
4850 case PLUS_EXPR:
4851 aff_combination_add (&aff_e2, &aff_offset);
4852 if (aff_combination_zero_p (&aff_e2))
4853 return true;
4854
4855 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4856 &aff_e1, &data->name_expansion_cache);
4857 aff_combination_add (&aff_e1, &aff_offset);
4858 return aff_combination_zero_p (&aff_e1);
4859
4860 case POINTER_PLUS_EXPR:
4861 aff_combination_add (&aff_e2, &aff_offset);
4862 return aff_combination_zero_p (&aff_e2);
4863
4864 default:
4865 return false;
4866 }
4867 }
4868
4869 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4870 comparison with CAND. NITER describes the number of iterations of
4871 the loops. If successful, the comparison in COMP_P is altered accordingly.
4872
4873 We aim to handle the following situation:
4874
4875 sometype *base, *p;
4876 int a, b, i;
4877
4878 i = a;
4879 p = p_0 = base + a;
4880
4881 do
4882 {
4883 bla (*p);
4884 p++;
4885 i++;
4886 }
4887 while (i < b);
4888
4889 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4890 We aim to optimize this to
4891
4892 p = p_0 = base + a;
4893 do
4894 {
4895 bla (*p);
4896 p++;
4897 }
4898 while (p < p_0 - a + b);
4899
4900 This preserves correctness, since the pointer arithmetic does not
4901 overflow. More precisely:
4902
4903 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4904 overflow in computing it or the values of p.
4905 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4906 overflow. To prove this, we use the fact that p_0 = base + a. */
4907
4908 static bool
4909 iv_elimination_compare_lt (struct ivopts_data *data,
4910 struct iv_cand *cand, enum tree_code *comp_p,
4911 struct tree_niter_desc *niter)
4912 {
4913 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4914 struct aff_tree nit, tmpa, tmpb;
4915 enum tree_code comp;
4916 HOST_WIDE_INT step;
4917
4918 /* We need to know that the candidate induction variable does not overflow.
4919 While more complex analysis may be used to prove this, for now just
4920 check that the variable appears in the original program and that it
4921 is computed in a type that guarantees no overflows. */
4922 cand_type = TREE_TYPE (cand->iv->base);
4923 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4924 return false;
4925
4926 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4927 the calculation of the BOUND could overflow, making the comparison
4928 invalid. */
4929 if (!data->loop_single_exit_p)
4930 return false;
4931
4932 /* We need to be able to decide whether the candidate is increasing or
4933 decreasing in order to choose the right comparison operator. */
4934 if (!cst_and_fits_in_hwi (cand->iv->step))
4935 return false;
4936 step = int_cst_value (cand->iv->step);
4937
4938 /* Check that the number of iterations matches the expected pattern:
4939 a + 1 > b ? 0 : b - a - 1. */
4940 mbz = niter->may_be_zero;
4941 if (TREE_CODE (mbz) == GT_EXPR)
4942 {
4943 /* Handle a + 1 > b. */
4944 tree op0 = TREE_OPERAND (mbz, 0);
4945 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4946 {
4947 a = TREE_OPERAND (op0, 0);
4948 b = TREE_OPERAND (mbz, 1);
4949 }
4950 else
4951 return false;
4952 }
4953 else if (TREE_CODE (mbz) == LT_EXPR)
4954 {
4955 tree op1 = TREE_OPERAND (mbz, 1);
4956
4957 /* Handle b < a + 1. */
4958 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4959 {
4960 a = TREE_OPERAND (op1, 0);
4961 b = TREE_OPERAND (mbz, 0);
4962 }
4963 else
4964 return false;
4965 }
4966 else
4967 return false;
4968
4969 /* Expected number of iterations is B - A - 1. Check that it matches
4970 the actual number, i.e., that B - A - NITER = 1. */
4971 tree_to_aff_combination (niter->niter, nit_type, &nit);
4972 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4973 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4974 aff_combination_scale (&nit, -1);
4975 aff_combination_scale (&tmpa, -1);
4976 aff_combination_add (&tmpb, &tmpa);
4977 aff_combination_add (&tmpb, &nit);
4978 if (tmpb.n != 0 || tmpb.offset != 1)
4979 return false;
4980
4981 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4982 overflow. */
4983 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4984 cand->iv->step,
4985 fold_convert (TREE_TYPE (cand->iv->step), a));
4986 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4987 return false;
4988
4989 /* Determine the new comparison operator. */
4990 comp = step < 0 ? GT_EXPR : LT_EXPR;
4991 if (*comp_p == NE_EXPR)
4992 *comp_p = comp;
4993 else if (*comp_p == EQ_EXPR)
4994 *comp_p = invert_tree_comparison (comp, false);
4995 else
4996 gcc_unreachable ();
4997
4998 return true;
4999 }
5000
5001 /* Check whether it is possible to express the condition in USE by comparison
5002 of candidate CAND. If so, store the value compared with to BOUND, and the
5003 comparison operator to COMP. */
5004
5005 static bool
5006 may_eliminate_iv (struct ivopts_data *data,
5007 struct iv_use *use, struct iv_cand *cand, tree *bound,
5008 enum tree_code *comp)
5009 {
5010 basic_block ex_bb;
5011 edge exit;
5012 tree period;
5013 struct loop *loop = data->current_loop;
5014 aff_tree bnd;
5015 struct tree_niter_desc *desc = NULL;
5016
5017 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5018 return false;
5019
5020 /* For now, this works only for exits that dominate the loop latch.
5021 TODO: extend to other conditions inside the loop body. */
5022 ex_bb = gimple_bb (use->stmt);
5023 if (use->stmt != last_stmt (ex_bb)
5024 || gimple_code (use->stmt) != GIMPLE_COND
5025 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5026 return false;
5027
5028 exit = EDGE_SUCC (ex_bb, 0);
5029 if (flow_bb_inside_loop_p (loop, exit->dest))
5030 exit = EDGE_SUCC (ex_bb, 1);
5031 if (flow_bb_inside_loop_p (loop, exit->dest))
5032 return false;
5033
5034 desc = niter_for_exit (data, exit);
5035 if (!desc)
5036 return false;
5037
5038 /* Determine whether we can use the variable to test the exit condition.
5039 This is the case iff the period of the induction variable is greater
5040 than the number of iterations for which the exit condition is true. */
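/* For example, a candidate with a 32-bit unsigned step of 1 has period
   2^32 - 1, so it can be used as long as the exit condition is true for
   at most 2^32 - 1 iterations. */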
5041 period = iv_period (cand->iv);
5042
5043 /* If the number of iterations is constant, compare against it directly. */
5044 if (TREE_CODE (desc->niter) == INTEGER_CST)
5045 {
5046 /* See cand_value_at. */
5047 if (stmt_after_increment (loop, cand, use->stmt))
5048 {
5049 if (!tree_int_cst_lt (desc->niter, period))
5050 return false;
5051 }
5052 else
5053 {
5054 if (tree_int_cst_lt (period, desc->niter))
5055 return false;
5056 }
5057 }
5058
5059 /* If not, and if this is the only possible exit of the loop, see whether
5060 we can get a conservative estimate on the number of iterations of the
5061 entire loop and compare against that instead. */
5062 else
5063 {
5064 widest_int period_value, max_niter;
5065
5066 max_niter = desc->max;
5067 if (stmt_after_increment (loop, cand, use->stmt))
5068 max_niter += 1;
5069 period_value = wi::to_widest (period);
5070 if (wi::gtu_p (max_niter, period_value))
5071 {
5072 /* See if we can take advantage of inferred loop bound information. */
5073 if (data->loop_single_exit_p)
5074 {
5075 if (!max_loop_iterations (loop, &max_niter))
5076 return false;
5077 /* The loop bound is already adjusted by adding 1. */
5078 if (wi::gtu_p (max_niter, period_value))
5079 return false;
5080 }
5081 else
5082 return false;
5083 }
5084 }
5085
5086 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5087
5088 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5089 aff_combination_to_tree (&bnd));
5090 *comp = iv_elimination_compare (data, use);
5091
5092 /* It is unlikely that computing the number of iterations using division
5093 would be more profitable than keeping the original induction variable. */
5094 if (expression_expensive_p (*bound))
5095 return false;
5096
5097 /* Sometimes, it is possible to handle the situation that the number of
5098 iterations may be zero unless additional assumptions hold, by using <
5099 instead of != in the exit condition.
5100
5101 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5102 base the exit condition on it. However, that is often too
5103 expensive. */
5104 if (!integer_zerop (desc->may_be_zero))
5105 return iv_elimination_compare_lt (data, cand, comp, desc);
5106
5107 return true;
5108 }
5109
5110 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5111 be copied if it is used in the loop body and DATA->body_includes_call. */
5112
5113 static int
5114 parm_decl_cost (struct ivopts_data *data, tree bound)
5115 {
5116 tree sbound = bound;
5117 STRIP_NOPS (sbound);
5118
5119 if (TREE_CODE (sbound) == SSA_NAME
5120 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5121 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5122 && data->body_includes_call)
5123 return COSTS_N_INSNS (1);
5124
5125 return 0;
5126 }
5127
5128 /* Determines cost of basing replacement of USE on CAND in a condition. */
5129
5130 static bool
5131 determine_use_iv_cost_condition (struct ivopts_data *data,
5132 struct iv_use *use, struct iv_cand *cand)
5133 {
5134 tree bound = NULL_TREE;
5135 struct iv *cmp_iv;
5136 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5137 comp_cost elim_cost, express_cost, cost, bound_cost;
5138 bool ok;
5139 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5140 tree *control_var, *bound_cst;
5141 enum tree_code comp = ERROR_MARK;
5142
5143 /* Only consider real candidates. */
5144 if (!cand->iv)
5145 {
5146 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5147 ERROR_MARK, -1);
5148 return false;
5149 }
5150
5151 /* Try iv elimination. */
5152 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5153 {
5154 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5155 if (elim_cost.cost == 0)
5156 elim_cost.cost = parm_decl_cost (data, bound);
5157 else if (TREE_CODE (bound) == INTEGER_CST)
5158 elim_cost.cost = 0;
5159 /* If we replace a loop condition 'i < n' with 'p < base + n',
5160 depends_on_elim will have 'base' and 'n' set, which implies
5161 that both 'base' and 'n' will be live during the loop. More likely,
5162 'base + n' will be loop invariant, resulting in only one live value
5163 during the loop. So in that case we clear depends_on_elim and set
5164 elim_inv_expr_id instead. */
5165 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5166 {
5167 elim_inv_expr_id = get_expr_id (data, bound);
5168 bitmap_clear (depends_on_elim);
5169 }
5170 /* The bound is a loop invariant, so it will be only computed
5171 once. */
5172 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5173 }
5174 else
5175 elim_cost = infinite_cost;
5176
5177 /* Try expressing the original giv. If it is compared with an invariant,
5178 note that we cannot get rid of it. */
5179 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5180 NULL, &cmp_iv);
5181 gcc_assert (ok);
5182
5183 /* When the condition is a comparison of the candidate IV against
5184 zero, prefer this IV.
5185
5186 TODO: The constant that we're subtracting from the cost should
5187 be target-dependent. This information should be added to the
5188 target costs for each backend. */
5189 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5190 && integer_zerop (*bound_cst)
5191 && (operand_equal_p (*control_var, cand->var_after, 0)
5192 || operand_equal_p (*control_var, cand->var_before, 0)))
5193 elim_cost.cost -= 1;
5194
5195 express_cost = get_computation_cost (data, use, cand, false,
5196 &depends_on_express, NULL,
5197 &express_inv_expr_id);
5198 fd_ivopts_data = data;
5199 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5200
5201 /* Count the cost of the original bound as well. */
5202 bound_cost = force_var_cost (data, *bound_cst, NULL);
5203 if (bound_cost.cost == 0)
5204 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5205 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5206 bound_cost.cost = 0;
5207 express_cost.cost += bound_cost.cost;
5208
5209 /* Choose the better approach, preferring the eliminated IV. */
5210 if (compare_costs (elim_cost, express_cost) <= 0)
5211 {
5212 cost = elim_cost;
5213 depends_on = depends_on_elim;
5214 depends_on_elim = NULL;
5215 inv_expr_id = elim_inv_expr_id;
5216 }
5217 else
5218 {
5219 cost = express_cost;
5220 depends_on = depends_on_express;
5221 depends_on_express = NULL;
5222 bound = NULL_TREE;
5223 comp = ERROR_MARK;
5224 inv_expr_id = express_inv_expr_id;
5225 }
5226
5227 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5228
5229 if (depends_on_elim)
5230 BITMAP_FREE (depends_on_elim);
5231 if (depends_on_express)
5232 BITMAP_FREE (depends_on_express);
5233
5234 return !infinite_cost_p (cost);
5235 }
5236
5237 /* Determines cost of basing replacement of USE on CAND. Returns false
5238 if USE cannot be based on CAND. */
5239
5240 static bool
5241 determine_use_iv_cost (struct ivopts_data *data,
5242 struct iv_use *use, struct iv_cand *cand)
5243 {
5244 switch (use->type)
5245 {
5246 case USE_NONLINEAR_EXPR:
5247 return determine_use_iv_cost_generic (data, use, cand);
5248
5249 case USE_ADDRESS:
5250 return determine_use_iv_cost_address (data, use, cand);
5251
5252 case USE_COMPARE:
5253 return determine_use_iv_cost_condition (data, use, cand);
5254
5255 default:
5256 gcc_unreachable ();
5257 }
5258 }
5259
5260 /* Return true if get_computation_cost indicates that autoincrement is
5261 a possibility for the pair of USE and CAND, false otherwise. */
5262
5263 static bool
5264 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5265 struct iv_cand *cand)
5266 {
5267 bitmap depends_on;
5268 bool can_autoinc;
5269 comp_cost cost;
5270
5271 if (use->type != USE_ADDRESS)
5272 return false;
5273
5274 cost = get_computation_cost (data, use, cand, true, &depends_on,
5275 &can_autoinc, NULL);
5276
5277 BITMAP_FREE (depends_on);
5278
5279 return !infinite_cost_p (cost) && can_autoinc;
5280 }
5281
5282 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5283 use that allows autoincrement, and set their AINC_USE if possible. */
5284
5285 static void
5286 set_autoinc_for_original_candidates (struct ivopts_data *data)
5287 {
5288 unsigned i, j;
5289
5290 for (i = 0; i < n_iv_cands (data); i++)
5291 {
5292 struct iv_cand *cand = iv_cand (data, i);
5293 struct iv_use *closest_before = NULL;
5294 struct iv_use *closest_after = NULL;
5295 if (cand->pos != IP_ORIGINAL)
5296 continue;
5297
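/* Find the uses in the same basic block that are nearest to the candidate's
   increment, one before it and one after it; either of them may let the
   increment be folded into an auto-increment/decrement addressing mode. */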
5298 for (j = 0; j < n_iv_uses (data); j++)
5299 {
5300 struct iv_use *use = iv_use (data, j);
5301 unsigned uid = gimple_uid (use->stmt);
5302
5303 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5304 continue;
5305
5306 if (uid < gimple_uid (cand->incremented_at)
5307 && (closest_before == NULL
5308 || uid > gimple_uid (closest_before->stmt)))
5309 closest_before = use;
5310
5311 if (uid > gimple_uid (cand->incremented_at)
5312 && (closest_after == NULL
5313 || uid < gimple_uid (closest_after->stmt)))
5314 closest_after = use;
5315 }
5316
5317 if (closest_before != NULL
5318 && autoinc_possible_for_pair (data, closest_before, cand))
5319 cand->ainc_use = closest_before;
5320 else if (closest_after != NULL
5321 && autoinc_possible_for_pair (data, closest_after, cand))
5322 cand->ainc_use = closest_after;
5323 }
5324 }
5325
5326 /* Finds the candidates for the induction variables. */
5327
5328 static void
5329 find_iv_candidates (struct ivopts_data *data)
5330 {
5331 /* Add commonly used ivs. */
5332 add_standard_iv_candidates (data);
5333
5334 /* Add old induction variables. */
5335 add_old_ivs_candidates (data);
5336
5337 /* Add induction variables derived from uses. */
5338 add_derived_ivs_candidates (data);
5339
5340 set_autoinc_for_original_candidates (data);
5341
5342 /* Record the important candidates. */
5343 record_important_candidates (data);
5344 }
5345
5346 /* Determines costs of basing the use of the iv on an iv candidate. */
5347
5348 static void
5349 determine_use_iv_costs (struct ivopts_data *data)
5350 {
5351 unsigned i, j;
5352 struct iv_use *use;
5353 struct iv_cand *cand;
5354 bitmap to_clear = BITMAP_ALLOC (NULL);
5355
5356 alloc_use_cost_map (data);
5357
5358 for (i = 0; i < n_iv_uses (data); i++)
5359 {
5360 use = iv_use (data, i);
5361
5362 if (data->consider_all_candidates)
5363 {
5364 for (j = 0; j < n_iv_cands (data); j++)
5365 {
5366 cand = iv_cand (data, j);
5367 determine_use_iv_cost (data, use, cand);
5368 }
5369 }
5370 else
5371 {
5372 bitmap_iterator bi;
5373
5374 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5375 {
5376 cand = iv_cand (data, j);
5377 if (!determine_use_iv_cost (data, use, cand))
5378 bitmap_set_bit (to_clear, j);
5379 }
5380
5381 /* Remove the candidates for which the cost is infinite from
5382 the list of related candidates. */
5383 bitmap_and_compl_into (use->related_cands, to_clear);
5384 bitmap_clear (to_clear);
5385 }
5386 }
5387
5388 BITMAP_FREE (to_clear);
5389
5390 if (dump_file && (dump_flags & TDF_DETAILS))
5391 {
5392 fprintf (dump_file, "Use-candidate costs:\n");
5393
5394 for (i = 0; i < n_iv_uses (data); i++)
5395 {
5396 use = iv_use (data, i);
5397
5398 fprintf (dump_file, "Use %d:\n", i);
5399 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5400 for (j = 0; j < use->n_map_members; j++)
5401 {
5402 if (!use->cost_map[j].cand
5403 || infinite_cost_p (use->cost_map[j].cost))
5404 continue;
5405
5406 fprintf (dump_file, " %d\t%d\t%d\t",
5407 use->cost_map[j].cand->id,
5408 use->cost_map[j].cost.cost,
5409 use->cost_map[j].cost.complexity);
5410 if (use->cost_map[j].depends_on)
5411 bitmap_print (dump_file,
5412 use->cost_map[j].depends_on, "","");
5413 if (use->cost_map[j].inv_expr_id != -1)
5414 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5415 fprintf (dump_file, "\n");
5416 }
5417
5418 fprintf (dump_file, "\n");
5419 }
5420 fprintf (dump_file, "\n");
5421 }
5422 }
5423
5424 /* Determines cost of the candidate CAND. */
5425
5426 static void
5427 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5428 {
5429 comp_cost cost_base;
5430 unsigned cost, cost_step;
5431 tree base;
5432
5433 if (!cand->iv)
5434 {
5435 cand->cost = 0;
5436 return;
5437 }
5438
5439 /* There are two costs associated with the candidate -- its increment
5440 and its initialization. The second is almost negligible for any loop
5441 that rolls enough, so we take it into account only very slightly. */
5442
5443 base = cand->iv->base;
5444 cost_base = force_var_cost (data, base, NULL);
5445 /* It will be exceptional that the iv register happens to be initialized with
5446 the proper value at no cost. In general, there will at least be a regcopy
5447 or a const set. */
5448 if (cost_base.cost == 0)
5449 cost_base.cost = COSTS_N_INSNS (1);
5450 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5451
5452 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5453
5454 /* Prefer the original ivs unless we may gain something by replacing them;
5455 the reason is to make debugging simpler, so this is not relevant for
5456 artificial ivs created by other optimization passes. */
5457 if (cand->pos != IP_ORIGINAL
5458 || !SSA_NAME_VAR (cand->var_before)
5459 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5460 cost++;
5461
5462 /* Prefer not to insert statements into latch unless there are some
5463 already (so that we do not create unnecessary jumps). */
5464 if (cand->pos == IP_END
5465 && empty_block_p (ip_end_pos (data->current_loop)))
5466 cost++;
5467
5468 cand->cost = cost;
5469 cand->cost_step = cost_step;
5470 }
5471
5472 /* Determines costs of computation of the candidates. */
5473
5474 static void
5475 determine_iv_costs (struct ivopts_data *data)
5476 {
5477 unsigned i;
5478
5479 if (dump_file && (dump_flags & TDF_DETAILS))
5480 {
5481 fprintf (dump_file, "Candidate costs:\n");
5482 fprintf (dump_file, " cand\tcost\n");
5483 }
5484
5485 for (i = 0; i < n_iv_cands (data); i++)
5486 {
5487 struct iv_cand *cand = iv_cand (data, i);
5488
5489 determine_iv_cost (data, cand);
5490
5491 if (dump_file && (dump_flags & TDF_DETAILS))
5492 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5493 }
5494
5495 if (dump_file && (dump_flags & TDF_DETAILS))
5496 fprintf (dump_file, "\n");
5497 }
5498
5499 /* Calculates cost for having SIZE induction variables. */
5500
5501 static unsigned
5502 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5503 {
5504 /* We add size to the cost, so that we prefer eliminating ivs
5505 if possible. */
5506 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5507 data->body_includes_call);
5508 }
5509
5510 /* For each size of the induction variable set determine the penalty. */
5511
5512 static void
5513 determine_set_costs (struct ivopts_data *data)
5514 {
5515 unsigned j, n;
5516 gphi *phi;
5517 gphi_iterator psi;
5518 tree op;
5519 struct loop *loop = data->current_loop;
5520 bitmap_iterator bi;
5521
5522 if (dump_file && (dump_flags & TDF_DETAILS))
5523 {
5524 fprintf (dump_file, "Global costs:\n");
5525 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5526 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5527 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5528 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5529 }
5530
5531 n = 0;
5532 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5533 {
5534 phi = psi.phi ();
5535 op = PHI_RESULT (phi);
5536
5537 if (virtual_operand_p (op))
5538 continue;
5539
5540 if (get_iv (data, op))
5541 continue;
5542
5543 n++;
5544 }
5545
5546 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5547 {
5548 struct version_info *info = ver_info (data, j);
5549
5550 if (info->inv_id && info->has_nonlin_use)
5551 n++;
5552 }
5553
5554 data->regs_used = n;
5555 if (dump_file && (dump_flags & TDF_DETAILS))
5556 fprintf (dump_file, " regs_used %d\n", n);
5557
5558 if (dump_file && (dump_flags & TDF_DETAILS))
5559 {
5560 fprintf (dump_file, " cost for size:\n");
5561 fprintf (dump_file, " ivs\tcost\n");
5562 for (j = 0; j <= 2 * target_avail_regs; j++)
5563 fprintf (dump_file, " %d\t%d\n", j,
5564 ivopts_global_cost_for_size (data, j));
5565 fprintf (dump_file, "\n");
5566 }
5567 }
5568
5569 /* Returns true if A is a cheaper cost pair than B. */
5570
5571 static bool
5572 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5573 {
5574 int cmp;
5575
5576 if (!a)
5577 return false;
5578
5579 if (!b)
5580 return true;
5581
5582 cmp = compare_costs (a->cost, b->cost);
5583 if (cmp < 0)
5584 return true;
5585
5586 if (cmp > 0)
5587 return false;
5588
5589 /* In case the costs are the same, prefer the cheaper candidate. */
5590 if (a->cand->cost < b->cand->cost)
5591 return true;
5592
5593 return false;
5594 }
5595
5596
5597 /* Returns the candidate by which USE is expressed in IVS. */
5598
5599 static struct cost_pair *
5600 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5601 {
5602 return ivs->cand_for_use[use->id];
5603 }
5604
5605 /* Computes the cost field of IVS structure. */
5606
5607 static void
5608 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5609 {
5610 comp_cost cost = ivs->cand_use_cost;
5611
5612 cost.cost += ivs->cand_cost;
5613
5614 cost.cost += ivopts_global_cost_for_size (data,
5615 ivs->n_regs + ivs->num_used_inv_expr);
5616
5617 ivs->cost = cost;
5618 }
5619
5620 /* Remove invariants in set INVS from set IVS. */
5621
5622 static void
5623 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5624 {
5625 bitmap_iterator bi;
5626 unsigned iid;
5627
5628 if (!invs)
5629 return;
5630
5631 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5632 {
5633 ivs->n_invariant_uses[iid]--;
5634 if (ivs->n_invariant_uses[iid] == 0)
5635 ivs->n_regs--;
5636 }
5637 }
5638
5639 /* Set USE not to be expressed by any candidate in IVS. */
5640
5641 static void
5642 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5643 struct iv_use *use)
5644 {
5645 unsigned uid = use->id, cid;
5646 struct cost_pair *cp;
5647
5648 cp = ivs->cand_for_use[uid];
5649 if (!cp)
5650 return;
5651 cid = cp->cand->id;
5652
5653 ivs->bad_uses++;
5654 ivs->cand_for_use[uid] = NULL;
5655 ivs->n_cand_uses[cid]--;
5656
5657 if (ivs->n_cand_uses[cid] == 0)
5658 {
5659 bitmap_clear_bit (ivs->cands, cid);
5660 /* Do not count the pseudocandidates. */
5661 if (cp->cand->iv)
5662 ivs->n_regs--;
5663 ivs->n_cands--;
5664 ivs->cand_cost -= cp->cand->cost;
5665
5666 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5667 }
5668
5669 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5670
5671 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5672
5673 if (cp->inv_expr_id != -1)
5674 {
5675 ivs->used_inv_expr[cp->inv_expr_id]--;
5676 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5677 ivs->num_used_inv_expr--;
5678 }
5679 iv_ca_recount_cost (data, ivs);
5680 }
5681
5682 /* Add invariants in set INVS to set IVS. */
5683
5684 static void
5685 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5686 {
5687 bitmap_iterator bi;
5688 unsigned iid;
5689
5690 if (!invs)
5691 return;
5692
5693 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5694 {
5695 ivs->n_invariant_uses[iid]++;
5696 if (ivs->n_invariant_uses[iid] == 1)
5697 ivs->n_regs++;
5698 }
5699 }
5700
5701 /* Set cost pair for USE in set IVS to CP. */
5702
5703 static void
5704 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5705 struct iv_use *use, struct cost_pair *cp)
5706 {
5707 unsigned uid = use->id, cid;
5708
5709 if (ivs->cand_for_use[uid] == cp)
5710 return;
5711
5712 if (ivs->cand_for_use[uid])
5713 iv_ca_set_no_cp (data, ivs, use);
5714
5715 if (cp)
5716 {
5717 cid = cp->cand->id;
5718
5719 ivs->bad_uses--;
5720 ivs->cand_for_use[uid] = cp;
5721 ivs->n_cand_uses[cid]++;
5722 if (ivs->n_cand_uses[cid] == 1)
5723 {
5724 bitmap_set_bit (ivs->cands, cid);
5725 /* Do not count the pseudocandidates. */
5726 if (cp->cand->iv)
5727 ivs->n_regs++;
5728 ivs->n_cands++;
5729 ivs->cand_cost += cp->cand->cost;
5730
5731 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5732 }
5733
5734 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5735 iv_ca_set_add_invariants (ivs, cp->depends_on);
5736
5737 if (cp->inv_expr_id != -1)
5738 {
5739 ivs->used_inv_expr[cp->inv_expr_id]++;
5740 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5741 ivs->num_used_inv_expr++;
5742 }
5743 iv_ca_recount_cost (data, ivs);
5744 }
5745 }
5746
5747 /* Extend set IVS by expressing USE by some of the candidates in it
5748 if possible. Consider all important candidates if candidates in
5749 set IVS don't give any result. */
5750
5751 static void
5752 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5753 struct iv_use *use)
5754 {
5755 struct cost_pair *best_cp = NULL, *cp;
5756 bitmap_iterator bi;
5757 unsigned i;
5758 struct iv_cand *cand;
5759
5760 gcc_assert (ivs->upto >= use->id);
5761 ivs->upto++;
5762 ivs->bad_uses++;
5763
5764 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5765 {
5766 cand = iv_cand (data, i);
5767 cp = get_use_iv_cost (data, use, cand);
5768 if (cheaper_cost_pair (cp, best_cp))
5769 best_cp = cp;
5770 }
5771
5772 if (best_cp == NULL)
5773 {
5774 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5775 {
5776 cand = iv_cand (data, i);
5777 cp = get_use_iv_cost (data, use, cand);
5778 if (cheaper_cost_pair (cp, best_cp))
5779 best_cp = cp;
5780 }
5781 }
5782
5783 iv_ca_set_cp (data, ivs, use, best_cp);
5784 }
5785
5786 /* Get cost for assignment IVS. */
5787
5788 static comp_cost
5789 iv_ca_cost (struct iv_ca *ivs)
5790 {
5791 /* This was a conditional expression but it triggered a bug in
5792 Sun C 5.5. */
5793 if (ivs->bad_uses)
5794 return infinite_cost;
5795 else
5796 return ivs->cost;
5797 }
5798
5799 /* Returns true if all dependences of CP are among invariants in IVS. */
5800
5801 static bool
5802 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5803 {
5804 unsigned i;
5805 bitmap_iterator bi;
5806
5807 if (!cp->depends_on)
5808 return true;
5809
5810 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5811 {
5812 if (ivs->n_invariant_uses[i] == 0)
5813 return false;
5814 }
5815
5816 return true;
5817 }
5818
5819 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and chains
5820 it before NEXT_CHANGE. */
5821
5822 static struct iv_ca_delta *
5823 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5824 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5825 {
5826 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5827
5828 change->use = use;
5829 change->old_cp = old_cp;
5830 change->new_cp = new_cp;
5831 change->next_change = next_change;
5832
5833 return change;
5834 }
5835
5836 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5837 are rewritten. */
5838
5839 static struct iv_ca_delta *
5840 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5841 {
5842 struct iv_ca_delta *last;
5843
5844 if (!l2)
5845 return l1;
5846
5847 if (!l1)
5848 return l2;
5849
5850 for (last = l1; last->next_change; last = last->next_change)
5851 continue;
5852 last->next_change = l2;
5853
5854 return l1;
5855 }
5856
5857 /* Reverse the list of changes DELTA, forming its inverse. */
5858
5859 static struct iv_ca_delta *
5860 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5861 {
5862 struct iv_ca_delta *act, *next, *prev = NULL;
5863
5864 for (act = delta; act; act = next)
5865 {
5866 next = act->next_change;
5867 act->next_change = prev;
5868 prev = act;
5869
5870 std::swap (act->old_cp, act->new_cp);
5871 }
5872
5873 return prev;
5874 }
5875
5876 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5877 reverted instead. */
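/* Reverting works by reversing the list with iv_ca_delta_reverse, which also
swaps each old and new cost pair, committing the reversed list, and then
reversing it back so DELTA is left unchanged for the caller. */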
5878
5879 static void
5880 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5881 struct iv_ca_delta *delta, bool forward)
5882 {
5883 struct cost_pair *from, *to;
5884 struct iv_ca_delta *act;
5885
5886 if (!forward)
5887 delta = iv_ca_delta_reverse (delta);
5888
5889 for (act = delta; act; act = act->next_change)
5890 {
5891 from = act->old_cp;
5892 to = act->new_cp;
5893 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5894 iv_ca_set_cp (data, ivs, act->use, to);
5895 }
5896
5897 if (!forward)
5898 iv_ca_delta_reverse (delta);
5899 }
5900
5901 /* Returns true if CAND is used in IVS. */
5902
5903 static bool
5904 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5905 {
5906 return ivs->n_cand_uses[cand->id] > 0;
5907 }
5908
5909 /* Returns number of induction variable candidates in the set IVS. */
5910
5911 static unsigned
5912 iv_ca_n_cands (struct iv_ca *ivs)
5913 {
5914 return ivs->n_cands;
5915 }
5916
5917 /* Free the list of changes DELTA. */
5918
5919 static void
5920 iv_ca_delta_free (struct iv_ca_delta **delta)
5921 {
5922 struct iv_ca_delta *act, *next;
5923
5924 for (act = *delta; act; act = next)
5925 {
5926 next = act->next_change;
5927 free (act);
5928 }
5929
5930 *delta = NULL;
5931 }
5932
5933 /* Allocates a new iv candidate assignment. */
5934
5935 static struct iv_ca *
5936 iv_ca_new (struct ivopts_data *data)
5937 {
5938 struct iv_ca *nw = XNEW (struct iv_ca);
5939
5940 nw->upto = 0;
5941 nw->bad_uses = 0;
5942 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5943 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5944 nw->cands = BITMAP_ALLOC (NULL);
5945 nw->n_cands = 0;
5946 nw->n_regs = 0;
5947 nw->cand_use_cost = no_cost;
5948 nw->cand_cost = 0;
5949 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5950 nw->cost = no_cost;
5951 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5952 nw->num_used_inv_expr = 0;
5953
5954 return nw;
5955 }
5956
5957 /* Free memory occupied by the set IVS. */
5958
5959 static void
5960 iv_ca_free (struct iv_ca **ivs)
5961 {
5962 free ((*ivs)->cand_for_use);
5963 free ((*ivs)->n_cand_uses);
5964 BITMAP_FREE ((*ivs)->cands);
5965 free ((*ivs)->n_invariant_uses);
5966 free ((*ivs)->used_inv_expr);
5967 free (*ivs);
5968 *ivs = NULL;
5969 }
5970
5971 /* Dumps IVS to FILE. */
5972
5973 static void
5974 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5975 {
5976 const char *pref = " invariants ";
5977 unsigned i;
5978 comp_cost cost = iv_ca_cost (ivs);
5979
5980 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5981 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5982 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5983 bitmap_print (file, ivs->cands, " candidates: ","\n");
5984
5985 for (i = 0; i < ivs->upto; i++)
5986 {
5987 struct iv_use *use = iv_use (data, i);
5988 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5989 if (cp)
5990 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5991 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5992 else
5993 fprintf (file, " use:%d --> ??\n", use->id);
5994 }
5995
5996 for (i = 1; i <= data->max_inv_id; i++)
5997 if (ivs->n_invariant_uses[i])
5998 {
5999 fprintf (file, "%s%d", pref, i);
6000 pref = ", ";
6001 }
6002 fprintf (file, "\n\n");
6003 }
6004
6005 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6006 new set, and store differences in DELTA. Number of induction variables
6007 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true,
6008 the function tries to find a solution with a minimal number of iv candidates. */
6009
6010 static comp_cost
6011 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6012 struct iv_cand *cand, struct iv_ca_delta **delta,
6013 unsigned *n_ivs, bool min_ncand)
6014 {
6015 unsigned i;
6016 comp_cost cost;
6017 struct iv_use *use;
6018 struct cost_pair *old_cp, *new_cp;
6019
6020 *delta = NULL;
6021 for (i = 0; i < ivs->upto; i++)
6022 {
6023 use = iv_use (data, i);
6024 old_cp = iv_ca_cand_for_use (ivs, use);
6025
6026 if (old_cp
6027 && old_cp->cand == cand)
6028 continue;
6029
6030 new_cp = get_use_iv_cost (data, use, cand);
6031 if (!new_cp)
6032 continue;
6033
6034 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6035 continue;
6036
6037 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6038 continue;
6039
6040 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6041 }
6042
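/* Temporarily commit DELTA to measure the cost of the extended set, then
revert it; the caller decides whether to apply DELTA for real. */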
6043 iv_ca_delta_commit (data, ivs, *delta, true);
6044 cost = iv_ca_cost (ivs);
6045 if (n_ivs)
6046 *n_ivs = iv_ca_n_cands (ivs);
6047 iv_ca_delta_commit (data, ivs, *delta, false);
6048
6049 return cost;
6050 }
6051
6052 /* Try narrowing set IVS by removing CAND. Return the cost of
6053 the new set and store the differences in DELTA. START is
6054 the candidate with which we start narrowing. */
6055
6056 static comp_cost
6057 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6058 struct iv_cand *cand, struct iv_cand *start,
6059 struct iv_ca_delta **delta)
6060 {
6061 unsigned i, ci;
6062 struct iv_use *use;
6063 struct cost_pair *old_cp, *new_cp, *cp;
6064 bitmap_iterator bi;
6065 struct iv_cand *cnd;
6066 comp_cost cost, best_cost, acost;
6067
6068 *delta = NULL;
6069 for (i = 0; i < n_iv_uses (data); i++)
6070 {
6071 use = iv_use (data, i);
6072
6073 old_cp = iv_ca_cand_for_use (ivs, use);
6074 if (old_cp->cand != cand)
6075 continue;
6076
6077 best_cost = iv_ca_cost (ivs);
6078 /* Start narrowing with START. */
6079 new_cp = get_use_iv_cost (data, use, start);
6080
6081 if (data->consider_all_candidates)
6082 {
6083 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6084 {
6085 if (ci == cand->id || (start && ci == start->id))
6086 continue;
6087
6088 cnd = iv_cand (data, ci);
6089
6090 cp = get_use_iv_cost (data, use, cnd);
6091 if (!cp)
6092 continue;
6093
6094 iv_ca_set_cp (data, ivs, use, cp);
6095 acost = iv_ca_cost (ivs);
6096
6097 if (compare_costs (acost, best_cost) < 0)
6098 {
6099 best_cost = acost;
6100 new_cp = cp;
6101 }
6102 }
6103 }
6104 else
6105 {
6106 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6107 {
6108 if (ci == cand->id || (start && ci == start->id))
6109 continue;
6110
6111 cnd = iv_cand (data, ci);
6112
6113 cp = get_use_iv_cost (data, use, cnd);
6114 if (!cp)
6115 continue;
6116
6117 iv_ca_set_cp (data, ivs, use, cp);
6118 acost = iv_ca_cost (ivs);
6119
6120 if (compare_costs (acost, best_cost) < 0)
6121 {
6122 best_cost = acost;
6123 new_cp = cp;
6124 }
6125 }
6126 }
6127 /* Restore the old cp for this use. */
6128 iv_ca_set_cp (data, ivs, use, old_cp);
6129
6130 if (!new_cp)
6131 {
6132 iv_ca_delta_free (delta);
6133 return infinite_cost;
6134 }
6135
6136 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6137 }
6138
6139 iv_ca_delta_commit (data, ivs, *delta, true);
6140 cost = iv_ca_cost (ivs);
6141 iv_ca_delta_commit (data, ivs, *delta, false);
6142
6143 return cost;
6144 }
6145
6146 /* Try optimizing the set of candidates IVS by removing candidates other
6147 than EXCEPT_CAND from it. Return cost of the new set, and store
6148 differences in DELTA. */
6149
6150 static comp_cost
6151 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6152 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6153 {
6154 bitmap_iterator bi;
6155 struct iv_ca_delta *act_delta, *best_delta;
6156 unsigned i;
6157 comp_cost best_cost, acost;
6158 struct iv_cand *cand;
6159
6160 best_delta = NULL;
6161 best_cost = iv_ca_cost (ivs);
6162
6163 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6164 {
6165 cand = iv_cand (data, i);
6166
6167 if (cand == except_cand)
6168 continue;
6169
6170 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6171
6172 if (compare_costs (acost, best_cost) < 0)
6173 {
6174 best_cost = acost;
6175 iv_ca_delta_free (&best_delta);
6176 best_delta = act_delta;
6177 }
6178 else
6179 iv_ca_delta_free (&act_delta);
6180 }
6181
6182 if (!best_delta)
6183 {
6184 *delta = NULL;
6185 return best_cost;
6186 }
6187
6188 /* Recurse to possibly remove other unnecessary ivs. */
6189 iv_ca_delta_commit (data, ivs, best_delta, true);
6190 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6191 iv_ca_delta_commit (data, ivs, best_delta, false);
6192 *delta = iv_ca_delta_join (best_delta, *delta);
6193 return best_cost;
6194 }
6195
6196 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6197 a cheaper local cost for USE than BEST_CP. If so, return a pointer to
6198 the corresponding cost_pair; otherwise just return BEST_CP. */
6199
6200 static struct cost_pair*
6201 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6202 unsigned int cand_idx, struct iv_cand *old_cand,
6203 struct cost_pair *best_cp)
6204 {
6205 struct iv_cand *cand;
6206 struct cost_pair *cp;
6207
6208 gcc_assert (old_cand != NULL && best_cp != NULL);
6209 if (cand_idx == old_cand->id)
6210 return best_cp;
6211
6212 cand = iv_cand (data, cand_idx);
6213 cp = get_use_iv_cost (data, use, cand);
6214 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6215 return cp;
6216
6217 return best_cp;
6218 }
6219
6220 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6221 that are used by more than one iv use. For each such candidate, this
6222 function tries to represent the iv uses under that candidate using
6223 other candidates with lower local cost, then tries to prune the new set.
6224 If the new set has a lower cost, it returns the new cost after recording
6225 the candidate replacement in the list DELTA. */
6226
6227 static comp_cost
6228 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6229 struct iv_ca_delta **delta)
6230 {
6231 bitmap_iterator bi, bj;
6232 unsigned int i, j, k;
6233 struct iv_use *use;
6234 struct iv_cand *cand;
6235 comp_cost orig_cost, acost;
6236 struct iv_ca_delta *act_delta, *tmp_delta;
6237 struct cost_pair *old_cp, *best_cp = NULL;
6238
6239 *delta = NULL;
6240 orig_cost = iv_ca_cost (ivs);
6241
6242 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6243 {
6244 if (ivs->n_cand_uses[i] == 1
6245 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6246 continue;
6247
6248 cand = iv_cand (data, i);
6249
6250 act_delta = NULL;
6251 /* Represent uses under current candidate using other ones with
6252 lower local cost. */
6253 for (j = 0; j < ivs->upto; j++)
6254 {
6255 use = iv_use (data, j);
6256 old_cp = iv_ca_cand_for_use (ivs, use);
6257
6258 if (old_cp->cand != cand)
6259 continue;
6260
6261 best_cp = old_cp;
6262 if (data->consider_all_candidates)
6263 for (k = 0; k < n_iv_cands (data); k++)
6264 best_cp = cheaper_cost_with_cand (data, use, k,
6265 old_cp->cand, best_cp);
6266 else
6267 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6268 best_cp = cheaper_cost_with_cand (data, use, k,
6269 old_cp->cand, best_cp);
6270
6271 if (best_cp == old_cp)
6272 continue;
6273
6274 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6275 }
6276 /* No need for further pruning. */
6277 if (!act_delta)
6278 continue;
6279
6280 /* Prune the new candidate set. */
6281 iv_ca_delta_commit (data, ivs, act_delta, true);
6282 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6283 iv_ca_delta_commit (data, ivs, act_delta, false);
6284 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6285
6286 if (compare_costs (acost, orig_cost) < 0)
6287 {
6288 *delta = act_delta;
6289 return acost;
6290 }
6291 else
6292 iv_ca_delta_free (&act_delta);
6293 }
6294
6295 return orig_cost;
6296 }
6297
6298 /* Tries to extend the set IVS in the best possible way in order
6299 to express USE. If ORIGINALP is true, prefer candidates from
6300 the original set of IVs, otherwise favor important candidates not
6301 based on any memory object. */
6302
6303 static bool
6304 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6305 struct iv_use *use, bool originalp)
6306 {
6307 comp_cost best_cost, act_cost;
6308 unsigned i;
6309 bitmap_iterator bi;
6310 struct iv_cand *cand;
6311 struct iv_ca_delta *best_delta = NULL, *act_delta;
6312 struct cost_pair *cp;
6313
6314 iv_ca_add_use (data, ivs, use);
6315 best_cost = iv_ca_cost (ivs);
6316 cp = iv_ca_cand_for_use (ivs, use);
6317 if (cp)
6318 {
6319 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6320 iv_ca_set_no_cp (data, ivs, use);
6321 }
6322
6323 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6324 first try important candidates not based on any memory object. Only if
6325 this fails do we try the specific ones. Rationale -- in loops with many
6326 variables the best choice is often to use just one generic biv. If we
6327 added many ivs specific to the uses here, the optimization algorithm would
6328 be likely to get stuck in a local minimum later, thus causing us to create
6329 too many ivs. The approach of going from few ivs to more seems more likely
6330 to be successful -- starting from few ivs, replacing an expensive use by a
6331 specific iv should always be a win. */
6332 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6333 {
6334 cand = iv_cand (data, i);
6335
6336 if (originalp && cand->pos != IP_ORIGINAL)
6337 continue;
6338
6339 if (!originalp && cand->iv->base_object != NULL_TREE)
6340 continue;
6341
6342 if (iv_ca_cand_used_p (ivs, cand))
6343 continue;
6344
6345 cp = get_use_iv_cost (data, use, cand);
6346 if (!cp)
6347 continue;
6348
6349 iv_ca_set_cp (data, ivs, use, cp);
6350 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6351 true);
6352 iv_ca_set_no_cp (data, ivs, use);
6353 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6354
6355 if (compare_costs (act_cost, best_cost) < 0)
6356 {
6357 best_cost = act_cost;
6358
6359 iv_ca_delta_free (&best_delta);
6360 best_delta = act_delta;
6361 }
6362 else
6363 iv_ca_delta_free (&act_delta);
6364 }
6365
6366 if (infinite_cost_p (best_cost))
6367 {
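/* No important candidate gave a finite cost; consider every candidate
recorded in USE's cost map. */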
6368 for (i = 0; i < use->n_map_members; i++)
6369 {
6370 cp = use->cost_map + i;
6371 cand = cp->cand;
6372 if (!cand)
6373 continue;
6374
6375 /* Already tried this. */
6376 if (cand->important)
6377 {
6378 if (originalp && cand->pos == IP_ORIGINAL)
6379 continue;
6380 if (!originalp && cand->iv->base_object == NULL_TREE)
6381 continue;
6382 }
6383
6384 if (iv_ca_cand_used_p (ivs, cand))
6385 continue;
6386
6387 act_delta = NULL;
6388 iv_ca_set_cp (data, ivs, use, cp);
6389 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6390 iv_ca_set_no_cp (data, ivs, use);
6391 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6392 cp, act_delta);
6393
6394 if (compare_costs (act_cost, best_cost) < 0)
6395 {
6396 best_cost = act_cost;
6397
6398 if (best_delta)
6399 iv_ca_delta_free (&best_delta);
6400 best_delta = act_delta;
6401 }
6402 else
6403 iv_ca_delta_free (&act_delta);
6404 }
6405 }
6406
6407 iv_ca_delta_commit (data, ivs, best_delta, true);
6408 iv_ca_delta_free (&best_delta);
6409
6410 return !infinite_cost_p (best_cost);
6411 }
6412
6413 /* Finds an initial assignment of candidates to uses. */
6414
6415 static struct iv_ca *
6416 get_initial_solution (struct ivopts_data *data, bool originalp)
6417 {
6418 struct iv_ca *ivs = iv_ca_new (data);
6419 unsigned i;
6420
6421 for (i = 0; i < n_iv_uses (data); i++)
6422 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6423 {
6424 iv_ca_free (&ivs);
6425 return NULL;
6426 }
6427
6428 return ivs;
6429 }
6430
6431 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6432 points to a bool variable; if it is true, this function tries to break
6433 the local optimal fixed-point by replacing candidates in IVS. */
6434
6435 static bool
6436 try_improve_iv_set (struct ivopts_data *data,
6437 struct iv_ca *ivs, bool *try_replace_p)
6438 {
6439 unsigned i, n_ivs;
6440 comp_cost acost, best_cost = iv_ca_cost (ivs);
6441 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6442 struct iv_cand *cand;
6443
6444 /* Try extending the set of induction variables by one. */
6445 for (i = 0; i < n_iv_cands (data); i++)
6446 {
6447 cand = iv_cand (data, i);
6448
6449 if (iv_ca_cand_used_p (ivs, cand))
6450 continue;
6451
6452 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6453 if (!act_delta)
6454 continue;
6455
6456 /* If we successfully added the candidate and the set is small enough,
6457 try optimizing it by removing other candidates. */
6458 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6459 {
6460 iv_ca_delta_commit (data, ivs, act_delta, true);
6461 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6462 iv_ca_delta_commit (data, ivs, act_delta, false);
6463 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6464 }
6465
6466 if (compare_costs (acost, best_cost) < 0)
6467 {
6468 best_cost = acost;
6469 iv_ca_delta_free (&best_delta);
6470 best_delta = act_delta;
6471 }
6472 else
6473 iv_ca_delta_free (&act_delta);
6474 }
6475
6476 if (!best_delta)
6477 {
6478 /* Try removing the candidates from the set instead. */
6479 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6480
6481 if (!best_delta && *try_replace_p)
6482 {
6483 *try_replace_p = false;
6484 /* So far the candidate-selecting algorithm tends to choose fewer IVs,
6485 so that it can handle cases in which loops have many variables
6486 but the best choice is often to use only one general biv. One
6487 weakness is that it can't handle the opposite case, in which different
6488 candidates should be chosen with respect to each use. To solve
6489 the problem, we replace candidates in the manner described in the
6490 comments of iv_ca_replace, thus giving the general algorithm a chance
6491 to break the local optimal fixed-point in these cases. */
6492 best_cost = iv_ca_replace (data, ivs, &best_delta);
6493 }
6494
6495 if (!best_delta)
6496 return false;
6497 }
6498
6499 iv_ca_delta_commit (data, ivs, best_delta, true);
6500 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6501 iv_ca_delta_free (&best_delta);
6502 return true;
6503 }
6504
6505 /* Attempts to find the optimal set of induction variables. We use a simple
6506 greedy heuristic -- we try to replace at most one candidate in the selected
6507 solution and remove the unused ivs while this improves the cost. */
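/* The solution is built by get_initial_solution, which assigns a candidate
to every use, and then refined by repeated calls to try_improve_iv_set
until no further change lowers the cost. */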
6508
6509 static struct iv_ca *
6510 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6511 {
6512 struct iv_ca *set;
6513 bool try_replace_p = true;
6514
6515 /* Get the initial solution. */
6516 set = get_initial_solution (data, originalp);
6517 if (!set)
6518 {
6519 if (dump_file && (dump_flags & TDF_DETAILS))
6520 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6521 return NULL;
6522 }
6523
6524 if (dump_file && (dump_flags & TDF_DETAILS))
6525 {
6526 fprintf (dump_file, "Initial set of candidates:\n");
6527 iv_ca_dump (data, dump_file, set);
6528 }
6529
6530 while (try_improve_iv_set (data, set, &try_replace_p))
6531 {
6532 if (dump_file && (dump_flags & TDF_DETAILS))
6533 {
6534 fprintf (dump_file, "Improved to:\n");
6535 iv_ca_dump (data, dump_file, set);
6536 }
6537 }
6538
6539 return set;
6540 }
6541
6542 static struct iv_ca *
6543 find_optimal_iv_set (struct ivopts_data *data)
6544 {
6545 unsigned i;
6546 struct iv_ca *set, *origset;
6547 struct iv_use *use;
6548 comp_cost cost, origcost;
6549
6550 /* Determine the cost based on a strategy that starts with the original IVs,
6551 then try again using a strategy that prefers candidates not based
6552 on any memory object. */
6553 origset = find_optimal_iv_set_1 (data, true);
6554 set = find_optimal_iv_set_1 (data, false);
6555
6556 if (!origset && !set)
6557 return NULL;
6558
6559 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6560 cost = set ? iv_ca_cost (set) : infinite_cost;
6561
6562 if (dump_file && (dump_flags & TDF_DETAILS))
6563 {
6564 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6565 origcost.cost, origcost.complexity);
6566 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6567 cost.cost, cost.complexity);
6568 }
6569
6570 /* Choose the one with the best cost. */
6571 if (compare_costs (origcost, cost) <= 0)
6572 {
6573 if (set)
6574 iv_ca_free (&set);
6575 set = origset;
6576 }
6577 else if (origset)
6578 iv_ca_free (&origset);
6579
6580 for (i = 0; i < n_iv_uses (data); i++)
6581 {
6582 use = iv_use (data, i);
6583 use->selected = iv_ca_cand_for_use (set, use)->cand;
6584 }
6585
6586 return set;
6587 }
6588
6589 /* Creates a new induction variable corresponding to CAND. */
6590
6591 static void
6592 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6593 {
6594 gimple_stmt_iterator incr_pos;
6595 tree base;
6596 bool after = false;
6597
6598 if (!cand->iv)
6599 return;
6600
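/* Determine where the increment of the new variable is to be inserted,
according to the position class chosen for the candidate. */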
6601 switch (cand->pos)
6602 {
6603 case IP_NORMAL:
6604 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6605 break;
6606
6607 case IP_END:
6608 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6609 after = true;
6610 break;
6611
6612 case IP_AFTER_USE:
6613 after = true;
6614 /* fall through */
6615 case IP_BEFORE_USE:
6616 incr_pos = gsi_for_stmt (cand->incremented_at);
6617 break;
6618
6619 case IP_ORIGINAL:
6620 /* Mark that the iv is preserved. */
6621 name_info (data, cand->var_before)->preserve_biv = true;
6622 name_info (data, cand->var_after)->preserve_biv = true;
6623
6624 /* Rewrite the increment so that it uses var_before directly. */
6625 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6626 return;
6627 }
6628
6629 gimple_add_tmp_var (cand->var_before);
6630
6631 base = unshare_expr (cand->iv->base);
6632
6633 create_iv (base, unshare_expr (cand->iv->step),
6634 cand->var_before, data->current_loop,
6635 &incr_pos, after, &cand->var_before, &cand->var_after);
6636 }
6637
6638 /* Creates new induction variables described in SET. */
6639
6640 static void
6641 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6642 {
6643 unsigned i;
6644 struct iv_cand *cand;
6645 bitmap_iterator bi;
6646
6647 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6648 {
6649 cand = iv_cand (data, i);
6650 create_new_iv (data, cand);
6651 }
6652
6653 if (dump_file && (dump_flags & TDF_DETAILS))
6654 {
6655 fprintf (dump_file, "Selected IV set for loop %d",
6656 data->current_loop->num);
6657 if (data->loop_loc != UNKNOWN_LOCATION)
6658 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6659 LOCATION_LINE (data->loop_loc));
6660 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6661 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6662 {
6663 cand = iv_cand (data, i);
6664 dump_cand (dump_file, cand);
6665 }
6666 fprintf (dump_file, "\n");
6667 }
6668 }
6669
6670 /* Rewrites USE (definition of iv used in a nonlinear expression)
6671 using candidate CAND. */
6672
6673 static void
6674 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6675 struct iv_use *use, struct iv_cand *cand)
6676 {
6677 tree comp;
6678 tree op, tgt;
6679 gassign *ass;
6680 gimple_stmt_iterator bsi;
6681
6682 /* An important special case -- if we are asked to express the value of
6683 the original iv by itself, just exit; there is no need to
6684 introduce a new computation (that might also need casting the
6685 variable to unsigned and back). */
6686 if (cand->pos == IP_ORIGINAL
6687 && cand->incremented_at == use->stmt)
6688 {
6689 enum tree_code stmt_code;
6690
6691 gcc_assert (is_gimple_assign (use->stmt));
6692 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6693
6694 /* Check whether we may leave the computation unchanged.
6695 This is the case only if it does not rely on other
6696 computations in the loop -- otherwise, the computation
6697 we rely upon may be removed in remove_unused_ivs,
6698 thus leading to an ICE. */
6699 stmt_code = gimple_assign_rhs_code (use->stmt);
6700 if (stmt_code == PLUS_EXPR
6701 || stmt_code == MINUS_EXPR
6702 || stmt_code == POINTER_PLUS_EXPR)
6703 {
6704 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6705 op = gimple_assign_rhs2 (use->stmt);
6706 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6707 op = gimple_assign_rhs1 (use->stmt);
6708 else
6709 op = NULL_TREE;
6710 }
6711 else
6712 op = NULL_TREE;
6713
6714 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6715 return;
6716 }
6717
6718 comp = get_computation (data->current_loop, use, cand);
6719 gcc_assert (comp != NULL_TREE);
6720
6721 switch (gimple_code (use->stmt))
6722 {
6723 case GIMPLE_PHI:
6724 tgt = PHI_RESULT (use->stmt);
6725
6726 /* If we should keep the biv, do not replace it. */
6727 if (name_info (data, tgt)->preserve_biv)
6728 return;
6729
6730 bsi = gsi_after_labels (gimple_bb (use->stmt));
6731 break;
6732
6733 case GIMPLE_ASSIGN:
6734 tgt = gimple_assign_lhs (use->stmt);
6735 bsi = gsi_for_stmt (use->stmt);
6736 break;
6737
6738 default:
6739 gcc_unreachable ();
6740 }
6741
6742 if (!valid_gimple_rhs_p (comp)
6743 || (gimple_code (use->stmt) != GIMPLE_PHI
6744 /* We can't allow re-allocating the stmt, as it might still be
6745 pointed to. */
6746 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6747 >= gimple_num_ops (gsi_stmt (bsi)))))
6748 {
6749 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6750 true, GSI_SAME_STMT);
6751 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6752 {
6753 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6754 /* As this isn't a plain copy we have to reset alignment
6755 information. */
6756 if (SSA_NAME_PTR_INFO (comp))
6757 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6758 }
6759 }
6760
6761 if (gimple_code (use->stmt) == GIMPLE_PHI)
6762 {
6763 ass = gimple_build_assign (tgt, comp);
6764 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6765
6766 bsi = gsi_for_stmt (use->stmt);
6767 remove_phi_node (&bsi, false);
6768 }
6769 else
6770 {
6771 gimple_assign_set_rhs_from_tree (&bsi, comp);
6772 use->stmt = gsi_stmt (bsi);
6773 }
6774 }
6775
6776 /* Performs a peephole optimization to reorder the iv update statement with
6777 a mem ref to enable instruction combining in later phases. The mem ref uses
6778 the iv value before the update, so the reordering transformation requires
6779 adjustment of the offset. CAND is the selected IV_CAND.
6780
6781 Example:
6782
6783 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6784 iv2 = iv1 + 1;
6785
6786 if (t < val) (1)
6787 goto L;
6788 goto Head;
6789
6790
6791 directly propagating t over to (1) will introduce an overlapping live range,
6792 thus increasing register pressure. This peephole transforms the code into:
6793
6794
6795 iv2 = iv1 + 1;
6796 t = MEM_REF (base, iv2, 8, 8);
6797 if (t < val)
6798 goto L;
6799 goto Head;
6800 */
6801
6802 static void
6803 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6804 {
6805 tree var_after;
6806 gimple iv_update, stmt;
6807 basic_block bb;
6808 gimple_stmt_iterator gsi, gsi_iv;
6809
6810 if (cand->pos != IP_NORMAL)
6811 return;
6812
6813 var_after = cand->var_after;
6814 iv_update = SSA_NAME_DEF_STMT (var_after);
6815
6816 bb = gimple_bb (iv_update);
6817 gsi = gsi_last_nondebug_bb (bb);
6818 stmt = gsi_stmt (gsi);
6819
6820 /* Only handle a conditional statement for now. */
6821 if (gimple_code (stmt) != GIMPLE_COND)
6822 return;
6823
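/* Punt unless, ignoring debug statements, the iv update is the statement
immediately before the exit condition and the memory reference (USE->stmt)
is the statement immediately before that. */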
6824 gsi_prev_nondebug (&gsi);
6825 stmt = gsi_stmt (gsi);
6826 if (stmt != iv_update)
6827 return;
6828
6829 gsi_prev_nondebug (&gsi);
6830 if (gsi_end_p (gsi))
6831 return;
6832
6833 stmt = gsi_stmt (gsi);
6834 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6835 return;
6836
6837 if (stmt != use->stmt)
6838 return;
6839
6840 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6841 return;
6842
6843 if (dump_file && (dump_flags & TDF_DETAILS))
6844 {
6845 fprintf (dump_file, "Reordering \n");
6846 print_gimple_stmt (dump_file, iv_update, 0, 0);
6847 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6848 fprintf (dump_file, "\n");
6849 }
6850
6851 gsi = gsi_for_stmt (use->stmt);
6852 gsi_iv = gsi_for_stmt (iv_update);
6853 gsi_move_before (&gsi_iv, &gsi);
6854
6855 cand->pos = IP_BEFORE_USE;
6856 cand->incremented_at = use->stmt;
6857 }
6858
6859 /* Rewrites USE (address that is an iv) using candidate CAND. */
6860
6861 static void
6862 rewrite_use_address_1 (struct ivopts_data *data,
6863 struct iv_use *use, struct iv_cand *cand)
6864 {
6865 aff_tree aff;
6866 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6867 tree base_hint = NULL_TREE;
6868 tree ref, iv;
6869 bool ok;
6870
6871 adjust_iv_update_pos (cand, use);
6872 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6873 gcc_assert (ok);
6874 unshare_aff_combination (&aff);
6875
6876 /* To avoid undefined overflow problems, all IV candidates use unsigned
6877 integer types. The drawback is that this makes it impossible for
6878 create_mem_ref to distinguish an IV that is based on a memory object
6879 from one that represents simply an offset.
6880
6881 To work around this problem, we pass a hint to create_mem_ref that
6882 indicates which variable (if any) in aff is an IV based on a memory
6883 object. Note that we only consider the candidate. If this is not
6884 based on an object, the base of the reference is in some subexpression
6885 of the use -- but these will use pointer types, so they are recognized
6886 by the create_mem_ref heuristics anyway. */
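/* For example, when the selected candidate is a pointer iv walking an array,
passing its value here lets create_mem_ref use it as the base of the new
memory reference instead of treating it as a plain offset. */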
6887 if (cand->iv->base_object)
6888 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6889
6890 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6891 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6892 reference_alias_ptr_type (*use->op_p),
6893 iv, base_hint, data->speed);
6894 copy_ref_info (ref, *use->op_p);
6895 *use->op_p = ref;
6896 }
6897
6898 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6899 first use of a group, rewrites sub uses in the group too. */
6900
6901 static void
6902 rewrite_use_address (struct ivopts_data *data,
6903 struct iv_use *use, struct iv_cand *cand)
6904 {
6905 struct iv_use *next;
6906
6907 gcc_assert (use->sub_id == 0);
6908 rewrite_use_address_1 (data, use, cand);
6909 update_stmt (use->stmt);
6910
6911 for (next = use->next; next != NULL; next = next->next)
6912 {
6913 rewrite_use_address_1 (data, next, cand);
6914 update_stmt (next->stmt);
6915 }
6916
6917 return;
6918 }
6919
6920 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6921 candidate CAND. */
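/* If induction variable elimination recorded a bound for this use (CP->value
below), the exit test is rewritten as a comparison of the candidate's
variable against that bound, with any code needed to compute the bound
inserted on the preheader edge; e.g. 'if (i < n)' may become
'if (ivtmp_5 != _7)'. Otherwise the compared operand is simply
re-expressed in terms of CAND. */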
6922
6923 static void
6924 rewrite_use_compare (struct ivopts_data *data,
6925 struct iv_use *use, struct iv_cand *cand)
6926 {
6927 tree comp, *var_p, op, bound;
6928 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6929 enum tree_code compare;
6930 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6931 bool ok;
6932
6933 bound = cp->value;
6934 if (bound)
6935 {
6936 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6937 tree var_type = TREE_TYPE (var);
6938 gimple_seq stmts;
6939
6940 if (dump_file && (dump_flags & TDF_DETAILS))
6941 {
6942 fprintf (dump_file, "Replacing exit test: ");
6943 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6944 }
6945 compare = cp->comp;
6946 bound = unshare_expr (fold_convert (var_type, bound));
6947 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6948 if (stmts)
6949 gsi_insert_seq_on_edge_immediate (
6950 loop_preheader_edge (data->current_loop),
6951 stmts);
6952
6953 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6954 gimple_cond_set_lhs (cond_stmt, var);
6955 gimple_cond_set_code (cond_stmt, compare);
6956 gimple_cond_set_rhs (cond_stmt, op);
6957 return;
6958 }
6959
6960 /* The induction variable elimination failed; just express the original
6961 giv. */
6962 comp = get_computation (data->current_loop, use, cand);
6963 gcc_assert (comp != NULL_TREE);
6964
6965 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6966 gcc_assert (ok);
6967
6968 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6969 true, GSI_SAME_STMT);
6970 }
6971
6972 /* Rewrites USE using candidate CAND. */
6973
6974 static void
6975 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6976 {
6977 switch (use->type)
6978 {
6979 case USE_NONLINEAR_EXPR:
6980 rewrite_use_nonlinear_expr (data, use, cand);
6981 break;
6982
6983 case USE_ADDRESS:
6984 rewrite_use_address (data, use, cand);
6985 break;
6986
6987 case USE_COMPARE:
6988 rewrite_use_compare (data, use, cand);
6989 break;
6990
6991 default:
6992 gcc_unreachable ();
6993 }
6994
6995 update_stmt (use->stmt);
6996 }
6997
6998 /* Rewrite the uses using the selected induction variables. */
6999
7000 static void
7001 rewrite_uses (struct ivopts_data *data)
7002 {
7003 unsigned i;
7004 struct iv_cand *cand;
7005 struct iv_use *use;
7006
7007 for (i = 0; i < n_iv_uses (data); i++)
7008 {
7009 use = iv_use (data, i);
7010 cand = use->selected;
7011 gcc_assert (cand);
7012
7013 rewrite_use (data, use, cand);
7014 }
7015 }
7016
7017 /* Removes the ivs that are not used after rewriting. */
7018
7019 static void
7020 remove_unused_ivs (struct ivopts_data *data)
7021 {
7022 unsigned j;
7023 bitmap_iterator bi;
7024 bitmap toremove = BITMAP_ALLOC (NULL);
7025
7026 /* Figure out an order in which to release SSA DEFs so that we don't
7027 release something that we'd have to propagate into a debug stmt
7028 afterwards. */
7029 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7030 {
7031 struct version_info *info;
7032
7033 info = ver_info (data, j);
7034 if (info->iv
7035 && !integer_zerop (info->iv->step)
7036 && !info->inv_id
7037 && !info->iv->have_use_for
7038 && !info->preserve_biv)
7039 {
7040 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7041
7042 tree def = info->iv->ssa_name;
7043
7044 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7045 {
7046 imm_use_iterator imm_iter;
7047 use_operand_p use_p;
7048 gimple stmt;
7049 int count = 0;
7050
7051 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7052 {
7053 if (!gimple_debug_bind_p (stmt))
7054 continue;
7055
7056 /* We just want to determine whether to do nothing
7057 (count == 0), to substitute the computed
7058 expression into a single use of the SSA DEF by
7059 itself (count == 1), or to use a debug temp
7060 because the SSA DEF is used multiple times or as
7061 part of a larger expression (count > 1). */
7062 count++;
7063 if (gimple_debug_bind_get_value (stmt) != def)
7064 count++;
7065
7066 if (count > 1)
7067 BREAK_FROM_IMM_USE_STMT (imm_iter);
7068 }
7069
7070 if (!count)
7071 continue;
7072
7073 struct iv_use dummy_use;
7074 struct iv_cand *best_cand = NULL, *cand;
7075 unsigned i, best_pref = 0, cand_pref;
7076
7077 memset (&dummy_use, 0, sizeof (dummy_use));
7078 dummy_use.iv = info->iv;
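/* Prefer the candidate whose iv most closely matches the removed one: an
identical step is weighted highest, then a matching mode of the base type,
then a constant base. */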
7079 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7080 {
7081 cand = iv_use (data, i)->selected;
7082 if (cand == best_cand)
7083 continue;
7084 cand_pref = operand_equal_p (cand->iv->step,
7085 info->iv->step, 0)
7086 ? 4 : 0;
7087 cand_pref
7088 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7089 == TYPE_MODE (TREE_TYPE (info->iv->base))
7090 ? 2 : 0;
7091 cand_pref
7092 += TREE_CODE (cand->iv->base) == INTEGER_CST
7093 ? 1 : 0;
7094 if (best_cand == NULL || best_pref < cand_pref)
7095 {
7096 best_cand = cand;
7097 best_pref = cand_pref;
7098 }
7099 }
7100
7101 if (!best_cand)
7102 continue;
7103
7104 tree comp = get_computation_at (data->current_loop,
7105 &dummy_use, best_cand,
7106 SSA_NAME_DEF_STMT (def));
7107 if (!comp)
7108 continue;
7109
7110 if (count > 1)
7111 {
7112 tree vexpr = make_node (DEBUG_EXPR_DECL);
7113 DECL_ARTIFICIAL (vexpr) = 1;
7114 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7115 if (SSA_NAME_VAR (def))
7116 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7117 else
7118 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7119 gdebug *def_temp
7120 = gimple_build_debug_bind (vexpr, comp, NULL);
7121 gimple_stmt_iterator gsi;
7122
7123 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7124 gsi = gsi_after_labels (gimple_bb
7125 (SSA_NAME_DEF_STMT (def)));
7126 else
7127 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7128
7129 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7130 comp = vexpr;
7131 }
7132
7133 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7134 {
7135 if (!gimple_debug_bind_p (stmt))
7136 continue;
7137
7138 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7139 SET_USE (use_p, comp);
7140
7141 update_stmt (stmt);
7142 }
7143 }
7144 }
7145 }
7146
7147 release_defs_bitset (toremove);
7148
7149 BITMAP_FREE (toremove);
7150 }
7151
7152 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7153 for hash_map::traverse. */
7154
7155 bool
7156 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7157 {
7158 free (value);
7159 return true;
7160 }
7161
7162 /* Frees data allocated by the optimization of a single loop. */
7163
7164 static void
7165 free_loop_data (struct ivopts_data *data)
7166 {
7167 unsigned i, j;
7168 bitmap_iterator bi;
7169 tree obj;
7170
7171 if (data->niters)
7172 {
7173 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7174 delete data->niters;
7175 data->niters = NULL;
7176 }
7177
7178 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7179 {
7180 struct version_info *info;
7181
7182 info = ver_info (data, i);
7183 info->iv = NULL;
7184 info->has_nonlin_use = false;
7185 info->preserve_biv = false;
7186 info->inv_id = 0;
7187 }
7188 bitmap_clear (data->relevant);
7189 bitmap_clear (data->important_candidates);
7190
7191 for (i = 0; i < n_iv_uses (data); i++)
7192 {
7193 struct iv_use *use = iv_use (data, i);
7194 struct iv_use *pre = use, *sub = use->next;
7195
7196 while (sub)
7197 {
7198 gcc_assert (sub->related_cands == NULL);
7199 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7200
7201 pre = sub;
7202 sub = sub->next;
7203 free (pre);
7204 }
7205
7206 BITMAP_FREE (use->related_cands);
7207 for (j = 0; j < use->n_map_members; j++)
7208 if (use->cost_map[j].depends_on)
7209 BITMAP_FREE (use->cost_map[j].depends_on);
7210 free (use->cost_map);
7211 free (use);
7212 }
7213 data->iv_uses.truncate (0);
7214
7215 for (i = 0; i < n_iv_cands (data); i++)
7216 {
7217 struct iv_cand *cand = iv_cand (data, i);
7218
7219 if (cand->depends_on)
7220 BITMAP_FREE (cand->depends_on);
7221 free (cand);
7222 }
7223 data->iv_candidates.truncate (0);
7224
7225 if (data->version_info_size < num_ssa_names)
7226 {
7227 data->version_info_size = 2 * num_ssa_names;
7228 free (data->version_info);
7229 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7230 }
7231
7232 data->max_inv_id = 0;
7233
7234 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7235 SET_DECL_RTL (obj, NULL_RTX);
7236
7237 decl_rtl_to_reset.truncate (0);
7238
7239 data->inv_expr_tab->empty ();
7240 data->inv_expr_id = 0;
7241 }
7242
7243 /* Finalizes data structures used by the iv optimization pass, freeing the
7244 memory allocated for the whole pass in DATA. */
7245
7246 static void
7247 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7248 {
7249 free_loop_data (data);
7250 free (data->version_info);
7251 BITMAP_FREE (data->relevant);
7252 BITMAP_FREE (data->important_candidates);
7253
7254 decl_rtl_to_reset.release ();
7255 data->iv_uses.release ();
7256 data->iv_candidates.release ();
7257 delete data->inv_expr_tab;
7258 data->inv_expr_tab = NULL;
7259 free_affine_expand_cache (&data->name_expansion_cache);
7260 obstack_free (&data->iv_obstack, NULL);
7261 }
7262
7263 /* Returns true if the loop body BODY includes any function calls. */
7264
7265 static bool
7266 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7267 {
7268 gimple_stmt_iterator gsi;
7269 unsigned i;
7270
7271 for (i = 0; i < num_nodes; i++)
7272 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7273 {
7274 gimple stmt = gsi_stmt (gsi);
7275 if (is_gimple_call (stmt)
7276 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7277 return true;
7278 }
7279 return false;
7280 }
7281
7282 /* Optimizes the LOOP. Returns true if anything changed. */
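/* As a rough illustration (on a hypothetical input), for a loop like

for (i = 0; i < n; i++)
a[i] = 0;

the pass may select a single pointer candidate initialized to &a[0] and
stepped by the element size, rewrite the store to go through that pointer,
replace the exit test by a comparison of the pointer against a bound
computed in the preheader, and finally remove the original counter if
nothing else uses it. */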
7283
7284 static bool
7285 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7286 {
7287 bool changed = false;
7288 struct iv_ca *iv_ca;
7289 edge exit = single_dom_exit (loop);
7290 basic_block *body;
7291
7292 gcc_assert (!data->niters);
7293 data->current_loop = loop;
7294 data->loop_loc = find_loop_location (loop);
7295 data->speed = optimize_loop_for_speed_p (loop);
7296
7297 if (dump_file && (dump_flags & TDF_DETAILS))
7298 {
7299 fprintf (dump_file, "Processing loop %d", loop->num);
7300 if (data->loop_loc != UNKNOWN_LOCATION)
7301 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7302 LOCATION_LINE (data->loop_loc));
7303 fprintf (dump_file, "\n");
7304
7305 if (exit)
7306 {
7307 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7308 exit->src->index, exit->dest->index);
7309 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7310 fprintf (dump_file, "\n");
7311 }
7312
7313 fprintf (dump_file, "\n");
7314 }
7315
7316 body = get_loop_body (loop);
7317 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7318 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7319 free (body);
7320
7321 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7322
7323 /* For each ssa name determines whether it behaves as an induction variable
7324 in some loop. */
7325 if (!find_induction_variables (data))
7326 goto finish;
7327
7328 /* Finds interesting uses (item 1). */
7329 find_interesting_uses (data);
7330 group_address_uses (data);
7331 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7332 goto finish;
7333
7334 /* Finds candidates for the induction variables (item 2). */
7335 find_iv_candidates (data);
7336
7337 /* Calculates the costs (item 3, part 1). */
7338 determine_iv_costs (data);
7339 determine_use_iv_costs (data);
7340 determine_set_costs (data);
7341
7342 /* Find the optimal set of induction variables (item 3, part 2). */
7343 iv_ca = find_optimal_iv_set (data);
7344 if (!iv_ca)
7345 goto finish;
7346 changed = true;
7347
7348 /* Create the new induction variables (item 4, part 1). */
7349 create_new_ivs (data, iv_ca);
7350 iv_ca_free (&iv_ca);
7351
7352 /* Rewrite the uses (item 4, part 2). */
7353 rewrite_uses (data);
7354
7355 /* Remove the ivs that are unused after rewriting. */
7356 remove_unused_ivs (data);
7357
7358 /* We have changed the structure of induction variables; it might happen
7359 that definitions in the scev database refer to some of them that were
7360 eliminated. */
7361 scev_reset ();
7362
7363 finish:
7364 free_loop_data (data);
7365
7366 return changed;
7367 }
7368
7369 /* Main entry point. Optimizes induction variables in loops. */
7370
7371 void
7372 tree_ssa_iv_optimize (void)
7373 {
7374 struct loop *loop;
7375 struct ivopts_data data;
7376
7377 tree_ssa_iv_optimize_init (&data);
7378
7379 /* Optimize the loops starting with the innermost ones. */
7380 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7381 {
7382 if (dump_file && (dump_flags & TDF_DETAILS))
7383 flow_loop_dump (loop, dump_file, NULL, 1);
7384
7385 tree_ssa_iv_optimize_loop (&data, loop);
7386 }
7387
7388 tree_ssa_iv_optimize_finalize (&data);
7389 }