/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
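
/* For a rough illustration (not taken from any particular testcase): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the address of a[i] is recomputed as a + i * sizeof (*a) in each
   iteration.  After this pass the loop may instead maintain a pointer that
   is simply incremented,

     for (p = a; p != a + n; p++)
       sum += *p;

   which strength-reduces the multiplication, and the counter i may be
   eliminated entirely if the exit test is rewritten to compare the pointer
   against its final value.  */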

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "vec.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  unsigned use_id;      /* Id of the use recorded for this iv, valid if
                           have_use_for is set.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
};
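
/* For illustration (not from any particular testcase): for the counter of

     for (i = 10; i < n; i += 2)

   the iv has base 10, step 2 and no base_object, while for an "int *p"
   advanced by p++ inside the loop the iv has the initial pointer value as
   base, step 4 (assuming 4-byte int) and a base_object identifying the
   memory object the pointer points into.  */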

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned sub_id;      /* The id of the sub use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */

  struct iv_use *next;  /* The next sub use.  */
  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;            /* The invariant expression.  */
  int id;               /* The id assigned to it.  */
  hashval_t hash;       /* Its hash value.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in the loop.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether all candidates are considered for each use, rather than just
     the related and important ones.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
        {
          dump_use (file, use);
          use = use->next;
        }
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must not work with
         ssa names that appear in phi nodes on abnormal edges, so as not to
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
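
/* For illustration: for &a[i] the base object is &a; for p + 4 with a plain
   pointer ssa name p the recursion ends in the default case and yields p
   itself (converted to ptr_type_node); for an integer constant casted to a
   pointer there is no base object and NULL_TREE is returned.  */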

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
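
/* For instance, &a is not complex (its operand is the DECL a), while &a[0]
   and &b.field are, since their operands are ARRAY_REF and COMPONENT_REF
   trees rather than declarations.  */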

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE, except for those with a DECL_P
     operand.  By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
        bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
        continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
        continue;

      if (integer_zerop (iv.step))
        continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
         iv step.  Ideally we should stop at any ssa var, but doing so is
         expensive and such cases rarely happen, so we only check the first
         one.  See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
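
/* For illustration, a biv found here typically comes from GIMPLE of the form

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   which yields an iv with base 0 and step 1 for i_1.  */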

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but doing so is
     expensive and such cases rarely happen, so we only check the first
     one.  See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv, true);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type uses, ADDR_BASE is the stripped IV base and ADDR_OFFSET
   is the const offset stripped from the IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type, tree addr_base = NULL,
            unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
                struct iv *iv, gimple stmt, enum use_type use_type,
                tree addr_base, unsigned HOST_WIDE_INT addr_offset,
                unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* The sub use list is maintained in ascending order of offsets.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
        {
          pre = group;
          group = group->next;
        }
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* Clear the iv's ssa name so that it is not shown in the dumps, in case
     the caller did not reset it.  */
  iv->ssa_name = NULL_TREE;

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
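
/* For instance, for the exit test

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant, this sets
   *CONTROL_VAR to the location of i_1 and *BOUND to that of n_2, fills in
   the iv descriptions and returns true.  */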

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If BASE is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If BASE is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
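
/* For illustration: for a reference a[i_1] where a is an array of 4-byte
   ints and i_1 is an iv with step 1, the element size 4 is multiplied by
   the converted iv step, so 4 is added to DTA->step and the index is
   replaced by the initial value of i_1.  */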

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
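
/* For illustration: with TOP = i_1 * 6 and BOT = i_1, the MULT_EXPR case
   recurses on i_1 (a multiple of BOT by 1) and stores 6 to *MUL; with
   TOP = 12 and BOT = 4, the INTEGER_CST case performs the division and
   stores 3; with TOP = 13 and BOT = 4 it fails because the remainder is
   nonzero.  */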

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use with the same stripped iv base and step,
   this function records this one as a sub use of that one; otherwise it
   records a normal use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
                  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Sub uses are only supported for address type uses, that is, those
     with a base object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
        continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
          && operand_equal_p (iv->step, use->iv->step, 0)
          && operand_equal_p (addr_base, use->addr_base, 0))
        break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
                       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
                           use_type, addr_base, addr_offset, i);
}
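
/* For illustration, address uses of a[i_1] and a[i_1 + 4] (with 4-byte
   elements) have the same stripped base and step and differ only in the
   constant offset (0 vs. 16 bytes), so the second is recorded as a sub use
   of the first and the pair can later be addressed from one common
   induction variable.  */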

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
2155 if (!iv)
2156 continue;
2157
2158 find_interesting_uses_op (data, op);
2159 }
2160 }
2161
2162 /* Finds interesting uses of induction variables outside of loops
2163 on loop exit edge EXIT. */
2164
2165 static void
2166 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2167 {
2168 gphi *phi;
2169 gphi_iterator psi;
2170 tree def;
2171
2172 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2173 {
2174 phi = psi.phi ();
2175 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2176 if (!virtual_operand_p (def))
2177 find_interesting_uses_op (data, def);
2178 }
2179 }
2180
2181 /* Finds uses of the induction variables that are interesting. */
2182
2183 static void
2184 find_interesting_uses (struct ivopts_data *data)
2185 {
2186 basic_block bb;
2187 gimple_stmt_iterator bsi;
2188 basic_block *body = get_loop_body (data->current_loop);
2189 unsigned i;
2190 struct version_info *info;
2191 edge e;
2192
2193 if (dump_file && (dump_flags & TDF_DETAILS))
2194 fprintf (dump_file, "Uses:\n\n");
2195
2196 for (i = 0; i < data->current_loop->num_nodes; i++)
2197 {
2198 edge_iterator ei;
2199 bb = body[i];
2200
2201 FOR_EACH_EDGE (e, ei, bb->succs)
2202 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2203 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2204 find_interesting_uses_outside (data, e);
2205
2206 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2207 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2208 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2209 if (!is_gimple_debug (gsi_stmt (bsi)))
2210 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2211 }
2212
2213 if (dump_file && (dump_flags & TDF_DETAILS))
2214 {
2215 bitmap_iterator bi;
2216
2217 fprintf (dump_file, "\n");
2218
2219 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2220 {
2221 info = ver_info (data, i);
2222 if (info->inv_id)
2223 {
2224 fprintf (dump_file, " ");
2225 print_generic_expr (dump_file, info->name, TDF_SLIM);
2226 fprintf (dump_file, " is invariant (%d)%s\n",
2227 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2228 }
2229 }
2230
2231 fprintf (dump_file, "\n");
2232 }
2233
2234 free (body);
2235 }
2236
2237 /* Compute the maximum offset of the [base + offset] addressing mode
2238 for the memory reference represented by USE. */
2239
2240 static HOST_WIDE_INT
2241 compute_max_addr_offset (struct iv_use *use)
2242 {
2243 int width;
2244 rtx reg, addr;
2245 HOST_WIDE_INT i, off;
2246 unsigned list_index, num;
2247 addr_space_t as;
2248 machine_mode mem_mode, addr_mode;
2249 static vec<HOST_WIDE_INT> max_offset_list;
2250
2251 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2252 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2253
2254 num = max_offset_list.length ();
2255 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2256 if (list_index >= num)
2257 {
2258 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2259 for (; num < max_offset_list.length (); num++)
2260 max_offset_list[num] = -1;
2261 }
2262
2263 off = max_offset_list[list_index];
2264 if (off != -1)
2265 return off;
2266
2267 addr_mode = targetm.addr_space.address_mode (as);
2268 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2269 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2270
2271 width = GET_MODE_BITSIZE (addr_mode) - 1;
2272 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2273 width = HOST_BITS_PER_WIDE_INT - 1;
2274
2275 for (i = width; i > 0; i--)
2276 {
2277 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2278 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2279 if (memory_address_addr_space_p (mem_mode, addr, as))
2280 break;
2281
2282 /* For some strict-alignment targets, the offset must be naturally
2283 aligned. Try an aligned offset if mem_mode is not QImode. */
2284 off = ((unsigned HOST_WIDE_INT) 1 << i);
2285 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2286 {
2287 off -= GET_MODE_SIZE (mem_mode);
2288 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2289 if (memory_address_addr_space_p (mem_mode, addr, as))
2290 break;
2291 }
2292 }
2293 if (i == 0)
2294 off = 0;
2295
2296 max_offset_list[list_index] = off;
2297 return off;
2298 }
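
/* As a sketch of the result (target-dependent, so this is only an
   assumption): on a target whose [base + offset] addressing accepts
   12-bit signed offsets, the probing loop above would settle on
   off = (1 << 11) - 1 = 2047, and that value would be cached in
   max_offset_list for later queries with the same address space and
   memory mode.  */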
2299
2300 /* Check if all small groups should be split. Return true if and
2301 only if:
2302
2303 1) At least one group contains two uses with different offsets.
2304 2) No group contains more than two uses with different offsets.
2305
2306 Return false otherwise. We want to split such groups because:
2307
2308 1) Small groups don't have much benefit and may interfere with
2309 general candidate selection.
2310 2) The size of a problem with only small groups is usually small,
2311 and the general algorithm can handle it well.
2312
2313 TODO -- The above claim may not hold when auto increment is supported. */
2314
2315 static bool
2316 split_all_small_groups (struct ivopts_data *data)
2317 {
2318 bool split_p = false;
2319 unsigned int i, n, distinct;
2320 struct iv_use *pre, *use;
2321
2322 n = n_iv_uses (data);
2323 for (i = 0; i < n; i++)
2324 {
2325 use = iv_use (data, i);
2326 if (!use->next)
2327 continue;
2328
2329 distinct = 1;
2330 gcc_assert (use->type == USE_ADDRESS);
2331 for (pre = use, use = use->next; use; pre = use, use = use->next)
2332 {
2333 if (pre->addr_offset != use->addr_offset)
2334 distinct++;
2335
2336 if (distinct > 2)
2337 return false;
2338 }
2339 if (distinct == 2)
2340 split_p = true;
2341 }
2342
2343 return split_p;
2344 }
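
/* A hypothetical example: if every group has at most two distinct
   offsets, say {0} or {0, 4}, and at least one group has exactly two,
   this returns true and the small groups are split; a single group
   with offsets {0, 4, 8} makes it return false.  */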
2345
2346 /* For each group of address type uses, this function further groups
2347 these uses according to the maximum offset supported by the target's
2348 [base + offset] addressing mode. */
2349
2350 static void
2351 group_address_uses (struct ivopts_data *data)
2352 {
2353 HOST_WIDE_INT max_offset = -1;
2354 unsigned int i, n, sub_id;
2355 struct iv_use *pre, *use;
2356 unsigned HOST_WIDE_INT addr_offset_first;
2357
2358 /* Reset max offset to split all small groups. */
2359 if (split_all_small_groups (data))
2360 max_offset = 0;
2361
2362 n = n_iv_uses (data);
2363 for (i = 0; i < n; i++)
2364 {
2365 use = iv_use (data, i);
2366 if (!use->next)
2367 continue;
2368
2369 gcc_assert (use->type == USE_ADDRESS);
2370 if (max_offset != 0)
2371 max_offset = compute_max_addr_offset (use);
2372
2373 while (use)
2374 {
2375 sub_id = 0;
2376 addr_offset_first = use->addr_offset;
2377 /* Only uses whose offset relative to the first use fits in the
2378 offset part can be grouped together. */
2379 for (pre = use, use = use->next;
2380 use && (use->addr_offset - addr_offset_first
2381 <= (unsigned HOST_WIDE_INT) max_offset);
2382 pre = use, use = use->next)
2383 {
2384 use->id = pre->id;
2385 use->sub_id = ++sub_id;
2386 }
2387
2388 /* Break the list and create new group. */
2389 if (use)
2390 {
2391 pre->next = NULL;
2392 use->id = n_iv_uses (data);
2393 use->related_cands = BITMAP_ALLOC (NULL);
2394 data->iv_uses.safe_push (use);
2395 }
2396 }
2397 }
2398
2399 if (dump_file && (dump_flags & TDF_DETAILS))
2400 dump_uses (dump_file, data);
2401 }
2402
2403 /* Strips constant offsets from EXPR and stores them in OFFSET. If INSIDE_ADDR
2404 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2405 we are at the top-level of the processed address. */
2406
2407 static tree
2408 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2409 HOST_WIDE_INT *offset)
2410 {
2411 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2412 enum tree_code code;
2413 tree type, orig_type = TREE_TYPE (expr);
2414 HOST_WIDE_INT off0, off1, st;
2415 tree orig_expr = expr;
2416
2417 STRIP_NOPS (expr);
2418
2419 type = TREE_TYPE (expr);
2420 code = TREE_CODE (expr);
2421 *offset = 0;
2422
2423 switch (code)
2424 {
2425 case INTEGER_CST:
2426 if (!cst_and_fits_in_hwi (expr)
2427 || integer_zerop (expr))
2428 return orig_expr;
2429
2430 *offset = int_cst_value (expr);
2431 return build_int_cst (orig_type, 0);
2432
2433 case POINTER_PLUS_EXPR:
2434 case PLUS_EXPR:
2435 case MINUS_EXPR:
2436 op0 = TREE_OPERAND (expr, 0);
2437 op1 = TREE_OPERAND (expr, 1);
2438
2439 op0 = strip_offset_1 (op0, false, false, &off0);
2440 op1 = strip_offset_1 (op1, false, false, &off1);
2441
2442 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2443 if (op0 == TREE_OPERAND (expr, 0)
2444 && op1 == TREE_OPERAND (expr, 1))
2445 return orig_expr;
2446
2447 if (integer_zerop (op1))
2448 expr = op0;
2449 else if (integer_zerop (op0))
2450 {
2451 if (code == MINUS_EXPR)
2452 expr = fold_build1 (NEGATE_EXPR, type, op1);
2453 else
2454 expr = op1;
2455 }
2456 else
2457 expr = fold_build2 (code, type, op0, op1);
2458
2459 return fold_convert (orig_type, expr);
2460
2461 case MULT_EXPR:
2462 op1 = TREE_OPERAND (expr, 1);
2463 if (!cst_and_fits_in_hwi (op1))
2464 return orig_expr;
2465
2466 op0 = TREE_OPERAND (expr, 0);
2467 op0 = strip_offset_1 (op0, false, false, &off0);
2468 if (op0 == TREE_OPERAND (expr, 0))
2469 return orig_expr;
2470
2471 *offset = off0 * int_cst_value (op1);
2472 if (integer_zerop (op0))
2473 expr = op0;
2474 else
2475 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2476
2477 return fold_convert (orig_type, expr);
2478
2479 case ARRAY_REF:
2480 case ARRAY_RANGE_REF:
2481 if (!inside_addr)
2482 return orig_expr;
2483
2484 step = array_ref_element_size (expr);
2485 if (!cst_and_fits_in_hwi (step))
2486 break;
2487
2488 st = int_cst_value (step);
2489 op1 = TREE_OPERAND (expr, 1);
2490 op1 = strip_offset_1 (op1, false, false, &off1);
2491 *offset = off1 * st;
2492
2493 if (top_compref
2494 && integer_zerop (op1))
2495 {
2496 /* Strip the component reference completely. */
2497 op0 = TREE_OPERAND (expr, 0);
2498 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2499 *offset += off0;
2500 return op0;
2501 }
2502 break;
2503
2504 case COMPONENT_REF:
2505 {
2506 tree field;
2507
2508 if (!inside_addr)
2509 return orig_expr;
2510
2511 tmp = component_ref_field_offset (expr);
2512 field = TREE_OPERAND (expr, 1);
2513 if (top_compref
2514 && cst_and_fits_in_hwi (tmp)
2515 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2516 {
2517 HOST_WIDE_INT boffset, abs_off;
2518
2519 /* Strip the component reference completely. */
2520 op0 = TREE_OPERAND (expr, 0);
2521 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2522 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2523 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2524 if (boffset < 0)
2525 abs_off = -abs_off;
2526
2527 *offset = off0 + int_cst_value (tmp) + abs_off;
2528 return op0;
2529 }
2530 }
2531 break;
2532
2533 case ADDR_EXPR:
2534 op0 = TREE_OPERAND (expr, 0);
2535 op0 = strip_offset_1 (op0, true, true, &off0);
2536 *offset += off0;
2537
2538 if (op0 == TREE_OPERAND (expr, 0))
2539 return orig_expr;
2540
2541 expr = build_fold_addr_expr (op0);
2542 return fold_convert (orig_type, expr);
2543
2544 case MEM_REF:
2545 /* ??? Offset operand? */
2546 inside_addr = false;
2547 break;
2548
2549 default:
2550 return orig_expr;
2551 }
2552
2553 /* Default handling of expressions for which we want to recurse into
2554 the first operand. */
2555 op0 = TREE_OPERAND (expr, 0);
2556 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2557 *offset += off0;
2558
2559 if (op0 == TREE_OPERAND (expr, 0)
2560 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2561 return orig_expr;
2562
2563 expr = copy_node (expr);
2564 TREE_OPERAND (expr, 0) = op0;
2565 if (op1)
2566 TREE_OPERAND (expr, 1) = op1;
2567
2568 /* Inside an address, we might strip the top level component references,
2569 thus changing the type of the expression. The handling of ADDR_EXPR
2570 will fix that. */
2571 expr = fold_convert (orig_type, expr);
2572
2573 return expr;
2574 }
2575
2576 /* Strips constant offsets from EXPR and stores them in OFFSET. */
2577
2578 static tree
2579 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2580 {
2581 HOST_WIDE_INT off;
2582 tree core = strip_offset_1 (expr, false, false, &off);
2583 *offset = off;
2584 return core;
2585 }
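
/* A hypothetical example of what strip_offset computes: for the
   address &a[i + 3] with 4-byte array elements, the ARRAY_REF case
   above accumulates off1 * st = 3 * 4, so

     core = strip_offset (&a[i + 3], &off);

   returns the core expression &a[i] and sets off to 12.  */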
2586
2587 /* Returns a variant of TYPE that can be used as a base for different uses.
2588 We return an unsigned type with the same precision, which avoids problems
2589 with overflows. */
2590
2591 static tree
2592 generic_type_for (tree type)
2593 {
2594 if (POINTER_TYPE_P (type))
2595 return unsigned_type_for (type);
2596
2597 if (TYPE_UNSIGNED (type))
2598 return type;
2599
2600 return unsigned_type_for (type);
2601 }
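
/* For instance, generic_type_for (int) is unsigned int, and for a
   pointer type it is an unsigned integer type of the same precision;
   unsigned types are returned unchanged.  */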
2602
2603 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2604 the bitmap in which we should store them. */
2605
2606 static struct ivopts_data *fd_ivopts_data;
2607 static tree
2608 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2609 {
2610 bitmap *depends_on = (bitmap *) data;
2611 struct version_info *info;
2612
2613 if (TREE_CODE (*expr_p) != SSA_NAME)
2614 return NULL_TREE;
2615 info = name_info (fd_ivopts_data, *expr_p);
2616
2617 if (!info->inv_id || info->has_nonlin_use)
2618 return NULL_TREE;
2619
2620 if (!*depends_on)
2621 *depends_on = BITMAP_ALLOC (NULL);
2622 bitmap_set_bit (*depends_on, info->inv_id);
2623
2624 return NULL_TREE;
2625 }
2626
2627 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2628 position to POS. If USE is not NULL, the candidate is set as related to
2629 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2630 replacement of the final value of the iv by a direct computation. */
2631
2632 static struct iv_cand *
2633 add_candidate_1 (struct ivopts_data *data,
2634 tree base, tree step, bool important, enum iv_position pos,
2635 struct iv_use *use, gimple incremented_at)
2636 {
2637 unsigned i;
2638 struct iv_cand *cand = NULL;
2639 tree type, orig_type;
2640
2641 /* For non-original variables, make sure their values are computed in a type
2642 that does not invoke undefined behavior on overflows (since in general,
2643 we cannot prove that these induction variables are non-wrapping). */
2644 if (pos != IP_ORIGINAL)
2645 {
2646 orig_type = TREE_TYPE (base);
2647 type = generic_type_for (orig_type);
2648 if (type != orig_type)
2649 {
2650 base = fold_convert (type, base);
2651 step = fold_convert (type, step);
2652 }
2653 }
2654
2655 for (i = 0; i < n_iv_cands (data); i++)
2656 {
2657 cand = iv_cand (data, i);
2658
2659 if (cand->pos != pos)
2660 continue;
2661
2662 if (cand->incremented_at != incremented_at
2663 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2664 && cand->ainc_use != use))
2665 continue;
2666
2667 if (!cand->iv)
2668 {
2669 if (!base && !step)
2670 break;
2671
2672 continue;
2673 }
2674
2675 if (!base && !step)
2676 continue;
2677
2678 if (operand_equal_p (base, cand->iv->base, 0)
2679 && operand_equal_p (step, cand->iv->step, 0)
2680 && (TYPE_PRECISION (TREE_TYPE (base))
2681 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2682 break;
2683 }
2684
2685 if (i == n_iv_cands (data))
2686 {
2687 cand = XCNEW (struct iv_cand);
2688 cand->id = i;
2689
2690 if (!base && !step)
2691 cand->iv = NULL;
2692 else
2693 cand->iv = alloc_iv (base, step);
2694
2695 cand->pos = pos;
2696 if (pos != IP_ORIGINAL && cand->iv)
2697 {
2698 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2699 cand->var_after = cand->var_before;
2700 }
2701 cand->important = important;
2702 cand->incremented_at = incremented_at;
2703 data->iv_candidates.safe_push (cand);
2704
2705 if (step
2706 && TREE_CODE (step) != INTEGER_CST)
2707 {
2708 fd_ivopts_data = data;
2709 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2710 }
2711
2712 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2713 cand->ainc_use = use;
2714 else
2715 cand->ainc_use = NULL;
2716
2717 if (dump_file && (dump_flags & TDF_DETAILS))
2718 dump_cand (dump_file, cand);
2719 }
2720
2721 if (important && !cand->important)
2722 {
2723 cand->important = true;
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2726 }
2727
2728 if (use)
2729 {
2730 bitmap_set_bit (use->related_cands, i);
2731 if (dump_file && (dump_flags & TDF_DETAILS))
2732 fprintf (dump_file, "Candidate %d is related to use %d\n",
2733 cand->id, use->id);
2734 }
2735
2736 return cand;
2737 }
2738
2739 /* Returns true if incrementing the induction variable at the end of the LOOP
2740 is allowed.
2741
2742 The purpose is to avoid splitting the latch edge with a biv increment,
2743 thus creating a jump, possibly confusing other optimization passes and
2744 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2745 IP_NORMAL_POS is not available (so we do not have a better alternative),
2746 or if the latch edge is already nonempty. */
2747
2748 static bool
2749 allow_ip_end_pos_p (struct loop *loop)
2750 {
2751 if (!ip_normal_pos (loop))
2752 return true;
2753
2754 if (!empty_block_p (ip_end_pos (loop)))
2755 return true;
2756
2757 return false;
2758 }
2759
2760 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2761 Important field is set to IMPORTANT. */
2762
2763 static void
2764 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2765 bool important, struct iv_use *use)
2766 {
2767 basic_block use_bb = gimple_bb (use->stmt);
2768 machine_mode mem_mode;
2769 unsigned HOST_WIDE_INT cstepi;
2770
2771 /* If we insert the increment in any position other than the standard
2772 ones, we must ensure that it is incremented once per iteration.
2773 It must not be in an inner nested loop, or one side of an if
2774 statement. */
2775 if (use_bb->loop_father != data->current_loop
2776 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2777 || stmt_could_throw_p (use->stmt)
2778 || !cst_and_fits_in_hwi (step))
2779 return;
2780
2781 cstepi = int_cst_value (step);
2782
2783 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2784 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2785 || USE_STORE_PRE_INCREMENT (mem_mode))
2786 && GET_MODE_SIZE (mem_mode) == cstepi)
2787 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2788 || USE_STORE_PRE_DECREMENT (mem_mode))
2789 && GET_MODE_SIZE (mem_mode) == -cstepi))
2790 {
2791 enum tree_code code = MINUS_EXPR;
2792 tree new_base;
2793 tree new_step = step;
2794
2795 if (POINTER_TYPE_P (TREE_TYPE (base)))
2796 {
2797 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2798 code = POINTER_PLUS_EXPR;
2799 }
2800 else
2801 new_step = fold_convert (TREE_TYPE (base), new_step);
2802 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2803 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2804 use->stmt);
2805 }
2806 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2807 || USE_STORE_POST_INCREMENT (mem_mode))
2808 && GET_MODE_SIZE (mem_mode) == cstepi)
2809 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2810 || USE_STORE_POST_DECREMENT (mem_mode))
2811 && GET_MODE_SIZE (mem_mode) == -cstepi))
2812 {
2813 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2814 use->stmt);
2815 }
2816 }
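
/* A hypothetical illustration: on a target with post-increment
   addressing, an SImode use *p with step 4 (== GET_MODE_SIZE) gets an
   IP_AFTER_USE candidate with base P and step 4, which can later be
   matched to a "*p++" style access.  */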
2817
2818 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2819 position to POS. If USE is not NULL, the candidate is set as related to
2820 it. The candidate computation is scheduled on all available positions. */
2821
2822 static void
2823 add_candidate (struct ivopts_data *data,
2824 tree base, tree step, bool important, struct iv_use *use)
2825 {
2826 gcc_assert (use == NULL || use->sub_id == 0);
2827
2828 if (ip_normal_pos (data->current_loop))
2829 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2830 if (ip_end_pos (data->current_loop)
2831 && allow_ip_end_pos_p (data->current_loop))
2832 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2833
2834 if (use != NULL && use->type == USE_ADDRESS)
2835 add_autoinc_candidates (data, base, step, important, use);
2836 }
2837
2838 /* Adds standard iv candidates. */
2839
2840 static void
2841 add_standard_iv_candidates (struct ivopts_data *data)
2842 {
2843 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2844
2845 /* The same for the long type if it is still fast enough. */
2846 if (TYPE_PRECISION
2847 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2848 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2849 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2850 build_int_cst (long_integer_type_node, 1), true, NULL);
2851
2852 /* The same for the long long type if it is still fast enough. */
2853 if (TYPE_PRECISION
2854 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2855 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2856 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2857 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2858 }
2859
2860
2861 /* Adds candidates based on the old induction variable IV. */
2862
2863 static void
2864 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2865 {
2866 gimple phi;
2867 tree def;
2868 struct iv_cand *cand;
2869
2870 add_candidate (data, iv->base, iv->step, true, NULL);
2871
2872 /* The same, but with initial value zero. */
2873 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2874 add_candidate (data, size_int (0), iv->step, true, NULL);
2875 else
2876 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2877 iv->step, true, NULL);
2878
2879 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2880 if (gimple_code (phi) == GIMPLE_PHI)
2881 {
2882 /* Additionally record the possibility of leaving the original iv
2883 untouched. */
2884 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2885 /* Don't add candidate if it's from another PHI node because
2886 it's an affine iv appearing in the form of PEELED_CHREC. */
2887 phi = SSA_NAME_DEF_STMT (def);
2888 if (gimple_code (phi) != GIMPLE_PHI)
2889 {
2890 cand = add_candidate_1 (data,
2891 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2892 SSA_NAME_DEF_STMT (def));
2893 cand->var_before = iv->ssa_name;
2894 cand->var_after = def;
2895 }
2896 else
2897 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2898 }
2899 }
2900
2901 /* Adds candidates based on the old induction variables. */
2902
2903 static void
2904 add_old_ivs_candidates (struct ivopts_data *data)
2905 {
2906 unsigned i;
2907 struct iv *iv;
2908 bitmap_iterator bi;
2909
2910 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2911 {
2912 iv = ver_info (data, i)->iv;
2913 if (iv && iv->biv_p && !integer_zerop (iv->step))
2914 add_old_iv_candidates (data, iv);
2915 }
2916 }
2917
2918 /* Adds candidates based on the value of the induction variable IV and USE. */
2919
2920 static void
2921 add_iv_value_candidates (struct ivopts_data *data,
2922 struct iv *iv, struct iv_use *use)
2923 {
2924 unsigned HOST_WIDE_INT offset;
2925 tree base;
2926 tree basetype;
2927
2928 add_candidate (data, iv->base, iv->step, false, use);
2929
2930 /* The same, but with initial value zero. Make such a variable important,
2931 since it is generic enough that possibly many uses may be based
2932 on it. */
2933 basetype = TREE_TYPE (iv->base);
2934 if (POINTER_TYPE_P (basetype))
2935 basetype = sizetype;
2936 add_candidate (data, build_int_cst (basetype, 0),
2937 iv->step, true, use);
2938
2939 /* Third, try removing the constant offset. Make sure to even
2940 add a candidate for &a[0] vs. (T *)&a. */
2941 base = strip_offset (iv->base, &offset);
2942 if (offset
2943 || base != iv->base)
2944 add_candidate (data, base, iv->step, false, use);
2945 }
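
/* As an example (hypothetical, assuming 4-byte array elements): for a
   use with iv {base = &a[3], step = 4} this adds the candidate
   {&a[3], 4}, the important candidate {0, 4}, and -- once strip_offset
   has removed the constant offset 12 -- the candidate {&a[0], 4}.  */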
2946
2947 /* Adds candidates based on the uses. */
2948
2949 static void
2950 add_derived_ivs_candidates (struct ivopts_data *data)
2951 {
2952 unsigned i;
2953
2954 for (i = 0; i < n_iv_uses (data); i++)
2955 {
2956 struct iv_use *use = iv_use (data, i);
2957
2958 if (!use)
2959 continue;
2960
2961 switch (use->type)
2962 {
2963 case USE_NONLINEAR_EXPR:
2964 case USE_COMPARE:
2965 case USE_ADDRESS:
2966 /* Just add the ivs based on the value of the iv used here. */
2967 add_iv_value_candidates (data, use->iv, use);
2968 break;
2969
2970 default:
2971 gcc_unreachable ();
2972 }
2973 }
2974 }
2975
2976 /* Record important candidates and add them to related_cands bitmaps
2977 if needed. */
2978
2979 static void
2980 record_important_candidates (struct ivopts_data *data)
2981 {
2982 unsigned i;
2983 struct iv_use *use;
2984
2985 for (i = 0; i < n_iv_cands (data); i++)
2986 {
2987 struct iv_cand *cand = iv_cand (data, i);
2988
2989 if (cand->important)
2990 bitmap_set_bit (data->important_candidates, i);
2991 }
2992
2993 data->consider_all_candidates = (n_iv_cands (data)
2994 <= CONSIDER_ALL_CANDIDATES_BOUND);
2995
2996 if (data->consider_all_candidates)
2997 {
2998 /* We will not need "related_cands" bitmaps in this case,
2999 so release them to decrease peak memory consumption. */
3000 for (i = 0; i < n_iv_uses (data); i++)
3001 {
3002 use = iv_use (data, i);
3003 BITMAP_FREE (use->related_cands);
3004 }
3005 }
3006 else
3007 {
3008 /* Add important candidates to the related_cands bitmaps. */
3009 for (i = 0; i < n_iv_uses (data); i++)
3010 bitmap_ior_into (iv_use (data, i)->related_cands,
3011 data->important_candidates);
3012 }
3013 }
3014
3015 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3016 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3017 we allocate a simple list for every use. */
3018
3019 static void
3020 alloc_use_cost_map (struct ivopts_data *data)
3021 {
3022 unsigned i, size, s;
3023
3024 for (i = 0; i < n_iv_uses (data); i++)
3025 {
3026 struct iv_use *use = iv_use (data, i);
3027
3028 if (data->consider_all_candidates)
3029 size = n_iv_cands (data);
3030 else
3031 {
3032 s = bitmap_count_bits (use->related_cands);
3033
3034 /* Round up to the nearest power of two, so that reducing modulo it is fast. */
3035 size = s ? (1 << ceil_log2 (s)) : 1;
3036 }
3037
3038 use->n_map_members = size;
3039 use->cost_map = XCNEWVEC (struct cost_pair, size);
3040 }
3041 }
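
/* E.g. a use related to five candidates gets size = 1 << ceil_log2 (5)
   = 8 map members, so set_use_iv_cost can reduce candidate ids with a
   simple mask instead of a division.  */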
3042
3043 /* Returns a description of the computation cost of an expression whose
3044 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3045
3046 static comp_cost
3047 new_cost (unsigned runtime, unsigned complexity)
3048 {
3049 comp_cost cost;
3050
3051 cost.cost = runtime;
3052 cost.complexity = complexity;
3053
3054 return cost;
3055 }
3056
3057 /* Returns true if COST is infinite. */
3058
3059 static bool
3060 infinite_cost_p (comp_cost cost)
3061 {
3062 return cost.cost == INFTY;
3063 }
3064
3065 /* Adds costs COST1 and COST2. */
3066
3067 static comp_cost
3068 add_costs (comp_cost cost1, comp_cost cost2)
3069 {
3070 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3071 return infinite_cost;
3072
3073 cost1.cost += cost2.cost;
3074 cost1.complexity += cost2.complexity;
3075
3076 return cost1;
3077 }

3078 /* Subtracts cost COST2 from COST1. */
3079
3080 static comp_cost
3081 sub_costs (comp_cost cost1, comp_cost cost2)
3082 {
3083 cost1.cost -= cost2.cost;
3084 cost1.complexity -= cost2.complexity;
3085
3086 return cost1;
3087 }
3088
3089 /* Returns a negative number if COST1 < COST2, a positive number if
3090 COST1 > COST2, and 0 if COST1 = COST2. */
3091
3092 static int
3093 compare_costs (comp_cost cost1, comp_cost cost2)
3094 {
3095 if (cost1.cost == cost2.cost)
3096 return cost1.complexity - cost2.complexity;
3097
3098 return cost1.cost - cost2.cost;
3099 }
3100
3101 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3102 depends on invariants DEPENDS_ON, that the value used in expressing it
3103 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3104
3105 static void
3106 set_use_iv_cost (struct ivopts_data *data,
3107 struct iv_use *use, struct iv_cand *cand,
3108 comp_cost cost, bitmap depends_on, tree value,
3109 enum tree_code comp, int inv_expr_id)
3110 {
3111 unsigned i, s;
3112
3113 if (infinite_cost_p (cost))
3114 {
3115 BITMAP_FREE (depends_on);
3116 return;
3117 }
3118
3119 if (data->consider_all_candidates)
3120 {
3121 use->cost_map[cand->id].cand = cand;
3122 use->cost_map[cand->id].cost = cost;
3123 use->cost_map[cand->id].depends_on = depends_on;
3124 use->cost_map[cand->id].value = value;
3125 use->cost_map[cand->id].comp = comp;
3126 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3127 return;
3128 }
3129
3130 /* n_map_members is a power of two, so this computes modulo. */
3131 s = cand->id & (use->n_map_members - 1);
3132 for (i = s; i < use->n_map_members; i++)
3133 if (!use->cost_map[i].cand)
3134 goto found;
3135 for (i = 0; i < s; i++)
3136 if (!use->cost_map[i].cand)
3137 goto found;
3138
3139 gcc_unreachable ();
3140
3141 found:
3142 use->cost_map[i].cand = cand;
3143 use->cost_map[i].cost = cost;
3144 use->cost_map[i].depends_on = depends_on;
3145 use->cost_map[i].value = value;
3146 use->cost_map[i].comp = comp;
3147 use->cost_map[i].inv_expr_id = inv_expr_id;
3148 }
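
/* The cost map is a small open-addressing hash table: with
   n_map_members == 8, for example, candidate id 11 hashes to slot
   11 & 7 == 3 and, on a collision, the scans above probe slots 4..7
   and then 0..2 until a free slot is found.  */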
3149
3150 /* Gets cost of (USE, CANDIDATE) pair. */
3151
3152 static struct cost_pair *
3153 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3154 struct iv_cand *cand)
3155 {
3156 unsigned i, s;
3157 struct cost_pair *ret;
3158
3159 if (!cand)
3160 return NULL;
3161
3162 if (data->consider_all_candidates)
3163 {
3164 ret = use->cost_map + cand->id;
3165 if (!ret->cand)
3166 return NULL;
3167
3168 return ret;
3169 }
3170
3171 /* n_map_members is a power of two, so this computes modulo. */
3172 s = cand->id & (use->n_map_members - 1);
3173 for (i = s; i < use->n_map_members; i++)
3174 if (use->cost_map[i].cand == cand)
3175 return use->cost_map + i;
3176 else if (use->cost_map[i].cand == NULL)
3177 return NULL;
3178 for (i = 0; i < s; i++)
3179 if (use->cost_map[i].cand == cand)
3180 return use->cost_map + i;
3181 else if (use->cost_map[i].cand == NULL)
3182 return NULL;
3183
3184 return NULL;
3185 }
3186
3187 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3188 static rtx
3189 produce_memory_decl_rtl (tree obj, int *regno)
3190 {
3191 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3192 machine_mode address_mode = targetm.addr_space.address_mode (as);
3193 rtx x;
3194
3195 gcc_assert (obj);
3196 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3197 {
3198 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3199 x = gen_rtx_SYMBOL_REF (address_mode, name);
3200 SET_SYMBOL_REF_DECL (x, obj);
3201 x = gen_rtx_MEM (DECL_MODE (obj), x);
3202 set_mem_addr_space (x, as);
3203 targetm.encode_section_info (obj, x, true);
3204 }
3205 else
3206 {
3207 x = gen_raw_REG (address_mode, (*regno)++);
3208 x = gen_rtx_MEM (DECL_MODE (obj), x);
3209 set_mem_addr_space (x, as);
3210 }
3211
3212 return x;
3213 }
3214
3215 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3216 walk_tree. DATA contains the actual fake register number. */
3217
3218 static tree
3219 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3220 {
3221 tree obj = NULL_TREE;
3222 rtx x = NULL_RTX;
3223 int *regno = (int *) data;
3224
3225 switch (TREE_CODE (*expr_p))
3226 {
3227 case ADDR_EXPR:
3228 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3229 handled_component_p (*expr_p);
3230 expr_p = &TREE_OPERAND (*expr_p, 0))
3231 continue;
3232 obj = *expr_p;
3233 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3234 x = produce_memory_decl_rtl (obj, regno);
3235 break;
3236
3237 case SSA_NAME:
3238 *ws = 0;
3239 obj = SSA_NAME_VAR (*expr_p);
3240 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3241 if (!obj)
3242 return NULL_TREE;
3243 if (!DECL_RTL_SET_P (obj))
3244 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3245 break;
3246
3247 case VAR_DECL:
3248 case PARM_DECL:
3249 case RESULT_DECL:
3250 *ws = 0;
3251 obj = *expr_p;
3252
3253 if (DECL_RTL_SET_P (obj))
3254 break;
3255
3256 if (DECL_MODE (obj) == BLKmode)
3257 x = produce_memory_decl_rtl (obj, regno);
3258 else
3259 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3260
3261 break;
3262
3263 default:
3264 break;
3265 }
3266
3267 if (x)
3268 {
3269 decl_rtl_to_reset.safe_push (obj);
3270 SET_DECL_RTL (obj, x);
3271 }
3272
3273 return NULL_TREE;
3274 }
3275
3276 /* Determines cost of the computation of EXPR. */
3277
3278 static unsigned
3279 computation_cost (tree expr, bool speed)
3280 {
3281 rtx_insn *seq;
3282 rtx rslt;
3283 tree type = TREE_TYPE (expr);
3284 unsigned cost;
3285 /* Avoid using hard regs in ways which may be unsupported. */
3286 int regno = LAST_VIRTUAL_REGISTER + 1;
3287 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3288 enum node_frequency real_frequency = node->frequency;
3289
3290 node->frequency = NODE_FREQUENCY_NORMAL;
3291 crtl->maybe_hot_insn_p = speed;
3292 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3293 start_sequence ();
3294 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3295 seq = get_insns ();
3296 end_sequence ();
3297 default_rtl_profile ();
3298 node->frequency = real_frequency;
3299
3300 cost = seq_cost (seq, speed);
3301 if (MEM_P (rslt))
3302 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3303 TYPE_ADDR_SPACE (type), speed);
3304 else if (!REG_P (rslt))
3305 cost += set_src_cost (rslt, speed);
3306
3307 return cost;
3308 }
3309
3310 /* Returns variable containing the value of candidate CAND at statement AT. */
3311
3312 static tree
3313 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3314 {
3315 if (stmt_after_increment (loop, cand, stmt))
3316 return cand->var_after;
3317 else
3318 return cand->var_before;
3319 }
3320
3321 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3322 same precision that is at least as wide as the precision of TYPE, stores
3323 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3324 type of A and B. */
3325
3326 static tree
3327 determine_common_wider_type (tree *a, tree *b)
3328 {
3329 tree wider_type = NULL;
3330 tree suba, subb;
3331 tree atype = TREE_TYPE (*a);
3332
3333 if (CONVERT_EXPR_P (*a))
3334 {
3335 suba = TREE_OPERAND (*a, 0);
3336 wider_type = TREE_TYPE (suba);
3337 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3338 return atype;
3339 }
3340 else
3341 return atype;
3342
3343 if (CONVERT_EXPR_P (*b))
3344 {
3345 subb = TREE_OPERAND (*b, 0);
3346 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3347 return atype;
3348 }
3349 else
3350 return atype;
3351
3352 *a = suba;
3353 *b = subb;
3354 return wider_type;
3355 }
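
/* For instance (a hypothetical case): if *A is (unsigned int) la and
   *B is (unsigned int) lb, where la and lb have type long, both
   operands are replaced by their long subexpressions and the function
   returns long, so the difference can be folded in the wider type.  */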
3356
3357 /* Determines the expression by which USE is expressed from induction variable
3358 CAND at statement AT in LOOP. The expression is stored in a decomposed
3359 form into AFF. Returns false if USE cannot be expressed using CAND. */
3360
3361 static bool
3362 get_computation_aff (struct loop *loop,
3363 struct iv_use *use, struct iv_cand *cand, gimple at,
3364 struct aff_tree *aff)
3365 {
3366 tree ubase = use->iv->base;
3367 tree ustep = use->iv->step;
3368 tree cbase = cand->iv->base;
3369 tree cstep = cand->iv->step, cstep_common;
3370 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3371 tree common_type, var;
3372 tree uutype;
3373 aff_tree cbase_aff, var_aff;
3374 widest_int rat;
3375
3376 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3377 {
3378 /* We do not have enough precision to express the values of the use. */
3379 return false;
3380 }
3381
3382 var = var_at_stmt (loop, cand, at);
3383 uutype = unsigned_type_for (utype);
3384
3385 /* If the conversion is not noop, perform it. */
3386 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3387 {
3388 cstep = fold_convert (uutype, cstep);
3389 cbase = fold_convert (uutype, cbase);
3390 var = fold_convert (uutype, var);
3391 }
3392
3393 if (!constant_multiple_of (ustep, cstep, &rat))
3394 return false;
3395
3396 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3397 type, we achieve better folding by computing their difference in this
3398 wider type, and casting the result to UUTYPE. We do not need to worry about
3399 overflows, as all the arithmetic will in the end be performed in UUTYPE
3400 anyway. */
3401 common_type = determine_common_wider_type (&ubase, &cbase);
3402
3403 /* use = ubase - ratio * cbase + ratio * var. */
3404 tree_to_aff_combination (ubase, common_type, aff);
3405 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3406 tree_to_aff_combination (var, uutype, &var_aff);
3407
3408 /* We need to shift the value if we are after the increment. */
3409 if (stmt_after_increment (loop, cand, at))
3410 {
3411 aff_tree cstep_aff;
3412
3413 if (common_type != uutype)
3414 cstep_common = fold_convert (common_type, cstep);
3415 else
3416 cstep_common = cstep;
3417
3418 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3419 aff_combination_add (&cbase_aff, &cstep_aff);
3420 }
3421
3422 aff_combination_scale (&cbase_aff, -rat);
3423 aff_combination_add (aff, &cbase_aff);
3424 if (common_type != uutype)
3425 aff_combination_convert (aff, uutype);
3426
3427 aff_combination_scale (&var_aff, rat);
3428 aff_combination_add (aff, &var_aff);
3429
3430 return true;
3431 }
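
/* A small worked instance of "use = ubase - ratio * cbase + ratio * var":
   expressing a use with iv {base = 4, step = 4} from a candidate with
   iv {base = 0, step = 1} gives ratio = 4, so the affine result is
   4 - 4 * 0 + 4 * var, i.e. use = 4 * var + 4.  */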
3432
3433 /* Return the type of USE. */
3434
3435 static tree
3436 get_use_type (struct iv_use *use)
3437 {
3438 tree base_type = TREE_TYPE (use->iv->base);
3439 tree type;
3440
3441 if (use->type == USE_ADDRESS)
3442 {
3443 /* The base_type may be a void pointer. Create a pointer type based on
3444 the mem_ref instead. */
3445 type = build_pointer_type (TREE_TYPE (*use->op_p));
3446 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3447 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3448 }
3449 else
3450 type = base_type;
3451
3452 return type;
3453 }
3454
3455 /* Determines the expression by which USE is expressed from induction variable
3456 CAND at statement AT in LOOP. The computation is unshared. */
3457
3458 static tree
3459 get_computation_at (struct loop *loop,
3460 struct iv_use *use, struct iv_cand *cand, gimple at)
3461 {
3462 aff_tree aff;
3463 tree type = get_use_type (use);
3464
3465 if (!get_computation_aff (loop, use, cand, at, &aff))
3466 return NULL_TREE;
3467 unshare_aff_combination (&aff);
3468 return fold_convert (type, aff_combination_to_tree (&aff));
3469 }
3470
3471 /* Determines the expression by which USE is expressed from induction variable
3472 CAND in LOOP. The computation is unshared. */
3473
3474 static tree
3475 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3476 {
3477 return get_computation_at (loop, use, cand, use->stmt);
3478 }
3479
3480 /* Adjust the cost COST for being in loop setup rather than loop body.
3481 If we're optimizing for space, the loop setup overhead is constant;
3482 if we're optimizing for speed, amortize it over the per-iteration cost. */
3483 static unsigned
3484 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3485 {
3486 if (cost == INFTY)
3487 return cost;
3488 else if (optimize_loop_for_speed_p (data->current_loop))
3489 return cost / avg_loop_niter (data->current_loop);
3490 else
3491 return cost;
3492 }
3493
3494 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3495 validity for a memory reference accessing memory of mode MODE in
3496 address space AS. */
3497
3499 bool
3500 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3501 addr_space_t as)
3502 {
3503 #define MAX_RATIO 128
3504 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3505 static vec<sbitmap> valid_mult_list;
3506 sbitmap valid_mult;
3507
3508 if (data_index >= valid_mult_list.length ())
3509 valid_mult_list.safe_grow_cleared (data_index + 1);
3510
3511 valid_mult = valid_mult_list[data_index];
3512 if (!valid_mult)
3513 {
3514 machine_mode address_mode = targetm.addr_space.address_mode (as);
3515 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3516 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3517 rtx addr, scaled;
3518 HOST_WIDE_INT i;
3519
3520 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3521 bitmap_clear (valid_mult);
3522 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3523 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3524 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3525 {
3526 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3527 if (memory_address_addr_space_p (mode, addr, as)
3528 || memory_address_addr_space_p (mode, scaled, as))
3529 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3530 }
3531
3532 if (dump_file && (dump_flags & TDF_DETAILS))
3533 {
3534 fprintf (dump_file, " allowed multipliers:");
3535 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3536 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3537 fprintf (dump_file, " %d", (int) i);
3538 fprintf (dump_file, "\n");
3539 fprintf (dump_file, "\n");
3540 }
3541
3542 valid_mult_list[data_index] = valid_mult;
3543 }
3544
3545 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3546 return false;
3547
3548 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3549 }
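
/* The answer is target-specific; as a plausible example (an assumption,
   not a statement about any particular port), a target with scaled
   index addressing in the x86 style would typically accept ratios 1,
   2, 4 and 8 and reject everything else.  */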
3550
3551 /* Returns the cost of an address of shape symbol + var + OFFSET + RATIO * index.
3552 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3553 the variable is omitted. Compute the cost for a memory reference that
3554 accesses a memory location of mode MEM_MODE in address space AS.
3555
3556 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3557 size of MEM_MODE / RATIO) is available. To make this determination, we
3558 look at the size of the increment to be made, which is given in CSTEP.
3559 CSTEP may be zero if the step is unknown.
3560 STMT_AFTER_INC is true iff the statement we're looking at is after the
3561 increment of the original biv.
3562
3563 TODO -- there must be some better way. This all is quite crude. */
3564
3565 enum ainc_type
3566 {
3567 AINC_PRE_INC, /* Pre increment. */
3568 AINC_PRE_DEC, /* Pre decrement. */
3569 AINC_POST_INC, /* Post increment. */
3570 AINC_POST_DEC, /* Post decrement. */
3571 AINC_NONE /* Also the number of auto increment types. */
3572 };
3573
3574 typedef struct address_cost_data_s
3575 {
3576 HOST_WIDE_INT min_offset, max_offset;
3577 unsigned costs[2][2][2][2];
3578 unsigned ainc_costs[AINC_NONE];
3579 } *address_cost_data;
3580
3581
3582 static comp_cost
3583 get_address_cost (bool symbol_present, bool var_present,
3584 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3585 HOST_WIDE_INT cstep, machine_mode mem_mode,
3586 addr_space_t as, bool speed,
3587 bool stmt_after_inc, bool *may_autoinc)
3588 {
3589 machine_mode address_mode = targetm.addr_space.address_mode (as);
3590 static vec<address_cost_data> address_cost_data_list;
3591 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3592 address_cost_data data;
3593 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3594 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3595 unsigned cost, acost, complexity;
3596 enum ainc_type autoinc_type;
3597 bool offset_p, ratio_p, autoinc;
3598 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3599 unsigned HOST_WIDE_INT mask;
3600 unsigned bits;
3601
3602 if (data_index >= address_cost_data_list.length ())
3603 address_cost_data_list.safe_grow_cleared (data_index + 1);
3604
3605 data = address_cost_data_list[data_index];
3606 if (!data)
3607 {
3608 HOST_WIDE_INT i;
3609 HOST_WIDE_INT rat, off = 0;
3610 int old_cse_not_expected, width;
3611 unsigned sym_p, var_p, off_p, rat_p, add_c;
3612 rtx_insn *seq;
3613 rtx addr, base;
3614 rtx reg0, reg1;
3615
3616 data = (address_cost_data) xcalloc (1, sizeof (*data));
3617
3618 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3619
3620 width = GET_MODE_BITSIZE (address_mode) - 1;
3621 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3622 width = HOST_BITS_PER_WIDE_INT - 1;
3623 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3624
3625 for (i = width; i >= 0; i--)
3626 {
3627 off = -((unsigned HOST_WIDE_INT) 1 << i);
3628 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3629 if (memory_address_addr_space_p (mem_mode, addr, as))
3630 break;
3631 }
3632 data->min_offset = (i == -1? 0 : off);
3633
3634 for (i = width; i >= 0; i--)
3635 {
3636 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3637 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3638 if (memory_address_addr_space_p (mem_mode, addr, as))
3639 break;
3640 /* For some strict-alignment targets, the offset must be naturally
3641 aligned. Try an aligned offset if mem_mode is not QImode. */
3642 off = mem_mode != QImode
3643 ? ((unsigned HOST_WIDE_INT) 1 << i)
3644 - GET_MODE_SIZE (mem_mode)
3645 : 0;
3646 if (off > 0)
3647 {
3648 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3649 if (memory_address_addr_space_p (mem_mode, addr, as))
3650 break;
3651 }
3652 }
3653 if (i == -1)
3654 off = 0;
3655 data->max_offset = off;
3656
3657 if (dump_file && (dump_flags & TDF_DETAILS))
3658 {
3659 fprintf (dump_file, "get_address_cost:\n");
3660 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3661 GET_MODE_NAME (mem_mode),
3662 data->min_offset);
3663 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3664 GET_MODE_NAME (mem_mode),
3665 data->max_offset);
3666 }
3667
3668 rat = 1;
3669 for (i = 2; i <= MAX_RATIO; i++)
3670 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3671 {
3672 rat = i;
3673 break;
3674 }
3675
3676 /* Compute the cost of various addressing modes. */
3677 acost = 0;
3678 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3679 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3680
3681 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3682 || USE_STORE_PRE_DECREMENT (mem_mode))
3683 {
3684 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3685 has_predec[mem_mode]
3686 = memory_address_addr_space_p (mem_mode, addr, as);
3687
3688 if (has_predec[mem_mode])
3689 data->ainc_costs[AINC_PRE_DEC]
3690 = address_cost (addr, mem_mode, as, speed);
3691 }
3692 if (USE_LOAD_POST_DECREMENT (mem_mode)
3693 || USE_STORE_POST_DECREMENT (mem_mode))
3694 {
3695 addr = gen_rtx_POST_DEC (address_mode, reg0);
3696 has_postdec[mem_mode]
3697 = memory_address_addr_space_p (mem_mode, addr, as);
3698
3699 if (has_postdec[mem_mode])
3700 data->ainc_costs[AINC_POST_DEC]
3701 = address_cost (addr, mem_mode, as, speed);
3702 }
3703 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3704 || USE_STORE_PRE_INCREMENT (mem_mode))
3705 {
3706 addr = gen_rtx_PRE_INC (address_mode, reg0);
3707 has_preinc[mem_mode]
3708 = memory_address_addr_space_p (mem_mode, addr, as);
3709
3710 if (has_preinc[mem_mode])
3711 data->ainc_costs[AINC_PRE_INC]
3712 = address_cost (addr, mem_mode, as, speed);
3713 }
3714 if (USE_LOAD_POST_INCREMENT (mem_mode)
3715 || USE_STORE_POST_INCREMENT (mem_mode))
3716 {
3717 addr = gen_rtx_POST_INC (address_mode, reg0);
3718 has_postinc[mem_mode]
3719 = memory_address_addr_space_p (mem_mode, addr, as);
3720
3721 if (has_postinc[mem_mode])
3722 data->ainc_costs[AINC_POST_INC]
3723 = address_cost (addr, mem_mode, as, speed);
3724 }
3725 for (i = 0; i < 16; i++)
3726 {
3727 sym_p = i & 1;
3728 var_p = (i >> 1) & 1;
3729 off_p = (i >> 2) & 1;
3730 rat_p = (i >> 3) & 1;
3731
3732 addr = reg0;
3733 if (rat_p)
3734 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3735 gen_int_mode (rat, address_mode));
3736
3737 if (var_p)
3738 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3739
3740 if (sym_p)
3741 {
3742 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3743 /* ??? We can run into trouble with some backends by presenting
3744 it with symbols which haven't been properly passed through
3745 targetm.encode_section_info. By setting the local bit, we
3746 enhance the probability of things working. */
3747 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3748
3749 if (off_p)
3750 base = gen_rtx_fmt_e (CONST, address_mode,
3751 gen_rtx_fmt_ee
3752 (PLUS, address_mode, base,
3753 gen_int_mode (off, address_mode)));
3754 }
3755 else if (off_p)
3756 base = gen_int_mode (off, address_mode);
3757 else
3758 base = NULL_RTX;
3759
3760 if (base)
3761 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3762
3763 start_sequence ();
3764 /* To avoid splitting addressing modes, pretend that no cse will
3765 follow. */
3766 old_cse_not_expected = cse_not_expected;
3767 cse_not_expected = true;
3768 addr = memory_address_addr_space (mem_mode, addr, as);
3769 cse_not_expected = old_cse_not_expected;
3770 seq = get_insns ();
3771 end_sequence ();
3772
3773 acost = seq_cost (seq, speed);
3774 acost += address_cost (addr, mem_mode, as, speed);
3775
3776 if (!acost)
3777 acost = 1;
3778 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3779 }
3780
3781 /* On some targets, it is quite expensive to load a symbol into a register,
3782 which makes addresses that contain symbols look much more expensive.
3783 However, the symbol will have to be loaded in any case before the
3784 loop (and quite likely we have it in a register already), so it does not
3785 make much sense to penalize them too heavily. So make some final
3786 tweaks for the SYMBOL_PRESENT modes:
3787
3788 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3789 var is cheaper, use this mode with small penalty.
3790 If VAR_PRESENT is true, try whether the mode with
3791 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3792 if this is the case, use it. */
3793 add_c = add_cost (speed, address_mode);
3794 for (i = 0; i < 8; i++)
3795 {
3796 var_p = i & 1;
3797 off_p = (i >> 1) & 1;
3798 rat_p = (i >> 2) & 1;
3799
3800 acost = data->costs[0][1][off_p][rat_p] + 1;
3801 if (var_p)
3802 acost += add_c;
3803
3804 if (acost < data->costs[1][var_p][off_p][rat_p])
3805 data->costs[1][var_p][off_p][rat_p] = acost;
3806 }
3807
3808 if (dump_file && (dump_flags & TDF_DETAILS))
3809 {
3810 fprintf (dump_file, "Address costs:\n");
3811
3812 for (i = 0; i < 16; i++)
3813 {
3814 sym_p = i & 1;
3815 var_p = (i >> 1) & 1;
3816 off_p = (i >> 2) & 1;
3817 rat_p = (i >> 3) & 1;
3818
3819 fprintf (dump_file, " ");
3820 if (sym_p)
3821 fprintf (dump_file, "sym + ");
3822 if (var_p)
3823 fprintf (dump_file, "var + ");
3824 if (off_p)
3825 fprintf (dump_file, "cst + ");
3826 if (rat_p)
3827 fprintf (dump_file, "rat * ");
3828
3829 acost = data->costs[sym_p][var_p][off_p][rat_p];
3830 fprintf (dump_file, "index costs %d\n", acost);
3831 }
3832 if (has_predec[mem_mode] || has_postdec[mem_mode]
3833 || has_preinc[mem_mode] || has_postinc[mem_mode])
3834 fprintf (dump_file, " May include autoinc/dec\n");
3835 fprintf (dump_file, "\n");
3836 }
3837
3838 address_cost_data_list[data_index] = data;
3839 }
3840
3841 bits = GET_MODE_BITSIZE (address_mode);
3842 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3843 offset &= mask;
3844 if ((offset >> (bits - 1) & 1))
3845 offset |= ~mask;
3846 s_offset = offset;
3847
3848 autoinc = false;
3849 autoinc_type = AINC_NONE;
3850 msize = GET_MODE_SIZE (mem_mode);
3851 autoinc_offset = offset;
3852 if (stmt_after_inc)
3853 autoinc_offset += ratio * cstep;
3854 if (symbol_present || var_present || ratio != 1)
3855 autoinc = false;
3856 else
3857 {
3858 if (has_postinc[mem_mode] && autoinc_offset == 0
3859 && msize == cstep)
3860 autoinc_type = AINC_POST_INC;
3861 else if (has_postdec[mem_mode] && autoinc_offset == 0
3862 && msize == -cstep)
3863 autoinc_type = AINC_POST_DEC;
3864 else if (has_preinc[mem_mode] && autoinc_offset == msize
3865 && msize == cstep)
3866 autoinc_type = AINC_PRE_INC;
3867 else if (has_predec[mem_mode] && autoinc_offset == -msize
3868 && msize == -cstep)
3869 autoinc_type = AINC_PRE_DEC;
3870
3871 if (autoinc_type != AINC_NONE)
3872 autoinc = true;
3873 }
3874
3875 cost = 0;
3876 offset_p = (s_offset != 0
3877 && data->min_offset <= s_offset
3878 && s_offset <= data->max_offset);
3879 ratio_p = (ratio != 1
3880 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3881
3882 if (ratio != 1 && !ratio_p)
3883 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3884
3885 if (s_offset && !offset_p && !symbol_present)
3886 cost += add_cost (speed, address_mode);
3887
3888 if (may_autoinc)
3889 *may_autoinc = autoinc;
3890 if (autoinc)
3891 acost = data->ainc_costs[autoinc_type];
3892 else
3893 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3894 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3895 return new_cost (cost + acost, complexity);
3896 }
3897
3898 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3899 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3900 calculating the operands of EXPR. Returns true if successful, and returns
3901 the cost in COST. */
3902
3903 static bool
3904 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3905 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3906 {
3907 comp_cost res;
3908 tree op1 = TREE_OPERAND (expr, 1);
3909 tree cst = TREE_OPERAND (mult, 1);
3910 tree multop = TREE_OPERAND (mult, 0);
3911 int m = exact_log2 (int_cst_value (cst));
3912 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3913 int as_cost, sa_cost;
3914 bool mult_in_op1;
3915
3916 if (!(m >= 0 && m < maxm))
3917 return false;
3918
3919 mult_in_op1 = operand_equal_p (op1, mult, 0);
3920
3921 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3922
3923 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3924 use that in preference to a shift insn followed by an add insn. */
3925 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3926 ? shiftadd_cost (speed, mode, m)
3927 : (mult_in_op1
3928 ? shiftsub1_cost (speed, mode, m)
3929 : shiftsub0_cost (speed, mode, m)));
3930
3931 res = new_cost (MIN (as_cost, sa_cost), 0);
3932 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3933
3934 STRIP_NOPS (multop);
3935 if (!is_gimple_val (multop))
3936 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3937
3938 *cost = res;
3939 return true;
3940 }
3941
3942 /* Estimates cost of forcing expression EXPR into a variable. */
3943
3944 static comp_cost
3945 force_expr_to_var_cost (tree expr, bool speed)
3946 {
3947 static bool costs_initialized = false;
3948 static unsigned integer_cost [2];
3949 static unsigned symbol_cost [2];
3950 static unsigned address_cost [2];
3951 tree op0, op1;
3952 comp_cost cost0, cost1, cost;
3953 machine_mode mode;
3954
3955 if (!costs_initialized)
3956 {
3957 tree type = build_pointer_type (integer_type_node);
3958 tree var, addr;
3959 rtx x;
3960 int i;
3961
3962 var = create_tmp_var_raw (integer_type_node, "test_var");
3963 TREE_STATIC (var) = 1;
3964 x = produce_memory_decl_rtl (var, NULL);
3965 SET_DECL_RTL (var, x);
3966
3967 addr = build1 (ADDR_EXPR, type, var);
3968
3970 for (i = 0; i < 2; i++)
3971 {
3972 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3973 2000), i);
3974
3975 symbol_cost[i] = computation_cost (addr, i) + 1;
3976
3977 address_cost[i]
3978 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3979 if (dump_file && (dump_flags & TDF_DETAILS))
3980 {
3981 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3982 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3983 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3984 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3985 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3986 fprintf (dump_file, "\n");
3987 }
3988 }
3989
3990 costs_initialized = true;
3991 }
3992
3993 STRIP_NOPS (expr);
3994
3995 if (SSA_VAR_P (expr))
3996 return no_cost;
3997
3998 if (is_gimple_min_invariant (expr))
3999 {
4000 if (TREE_CODE (expr) == INTEGER_CST)
4001 return new_cost (integer_cost [speed], 0);
4002
4003 if (TREE_CODE (expr) == ADDR_EXPR)
4004 {
4005 tree obj = TREE_OPERAND (expr, 0);
4006
4007 if (TREE_CODE (obj) == VAR_DECL
4008 || TREE_CODE (obj) == PARM_DECL
4009 || TREE_CODE (obj) == RESULT_DECL)
4010 return new_cost (symbol_cost [speed], 0);
4011 }
4012
4013 return new_cost (address_cost [speed], 0);
4014 }
4015
4016 switch (TREE_CODE (expr))
4017 {
4018 case POINTER_PLUS_EXPR:
4019 case PLUS_EXPR:
4020 case MINUS_EXPR:
4021 case MULT_EXPR:
4022 op0 = TREE_OPERAND (expr, 0);
4023 op1 = TREE_OPERAND (expr, 1);
4024 STRIP_NOPS (op0);
4025 STRIP_NOPS (op1);
4026 break;
4027
4028 CASE_CONVERT:
4029 case NEGATE_EXPR:
4030 op0 = TREE_OPERAND (expr, 0);
4031 STRIP_NOPS (op0);
4032 op1 = NULL_TREE;
4033 break;
4034
4035 default:
4036 /* Just an arbitrary value, FIXME. */
4037 return new_cost (target_spill_cost[speed], 0);
4038 }
4039
4040 if (op0 == NULL_TREE
4041 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4042 cost0 = no_cost;
4043 else
4044 cost0 = force_expr_to_var_cost (op0, speed);
4045
4046 if (op1 == NULL_TREE
4047 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4048 cost1 = no_cost;
4049 else
4050 cost1 = force_expr_to_var_cost (op1, speed);
4051
4052 mode = TYPE_MODE (TREE_TYPE (expr));
4053 switch (TREE_CODE (expr))
4054 {
4055 case POINTER_PLUS_EXPR:
4056 case PLUS_EXPR:
4057 case MINUS_EXPR:
4058 case NEGATE_EXPR:
4059 cost = new_cost (add_cost (speed, mode), 0);
4060 if (TREE_CODE (expr) != NEGATE_EXPR)
4061 {
4062 tree mult = NULL_TREE;
4063 comp_cost sa_cost;
4064 if (TREE_CODE (op1) == MULT_EXPR)
4065 mult = op1;
4066 else if (TREE_CODE (op0) == MULT_EXPR)
4067 mult = op0;
4068
4069 if (mult != NULL_TREE
4070 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4071 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4072 speed, &sa_cost))
4073 return sa_cost;
4074 }
4075 break;
4076
4077 CASE_CONVERT:
4078 {
4079 tree inner_type, outer_type;
4080 outer_type = TREE_TYPE (expr);
4081 inner_type = TREE_TYPE (op0);
4082 cost = new_cost (convert_cost (TYPE_MODE (outer_type),
4083 TYPE_MODE (inner_type), speed), 0);
4084 }
4085 break;
4086
4087 case MULT_EXPR:
4088 if (cst_and_fits_in_hwi (op0))
4089 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4090 mode, speed), 0);
4091 else if (cst_and_fits_in_hwi (op1))
4092 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4093 mode, speed), 0);
4094 else
4095 return new_cost (target_spill_cost [speed], 0);
4096 break;
4097
4098 default:
4099 gcc_unreachable ();
4100 }
4101
4102 cost = add_costs (cost, cost0);
4103 cost = add_costs (cost, cost1);
4104
4105 /* Bound the cost by target_spill_cost. The parts of complicated
4106 computations often are either loop invariant or at least can
4107 be shared between several iv uses, so letting this grow without
4108 limits would not give reasonable results. */
4109 if (cost.cost > (int) target_spill_cost [speed])
4110 cost.cost = target_spill_cost [speed];
4111
4112 return cost;
4113 }
4114
4115 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4116 invariants the computation depends on. */
4117
4118 static comp_cost
4119 force_var_cost (struct ivopts_data *data,
4120 tree expr, bitmap *depends_on)
4121 {
4122 if (depends_on)
4123 {
4124 fd_ivopts_data = data;
4125 walk_tree (&expr, find_depends, depends_on, NULL);
4126 }
4127
4128 return force_expr_to_var_cost (expr, data->speed);
4129 }
4130
4131 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4132 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4133 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4134 invariants the computation depends on. */
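/* As a hypothetical example: for ADDR == arr[3] where ARR is a static
   array of 4-byte integers, *SYMBOL_PRESENT would be set, *VAR_PRESENT
   cleared, and 12 added to *OFFSET, at no cost.  */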
4135
4136 static comp_cost
4137 split_address_cost (struct ivopts_data *data,
4138 tree addr, bool *symbol_present, bool *var_present,
4139 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4140 {
4141 tree core;
4142 HOST_WIDE_INT bitsize;
4143 HOST_WIDE_INT bitpos;
4144 tree toffset;
4145 machine_mode mode;
4146 int unsignedp, volatilep;
4147
4148 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4149 &unsignedp, &volatilep, false);
4150
4151 if (toffset != 0
4152 || bitpos % BITS_PER_UNIT != 0
4153 || TREE_CODE (core) != VAR_DECL)
4154 {
4155 *symbol_present = false;
4156 *var_present = true;
4157 fd_ivopts_data = data;
4158 walk_tree (&addr, find_depends, depends_on, NULL);
4159 return new_cost (target_spill_cost[data->speed], 0);
4160 }
4161
4162 *offset += bitpos / BITS_PER_UNIT;
4163 if (TREE_STATIC (core)
4164 || DECL_EXTERNAL (core))
4165 {
4166 *symbol_present = true;
4167 *var_present = false;
4168 return no_cost;
4169 }
4170
4171 *symbol_present = false;
4172 *var_present = true;
4173 return no_cost;
4174 }
4175
4176 /* Estimates cost of expressing difference of addresses E1 - E2 as
4177 var + symbol + offset. The value of offset is added to OFFSET,
4178 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4179 part is missing. DEPENDS_ON is a set of the invariants the computation
4180 depends on. */
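/* For instance (illustrative values): for E1 == &arr[4] and E2 == &arr[1]
   with 4-byte elements, ptr_difference_const succeeds and 12 is simply
   added to *OFFSET at no cost; only a non-constant difference needs to
   be forced into a variable.  */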
4181
4182 static comp_cost
4183 ptr_difference_cost (struct ivopts_data *data,
4184 tree e1, tree e2, bool *symbol_present, bool *var_present,
4185 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4186 {
4187 HOST_WIDE_INT diff = 0;
4188 aff_tree aff_e1, aff_e2;
4189 tree type;
4190
4191 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4192
4193 if (ptr_difference_const (e1, e2, &diff))
4194 {
4195 *offset += diff;
4196 *symbol_present = false;
4197 *var_present = false;
4198 return no_cost;
4199 }
4200
4201 if (integer_zerop (e2))
4202 return split_address_cost (data, TREE_OPERAND (e1, 0),
4203 symbol_present, var_present, offset, depends_on);
4204
4205 *symbol_present = false;
4206 *var_present = true;
4207
4208 type = signed_type_for (TREE_TYPE (e1));
4209 tree_to_aff_combination (e1, type, &aff_e1);
4210 tree_to_aff_combination (e2, type, &aff_e2);
4211 aff_combination_scale (&aff_e2, -1);
4212 aff_combination_add (&aff_e1, &aff_e2);
4213
4214 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4215 }
4216
4217 /* Estimates cost of expressing difference E1 - E2 as
4218 var + symbol + offset. The value of offset is added to OFFSET,
4219 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4220 part is missing. DEPENDS_ON is a set of the invariants the computation
4221 depends on. */
4222
4223 static comp_cost
4224 difference_cost (struct ivopts_data *data,
4225 tree e1, tree e2, bool *symbol_present, bool *var_present,
4226 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4227 {
4228 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4229 unsigned HOST_WIDE_INT off1, off2;
4230 aff_tree aff_e1, aff_e2;
4231 tree type;
4232
4233 e1 = strip_offset (e1, &off1);
4234 e2 = strip_offset (e2, &off2);
4235 *offset += off1 - off2;
4236
4237 STRIP_NOPS (e1);
4238 STRIP_NOPS (e2);
4239
4240 if (TREE_CODE (e1) == ADDR_EXPR)
4241 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4242 offset, depends_on);
4243 *symbol_present = false;
4244
4245 if (operand_equal_p (e1, e2, 0))
4246 {
4247 *var_present = false;
4248 return no_cost;
4249 }
4250
4251 *var_present = true;
4252
4253 if (integer_zerop (e2))
4254 return force_var_cost (data, e1, depends_on);
4255
4256 if (integer_zerop (e1))
4257 {
4258 comp_cost cost = force_var_cost (data, e2, depends_on);
4259 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4260 return cost;
4261 }
4262
4263 type = signed_type_for (TREE_TYPE (e1));
4264 tree_to_aff_combination (e1, type, &aff_e1);
4265 tree_to_aff_combination (e2, type, &aff_e2);
4266 aff_combination_scale (&aff_e2, -1);
4267 aff_combination_add (&aff_e1, &aff_e2);
4268
4269 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4270 }
4271
4272 /* Returns true if AFF1 and AFF2 are identical. */
4273
4274 static bool
4275 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4276 {
4277 unsigned i;
4278
4279 if (aff1->n != aff2->n)
4280 return false;
4281
4282 for (i = 0; i < aff1->n; i++)
4283 {
4284 if (aff1->elts[i].coef != aff2->elts[i].coef)
4285 return false;
4286
4287 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4288 return false;
4289 }
4290 return true;
4291 }
4292
4293 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4294
4295 static int
4296 get_expr_id (struct ivopts_data *data, tree expr)
4297 {
4298 struct iv_inv_expr_ent ent;
4299 struct iv_inv_expr_ent **slot;
4300
4301 ent.expr = expr;
4302 ent.hash = iterative_hash_expr (expr, 0);
4303 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4304 if (*slot)
4305 return (*slot)->id;
4306
4307 *slot = XNEW (struct iv_inv_expr_ent);
4308 (*slot)->expr = expr;
4309 (*slot)->hash = ent.hash;
4310 (*slot)->id = data->inv_expr_id++;
4311 return (*slot)->id;
4312 }
4313
4314 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4315 requires a new compiler generated temporary. Returns -1 otherwise.
4316 ADDRESS_P is a flag indicating if the expression is for address
4317 computation. */
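/* For example, for UBASE == x + 8, CBASE == x and RATIO == 1 (where X is
   some hypothetical SSA name), stripping the constant parts leaves two
   equal bases, so no new temporary is needed and -1 is returned.  */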
4318
4319 static int
4320 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4321 tree cbase, HOST_WIDE_INT ratio,
4322 bool address_p)
4323 {
4324 aff_tree ubase_aff, cbase_aff;
4325 tree expr, ub, cb;
4326
4327 STRIP_NOPS (ubase);
4328 STRIP_NOPS (cbase);
4329 ub = ubase;
4330 cb = cbase;
4331
4332 if ((TREE_CODE (ubase) == INTEGER_CST)
4333 && (TREE_CODE (cbase) == INTEGER_CST))
4334 return -1;
4335
4336 /* Strips the constant part. */
4337 if (TREE_CODE (ubase) == PLUS_EXPR
4338 || TREE_CODE (ubase) == MINUS_EXPR
4339 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4340 {
4341 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4342 ubase = TREE_OPERAND (ubase, 0);
4343 }
4344
4345 /* Strips the constant part. */
4346 if (TREE_CODE (cbase) == PLUS_EXPR
4347 || TREE_CODE (cbase) == MINUS_EXPR
4348 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4349 {
4350 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4351 cbase = TREE_OPERAND (cbase, 0);
4352 }
4353
4354 if (address_p)
4355 {
4356 if (((TREE_CODE (ubase) == SSA_NAME)
4357 || (TREE_CODE (ubase) == ADDR_EXPR
4358 && is_gimple_min_invariant (ubase)))
4359 && (TREE_CODE (cbase) == INTEGER_CST))
4360 return -1;
4361
4362 if (((TREE_CODE (cbase) == SSA_NAME)
4363 || (TREE_CODE (cbase) == ADDR_EXPR
4364 && is_gimple_min_invariant (cbase)))
4365 && (TREE_CODE (ubase) == INTEGER_CST))
4366 return -1;
4367 }
4368
4369 if (ratio == 1)
4370 {
4371 if (operand_equal_p (ubase, cbase, 0))
4372 return -1;
4373
4374 if (TREE_CODE (ubase) == ADDR_EXPR
4375 && TREE_CODE (cbase) == ADDR_EXPR)
4376 {
4377 tree usym, csym;
4378
4379 usym = TREE_OPERAND (ubase, 0);
4380 csym = TREE_OPERAND (cbase, 0);
4381 if (TREE_CODE (usym) == ARRAY_REF)
4382 {
4383 tree ind = TREE_OPERAND (usym, 1);
4384 if (TREE_CODE (ind) == INTEGER_CST
4385 && tree_fits_shwi_p (ind)
4386 && tree_to_shwi (ind) == 0)
4387 usym = TREE_OPERAND (usym, 0);
4388 }
4389 if (TREE_CODE (csym) == ARRAY_REF)
4390 {
4391 tree ind = TREE_OPERAND (csym, 1);
4392 if (TREE_CODE (ind) == INTEGER_CST
4393 && tree_fits_shwi_p (ind)
4394 && tree_to_shwi (ind) == 0)
4395 csym = TREE_OPERAND (csym, 0);
4396 }
4397 if (operand_equal_p (usym, csym, 0))
4398 return -1;
4399 }
4400 /* Now do a more complex comparison.  */
4401 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4402 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4403 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4404 return -1;
4405 }
4406
4407 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4408 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4409
4410 aff_combination_scale (&cbase_aff, -1 * ratio);
4411 aff_combination_add (&ubase_aff, &cbase_aff);
4412 expr = aff_combination_to_tree (&ubase_aff);
4413 return get_expr_id (data, expr);
4414 }
4415
4416
4417
4418 /* Determines the cost of the computation by which USE is expressed
4419 from induction variable CAND. If ADDRESS_P is true, we just need
4420 to create an address from it, otherwise we want to get it into a
4421 register. A set of invariants we depend on is stored in
4422 DEPENDS_ON. AT is the statement at which the value is computed.
4423 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4424 addressing is likely. */
4425
4426 static comp_cost
4427 get_computation_cost_at (struct ivopts_data *data,
4428 struct iv_use *use, struct iv_cand *cand,
4429 bool address_p, bitmap *depends_on, gimple at,
4430 bool *can_autoinc,
4431 int *inv_expr_id)
4432 {
4433 tree ubase = use->iv->base, ustep = use->iv->step;
4434 tree cbase, cstep;
4435 tree utype = TREE_TYPE (ubase), ctype;
4436 unsigned HOST_WIDE_INT cstepi, offset = 0;
4437 HOST_WIDE_INT ratio, aratio;
4438 bool var_present, symbol_present, stmt_is_after_inc;
4439 comp_cost cost;
4440 widest_int rat;
4441 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4442 machine_mode mem_mode = (address_p
4443 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4444 : VOIDmode);
4445
4446 *depends_on = NULL;
4447
4448 /* Only consider real candidates. */
4449 if (!cand->iv)
4450 return infinite_cost;
4451
4452 cbase = cand->iv->base;
4453 cstep = cand->iv->step;
4454 ctype = TREE_TYPE (cbase);
4455
4456 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4457 {
4458 /* We do not have enough precision to express the values of the use. */
4459 return infinite_cost;
4460 }
4461
4462 if (address_p
4463 || (use->iv->base_object
4464 && cand->iv->base_object
4465 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4466 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4467 {
4468 /* Do not try to express address of an object with computation based
4469 on address of a different object. This may cause problems in rtl
4470 level alias analysis (that does not expect this to be happening,
4471 as this is illegal in C), and would be unlikely to be useful
4472 anyway. */
4473 if (use->iv->base_object
4474 && cand->iv->base_object
4475 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4476 return infinite_cost;
4477 }
4478
4479 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4480 {
4481 /* TODO -- add direct handling of this case. */
4482 goto fallback;
4483 }
4484
4485 /* CSTEPI is removed from the offset in case the statement is after the
4486 increment. If the step is not constant, we use zero instead.
4487 This is a bit imprecise (there is an extra addition), but
4488 redundancy elimination is likely to transform the code so that
4489 it uses the value of the variable before the increment anyway,
4490 so this is not that unrealistic. */
4491 if (cst_and_fits_in_hwi (cstep))
4492 cstepi = int_cst_value (cstep);
4493 else
4494 cstepi = 0;
4495
4496 if (!constant_multiple_of (ustep, cstep, &rat))
4497 return infinite_cost;
4498
4499 if (wi::fits_shwi_p (rat))
4500 ratio = rat.to_shwi ();
4501 else
4502 return infinite_cost;
4503
4504 STRIP_NOPS (cbase);
4505 ctype = TREE_TYPE (cbase);
4506
4507 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4508
4509 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4510 or ratio == 1, it is better to handle this like
4511
4512 ubase - ratio * cbase + ratio * var
4513
4514 (this also holds in the case ratio == -1, TODO). */
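/* As a worked instance (illustrative values): for UBASE == 40, CBASE == 4
   and RATIO == 2, the use is 40 + 2 * (var - 4) == 32 + 2 * var, so
   -RATIO * CBASE == -8 is folded into the offset and only the 2 * var
   part remains to be costed.  */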
4515
4516 if (cst_and_fits_in_hwi (cbase))
4517 {
4518 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4519 cost = difference_cost (data,
4520 ubase, build_int_cst (utype, 0),
4521 &symbol_present, &var_present, &offset,
4522 depends_on);
4523 cost.cost /= avg_loop_niter (data->current_loop);
4524 }
4525 else if (ratio == 1)
4526 {
4527 tree real_cbase = cbase;
4528
4529 /* Check to see if any adjustment is needed. */
4530 if (cstepi == 0 && stmt_is_after_inc)
4531 {
4532 aff_tree real_cbase_aff;
4533 aff_tree cstep_aff;
4534
4535 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4536 &real_cbase_aff);
4537 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4538
4539 aff_combination_add (&real_cbase_aff, &cstep_aff);
4540 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4541 }
4542
4543 cost = difference_cost (data,
4544 ubase, real_cbase,
4545 &symbol_present, &var_present, &offset,
4546 depends_on);
4547 cost.cost /= avg_loop_niter (data->current_loop);
4548 }
4549 else if (address_p
4550 && !POINTER_TYPE_P (ctype)
4551 && multiplier_allowed_in_address_p
4552 (ratio, mem_mode,
4553 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4554 {
4555 cbase
4556 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4557 cost = difference_cost (data,
4558 ubase, cbase,
4559 &symbol_present, &var_present, &offset,
4560 depends_on);
4561 cost.cost /= avg_loop_niter (data->current_loop);
4562 }
4563 else
4564 {
4565 cost = force_var_cost (data, cbase, depends_on);
4566 cost = add_costs (cost,
4567 difference_cost (data,
4568 ubase, build_int_cst (utype, 0),
4569 &symbol_present, &var_present,
4570 &offset, depends_on));
4571 cost.cost /= avg_loop_niter (data->current_loop);
4572 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4573 }
4574
4575 /* The set of invariants depended on by the sub-use has already been
4576 computed for the first use in the group. */
4577 if (use->sub_id)
4578 {
4579 cost.cost = 0;
4580 if (depends_on && *depends_on)
4581 bitmap_clear (*depends_on);
4582 }
4583 else if (inv_expr_id)
4584 {
4585 *inv_expr_id =
4586 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4587 /* Clear depends on. */
4588 if (*inv_expr_id != -1 && depends_on && *depends_on)
4589 bitmap_clear (*depends_on);
4590 }
4591
4592 /* If we are after the increment, the value of the candidate is higher by
4593 one iteration. */
4594 if (stmt_is_after_inc)
4595 offset -= ratio * cstepi;
4596
4597 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4598 (symbol/var1/const parts may be omitted). If we are looking for an
4599 address, find the cost of addressing this. */
4600 if (address_p)
4601 return add_costs (cost,
4602 get_address_cost (symbol_present, var_present,
4603 offset, ratio, cstepi,
4604 mem_mode,
4605 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4606 speed, stmt_is_after_inc,
4607 can_autoinc));
4608
4609 /* Otherwise estimate the costs for computing the expression. */
4610 if (!symbol_present && !var_present && !offset)
4611 {
4612 if (ratio != 1)
4613 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4614 return cost;
4615 }
4616
4617 /* Symbol + offset should be compile-time computable, so assume they
4618 are added once to the variable, if present. */
4619 if (var_present && (symbol_present || offset))
4620 cost.cost += adjust_setup_cost (data,
4621 add_cost (speed, TYPE_MODE (ctype)));
4622
4623 /* Having offset does not affect runtime cost in case it is added to
4624 symbol, but it increases complexity. */
4625 if (offset)
4626 cost.complexity++;
4627
4628 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4629
4630 aratio = ratio > 0 ? ratio : -ratio;
4631 if (aratio != 1)
4632 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4633 return cost;
4634
4635 fallback:
4636 if (can_autoinc)
4637 *can_autoinc = false;
4638
4639 {
4640 /* Just get the expression, expand it and measure the cost. */
4641 tree comp = get_computation_at (data->current_loop, use, cand, at);
4642
4643 if (!comp)
4644 return infinite_cost;
4645
4646 if (address_p)
4647 comp = build_simple_mem_ref (comp);
4648
4649 return new_cost (computation_cost (comp, speed), 0);
4650 }
4651 }
4652
4653 /* Determines the cost of the computation by which USE is expressed
4654 from induction variable CAND. If ADDRESS_P is true, we just need
4655 to create an address from it, otherwise we want to get it into a
4656 register. A set of invariants we depend on is stored in
4657 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4658 autoinc addressing is likely. */
4659
4660 static comp_cost
4661 get_computation_cost (struct ivopts_data *data,
4662 struct iv_use *use, struct iv_cand *cand,
4663 bool address_p, bitmap *depends_on,
4664 bool *can_autoinc, int *inv_expr_id)
4665 {
4666 return get_computation_cost_at (data,
4667 use, cand, address_p, depends_on, use->stmt,
4668 can_autoinc, inv_expr_id);
4669 }
4670
4671 /* Determines cost of basing replacement of USE on CAND in a generic
4672 expression. */
4673
4674 static bool
4675 determine_use_iv_cost_generic (struct ivopts_data *data,
4676 struct iv_use *use, struct iv_cand *cand)
4677 {
4678 bitmap depends_on;
4679 comp_cost cost;
4680 int inv_expr_id = -1;
4681
4682 /* The simple case first -- if we need to express value of the preserved
4683 original biv, the cost is 0. This also prevents us from counting the
4684 cost of increment twice -- once at this use and once in the cost of
4685 the candidate. */
4686 if (cand->pos == IP_ORIGINAL
4687 && cand->incremented_at == use->stmt)
4688 {
4689 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4690 ERROR_MARK, -1);
4691 return true;
4692 }
4693
4694 cost = get_computation_cost (data, use, cand, false, &depends_on,
4695 NULL, &inv_expr_id);
4696
4697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4698 inv_expr_id);
4699
4700 return !infinite_cost_p (cost);
4701 }
4702
4703 /* Determines cost of basing replacement of USE on CAND in an address. */
4704
4705 static bool
4706 determine_use_iv_cost_address (struct ivopts_data *data,
4707 struct iv_use *use, struct iv_cand *cand)
4708 {
4709 bitmap depends_on;
4710 bool can_autoinc;
4711 int inv_expr_id = -1;
4712 struct iv_use *sub_use;
4713 comp_cost sub_cost;
4714 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4715 &can_autoinc, &inv_expr_id);
4716
4717 if (cand->ainc_use == use)
4718 {
4719 if (can_autoinc)
4720 cost.cost -= cand->cost_step;
4721 /* If we generated the candidate solely for exploiting autoincrement
4722 opportunities, and it turns out it can't be used, set the cost to
4723 infinity to make sure we ignore it. */
4724 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4725 cost = infinite_cost;
4726 }
4727 for (sub_use = use->next;
4728 sub_use && !infinite_cost_p (cost);
4729 sub_use = sub_use->next)
4730 {
4731 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4732 &can_autoinc, &inv_expr_id);
4733 cost = add_costs (cost, sub_cost);
4734 }
4735
4736 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4737 inv_expr_id);
4738
4739 return !infinite_cost_p (cost);
4740 }
4741
4742 /* Computes value of candidate CAND at position AT in iteration NITER, and
4743 stores it to VAL. */
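/* In effect, VAL = BASE + (NITER + after) * STEP, where "after" is 1 if
   AT is after the increment of CAND and 0 otherwise; the arithmetic is
   carried out in an unsigned type so that wrap-around is well defined.  */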
4744
4745 static void
4746 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4747 aff_tree *val)
4748 {
4749 aff_tree step, delta, nit;
4750 struct iv *iv = cand->iv;
4751 tree type = TREE_TYPE (iv->base);
4752 /* Carry out the computation in an unsigned type (this also handles
4753 pointer bases) so that wrap-around is well defined. */
4754 tree steptype = unsigned_type_for (type);
4756
4757 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4758 aff_combination_convert (&step, steptype);
4759 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4760 aff_combination_convert (&nit, steptype);
4761 aff_combination_mult (&nit, &step, &delta);
4762 if (stmt_after_increment (loop, cand, at))
4763 aff_combination_add (&delta, &step);
4764
4765 tree_to_aff_combination (iv->base, type, val);
4766 if (!POINTER_TYPE_P (type))
4767 aff_combination_convert (val, steptype);
4768 aff_combination_add (val, &delta);
4769 }
4770
4771 /* Returns period of induction variable iv. */
4772
4773 static tree
4774 iv_period (struct iv *iv)
4775 {
4776 tree step = iv->step, period, type;
4777 tree pow2div;
4778
4779 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4780
4781 type = unsigned_type_for (TREE_TYPE (step));
4782 /* Period of the iv is lcm (step, type_range)/step - 1,
4783 i.e., N*type_range/step - 1. Since type_range is a power
4784 of two and N == step >> num_of_ending_zeros_binary (step),
4785 the final result is
4786
4787 (type_range >> num_of_ending_zeros_binary (step)) - 1
4788
4789 */
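/* For example, for a 32-bit unsigned type and STEP == 12 (two trailing
   zero bits), the period is (1 << 30) - 1 == 0x3fffffff: the iv takes
   2^30 distinct values before it starts repeating.  */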
4790 pow2div = num_ending_zeros (step);
4791
4792 period = build_low_bits_mask (type,
4793 (TYPE_PRECISION (type)
4794 - tree_to_uhwi (pow2div)));
4795
4796 return period;
4797 }
4798
4799 /* Returns the comparison operator used when eliminating the iv USE. */
4800
4801 static enum tree_code
4802 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4803 {
4804 struct loop *loop = data->current_loop;
4805 basic_block ex_bb;
4806 edge exit;
4807
4808 ex_bb = gimple_bb (use->stmt);
4809 exit = EDGE_SUCC (ex_bb, 0);
4810 if (flow_bb_inside_loop_p (loop, exit->dest))
4811 exit = EDGE_SUCC (ex_bb, 1);
4812
4813 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4814 }
4815
4816 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4817 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4818 calculation is performed in non-wrapping type.
4819
4820 TODO: More generally, we could test for the situation that
4821 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4822 This would require knowing the sign of OFFSET. */
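/* For example, if BASE is an SSA name defined by base = q + n and OFFSET
   is n (Q and N being hypothetical names), the affine expansion shows
   that BASE - OFFSET == q, so the subtraction cannot wrap.  */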
4823
4824 static bool
4825 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4826 {
4827 enum tree_code code;
4828 tree e1, e2;
4829 aff_tree aff_e1, aff_e2, aff_offset;
4830
4831 if (!nowrap_type_p (TREE_TYPE (base)))
4832 return false;
4833
4834 base = expand_simple_operations (base);
4835
4836 if (TREE_CODE (base) == SSA_NAME)
4837 {
4838 gimple stmt = SSA_NAME_DEF_STMT (base);
4839
4840 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4841 return false;
4842
4843 code = gimple_assign_rhs_code (stmt);
4844 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4845 return false;
4846
4847 e1 = gimple_assign_rhs1 (stmt);
4848 e2 = gimple_assign_rhs2 (stmt);
4849 }
4850 else
4851 {
4852 code = TREE_CODE (base);
4853 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4854 return false;
4855 e1 = TREE_OPERAND (base, 0);
4856 e2 = TREE_OPERAND (base, 1);
4857 }
4858
4859 /* Use affine expansion as deeper inspection to prove the equality. */
4860 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4861 &aff_e2, &data->name_expansion_cache);
4862 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4863 &aff_offset, &data->name_expansion_cache);
4864 aff_combination_scale (&aff_offset, -1);
4865 switch (code)
4866 {
4867 case PLUS_EXPR:
4868 aff_combination_add (&aff_e2, &aff_offset);
4869 if (aff_combination_zero_p (&aff_e2))
4870 return true;
4871
4872 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4873 &aff_e1, &data->name_expansion_cache);
4874 aff_combination_add (&aff_e1, &aff_offset);
4875 return aff_combination_zero_p (&aff_e1);
4876
4877 case POINTER_PLUS_EXPR:
4878 aff_combination_add (&aff_e2, &aff_offset);
4879 return aff_combination_zero_p (&aff_e2);
4880
4881 default:
4882 return false;
4883 }
4884 }
4885
4886 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4887 comparison with CAND. NITER describes the number of iterations of
4888 the loops. If successful, the comparison in COMP_P is altered accordingly.
4889
4890 We aim to handle the following situation:
4891
4892 sometype *base, *p;
4893 int a, b, i;
4894
4895 i = a;
4896 p = p_0 = base + a;
4897
4898 do
4899 {
4900 bla (*p);
4901 p++;
4902 i++;
4903 }
4904 while (i < b);
4905
4906 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4907 We aim to optimize this to
4908
4909 p = p_0 = base + a;
4910 do
4911 {
4912 bla (*p);
4913 p++;
4914 }
4915 while (p < p_0 - a + b);
4916
4917 This preserves correctness, since the pointer arithmetic does not
4918 overflow. More precisely:
4919
4920 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4921 overflow in computing it or the values of p.
4922 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4923 overflow. To prove this, we use the fact that p_0 = base + a. */
4924
4925 static bool
4926 iv_elimination_compare_lt (struct ivopts_data *data,
4927 struct iv_cand *cand, enum tree_code *comp_p,
4928 struct tree_niter_desc *niter)
4929 {
4930 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4931 struct aff_tree nit, tmpa, tmpb;
4932 enum tree_code comp;
4933 HOST_WIDE_INT step;
4934
4935 /* We need to know that the candidate induction variable does not overflow.
4936 While more complex analysis may be used to prove this, for now just
4937 check that the variable appears in the original program and that it
4938 is computed in a type that guarantees no overflows. */
4939 cand_type = TREE_TYPE (cand->iv->base);
4940 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4941 return false;
4942
4943 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4944 the calculation of the BOUND could overflow, making the comparison
4945 invalid. */
4946 if (!data->loop_single_exit_p)
4947 return false;
4948
4949 /* We need to be able to decide whether candidate is increasing or decreasing
4950 in order to choose the right comparison operator. */
4951 if (!cst_and_fits_in_hwi (cand->iv->step))
4952 return false;
4953 step = int_cst_value (cand->iv->step);
4954
4955 /* Check that the number of iterations matches the expected pattern:
4956 a + 1 > b ? 0 : b - a - 1. */
4957 mbz = niter->may_be_zero;
4958 if (TREE_CODE (mbz) == GT_EXPR)
4959 {
4960 /* Handle a + 1 > b. */
4961 tree op0 = TREE_OPERAND (mbz, 0);
4962 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4963 {
4964 a = TREE_OPERAND (op0, 0);
4965 b = TREE_OPERAND (mbz, 1);
4966 }
4967 else
4968 return false;
4969 }
4970 else if (TREE_CODE (mbz) == LT_EXPR)
4971 {
4972 tree op1 = TREE_OPERAND (mbz, 1);
4973
4974 /* Handle b < a + 1. */
4975 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4976 {
4977 a = TREE_OPERAND (op1, 0);
4978 b = TREE_OPERAND (mbz, 0);
4979 }
4980 else
4981 return false;
4982 }
4983 else
4984 return false;
4985
4986 /* Expected number of iterations is B - A - 1. Check that it matches
4987 the actual number, i.e., that B - A - NITER = 1. */
4988 tree_to_aff_combination (niter->niter, nit_type, &nit);
4989 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4990 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4991 aff_combination_scale (&nit, -1);
4992 aff_combination_scale (&tmpa, -1);
4993 aff_combination_add (&tmpb, &tmpa);
4994 aff_combination_add (&tmpb, &nit);
4995 if (tmpb.n != 0 || tmpb.offset != 1)
4996 return false;
4997
4998 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4999 overflow. */
5000 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5001 cand->iv->step,
5002 fold_convert (TREE_TYPE (cand->iv->step), a));
5003 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5004 return false;
5005
5006 /* Determine the new comparison operator. */
5007 comp = step < 0 ? GT_EXPR : LT_EXPR;
5008 if (*comp_p == NE_EXPR)
5009 *comp_p = comp;
5010 else if (*comp_p == EQ_EXPR)
5011 *comp_p = invert_tree_comparison (comp, false);
5012 else
5013 gcc_unreachable ();
5014
5015 return true;
5016 }
5017
5018 /* Check whether it is possible to express the condition in USE by comparison
5019 of candidate CAND. If so, store the value compared with to BOUND, and the
5020 comparison operator to COMP. */
5021
5022 static bool
5023 may_eliminate_iv (struct ivopts_data *data,
5024 struct iv_use *use, struct iv_cand *cand, tree *bound,
5025 enum tree_code *comp)
5026 {
5027 basic_block ex_bb;
5028 edge exit;
5029 tree period;
5030 struct loop *loop = data->current_loop;
5031 aff_tree bnd;
5032 struct tree_niter_desc *desc = NULL;
5033
5034 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5035 return false;
5036
5037 /* For now this works only for exits that dominate the loop latch.
5038 TODO: extend to other conditions inside loop body. */
5039 ex_bb = gimple_bb (use->stmt);
5040 if (use->stmt != last_stmt (ex_bb)
5041 || gimple_code (use->stmt) != GIMPLE_COND
5042 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5043 return false;
5044
5045 exit = EDGE_SUCC (ex_bb, 0);
5046 if (flow_bb_inside_loop_p (loop, exit->dest))
5047 exit = EDGE_SUCC (ex_bb, 1);
5048 if (flow_bb_inside_loop_p (loop, exit->dest))
5049 return false;
5050
5051 desc = niter_for_exit (data, exit);
5052 if (!desc)
5053 return false;
5054
5055 /* Determine whether we can use the variable to test the exit condition.
5056 This is the case iff the period of the induction variable is greater
5057 than the number of iterations for which the exit condition is true. */
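/* For example, an unsigned char iv stepping by 1 has period 255, so it
   cannot be used to test the exit of a loop that runs 300 iterations.  */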
5058 period = iv_period (cand->iv);
5059
5060 /* If the number of iterations is constant, compare against it directly. */
5061 if (TREE_CODE (desc->niter) == INTEGER_CST)
5062 {
5063 /* See cand_value_at. */
5064 if (stmt_after_increment (loop, cand, use->stmt))
5065 {
5066 if (!tree_int_cst_lt (desc->niter, period))
5067 return false;
5068 }
5069 else
5070 {
5071 if (tree_int_cst_lt (period, desc->niter))
5072 return false;
5073 }
5074 }
5075
5076 /* If not, and if this is the only possible exit of the loop, see whether
5077 we can get a conservative estimate on the number of iterations of the
5078 entire loop and compare against that instead. */
5079 else
5080 {
5081 widest_int period_value, max_niter;
5082
5083 max_niter = desc->max;
5084 if (stmt_after_increment (loop, cand, use->stmt))
5085 max_niter += 1;
5086 period_value = wi::to_widest (period);
5087 if (wi::gtu_p (max_niter, period_value))
5088 {
5089 /* See if we can take advantage of inferred loop bound information. */
5090 if (data->loop_single_exit_p)
5091 {
5092 if (!max_loop_iterations (loop, &max_niter))
5093 return false;
5094 /* The loop bound is already adjusted by adding 1. */
5095 if (wi::gtu_p (max_niter, period_value))
5096 return false;
5097 }
5098 else
5099 return false;
5100 }
5101 }
5102
5103 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5104
5105 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5106 aff_combination_to_tree (&bnd));
5107 *comp = iv_elimination_compare (data, use);
5108
5109 /* It is unlikely that computing the number of iterations using division
5110 would be more profitable than keeping the original induction variable. */
5111 if (expression_expensive_p (*bound))
5112 return false;
5113
5114 /* Sometimes it is possible to handle the situation that the number of
5115 iterations may be zero (unless additional assumptions hold) by using
5116 < instead of != in the exit condition.
5117
5118 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5119 base the exit condition on it. However, that is often too
5120 expensive. */
5121 if (!integer_zerop (desc->may_be_zero))
5122 return iv_elimination_compare_lt (data, cand, comp, desc);
5123
5124 return true;
5125 }
5126
5127 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5128 be copied if it is used in the loop body and DATA->body_includes_call. */
5129
5130 static int
5131 parm_decl_cost (struct ivopts_data *data, tree bound)
5132 {
5133 tree sbound = bound;
5134 STRIP_NOPS (sbound);
5135
5136 if (TREE_CODE (sbound) == SSA_NAME
5137 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5138 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5139 && data->body_includes_call)
5140 return COSTS_N_INSNS (1);
5141
5142 return 0;
5143 }
5144
5145 /* Determines cost of basing replacement of USE on CAND in a condition. */
5146
5147 static bool
5148 determine_use_iv_cost_condition (struct ivopts_data *data,
5149 struct iv_use *use, struct iv_cand *cand)
5150 {
5151 tree bound = NULL_TREE;
5152 struct iv *cmp_iv;
5153 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5154 comp_cost elim_cost, express_cost, cost, bound_cost;
5155 bool ok;
5156 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5157 tree *control_var, *bound_cst;
5158 enum tree_code comp = ERROR_MARK;
5159
5160 /* Only consider real candidates. */
5161 if (!cand->iv)
5162 {
5163 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5164 ERROR_MARK, -1);
5165 return false;
5166 }
5167
5168 /* Try iv elimination. */
5169 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5170 {
5171 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5172 if (elim_cost.cost == 0)
5173 elim_cost.cost = parm_decl_cost (data, bound);
5174 else if (TREE_CODE (bound) == INTEGER_CST)
5175 elim_cost.cost = 0;
5176 /* If we replace a loop condition 'i < n' with 'p < base + n',
5177 depends_on_elim will have 'base' and 'n' set, which implies
5178 that both 'base' and 'n' will be live during the loop. More likely,
5179 'base + n' will be loop invariant, resulting in only one live value
5180 during the loop. So in that case we clear depends_on_elim and set
5181 elim_inv_expr_id instead. */
5182 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5183 {
5184 elim_inv_expr_id = get_expr_id (data, bound);
5185 bitmap_clear (depends_on_elim);
5186 }
5187 /* The bound is a loop invariant, so it will be only computed
5188 once. */
5189 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5190 }
5191 else
5192 elim_cost = infinite_cost;
5193
5194 /* Try expressing the original giv. If it is compared with an invariant,
5195 note that we cannot get rid of it. */
5196 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5197 NULL, &cmp_iv);
5198 gcc_assert (ok);
5199
5200 /* When the condition is a comparison of the candidate IV against
5201 zero, prefer this IV.
5202
5203 TODO: The constant that we're subtracting from the cost should
5204 be target-dependent. This information should be added to the
5205 target costs for each backend. */
5206 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5207 && integer_zerop (*bound_cst)
5208 && (operand_equal_p (*control_var, cand->var_after, 0)
5209 || operand_equal_p (*control_var, cand->var_before, 0)))
5210 elim_cost.cost -= 1;
5211
5212 express_cost = get_computation_cost (data, use, cand, false,
5213 &depends_on_express, NULL,
5214 &express_inv_expr_id);
5215 fd_ivopts_data = data;
5216 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5217
5218 /* Count the cost of the original bound as well. */
5219 bound_cost = force_var_cost (data, *bound_cst, NULL);
5220 if (bound_cost.cost == 0)
5221 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5222 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5223 bound_cost.cost = 0;
5224 express_cost.cost += bound_cost.cost;
5225
5226 /* Choose the better approach, preferring the eliminated IV. */
5227 if (compare_costs (elim_cost, express_cost) <= 0)
5228 {
5229 cost = elim_cost;
5230 depends_on = depends_on_elim;
5231 depends_on_elim = NULL;
5232 inv_expr_id = elim_inv_expr_id;
5233 }
5234 else
5235 {
5236 cost = express_cost;
5237 depends_on = depends_on_express;
5238 depends_on_express = NULL;
5239 bound = NULL_TREE;
5240 comp = ERROR_MARK;
5241 inv_expr_id = express_inv_expr_id;
5242 }
5243
5244 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5245
5246 if (depends_on_elim)
5247 BITMAP_FREE (depends_on_elim);
5248 if (depends_on_express)
5249 BITMAP_FREE (depends_on_express);
5250
5251 return !infinite_cost_p (cost);
5252 }
5253
5254 /* Determines cost of basing replacement of USE on CAND. Returns false
5255 if USE cannot be based on CAND. */
5256
5257 static bool
5258 determine_use_iv_cost (struct ivopts_data *data,
5259 struct iv_use *use, struct iv_cand *cand)
5260 {
5261 switch (use->type)
5262 {
5263 case USE_NONLINEAR_EXPR:
5264 return determine_use_iv_cost_generic (data, use, cand);
5265
5266 case USE_ADDRESS:
5267 return determine_use_iv_cost_address (data, use, cand);
5268
5269 case USE_COMPARE:
5270 return determine_use_iv_cost_condition (data, use, cand);
5271
5272 default:
5273 gcc_unreachable ();
5274 }
5275 }
5276
5277 /* Return true if get_computation_cost indicates that autoincrement is
5278 a possibility for the pair of USE and CAND, false otherwise. */
5279
5280 static bool
5281 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5282 struct iv_cand *cand)
5283 {
5284 bitmap depends_on;
5285 bool can_autoinc;
5286 comp_cost cost;
5287
5288 if (use->type != USE_ADDRESS)
5289 return false;
5290
5291 cost = get_computation_cost (data, use, cand, true, &depends_on,
5292 &can_autoinc, NULL);
5293
5294 BITMAP_FREE (depends_on);
5295
5296 return !infinite_cost_p (cost) && can_autoinc;
5297 }
5298
5299 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5300 use that allows autoincrement, and set their AINC_USE if possible. */
5301
5302 static void
5303 set_autoinc_for_original_candidates (struct ivopts_data *data)
5304 {
5305 unsigned i, j;
5306
5307 for (i = 0; i < n_iv_cands (data); i++)
5308 {
5309 struct iv_cand *cand = iv_cand (data, i);
5310 struct iv_use *closest_before = NULL;
5311 struct iv_use *closest_after = NULL;
5312 if (cand->pos != IP_ORIGINAL)
5313 continue;
5314
5315 for (j = 0; j < n_iv_uses (data); j++)
5316 {
5317 struct iv_use *use = iv_use (data, j);
5318 unsigned uid = gimple_uid (use->stmt);
5319
5320 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5321 continue;
5322
5323 if (uid < gimple_uid (cand->incremented_at)
5324 && (closest_before == NULL
5325 || uid > gimple_uid (closest_before->stmt)))
5326 closest_before = use;
5327
5328 if (uid > gimple_uid (cand->incremented_at)
5329 && (closest_after == NULL
5330 || uid < gimple_uid (closest_after->stmt)))
5331 closest_after = use;
5332 }
5333
5334 if (closest_before != NULL
5335 && autoinc_possible_for_pair (data, closest_before, cand))
5336 cand->ainc_use = closest_before;
5337 else if (closest_after != NULL
5338 && autoinc_possible_for_pair (data, closest_after, cand))
5339 cand->ainc_use = closest_after;
5340 }
5341 }
5342
5343 /* Finds the candidates for the induction variables. */
5344
5345 static void
5346 find_iv_candidates (struct ivopts_data *data)
5347 {
5348 /* Add commonly used ivs. */
5349 add_standard_iv_candidates (data);
5350
5351 /* Add old induction variables. */
5352 add_old_ivs_candidates (data);
5353
5354 /* Add induction variables derived from uses. */
5355 add_derived_ivs_candidates (data);
5356
5357 set_autoinc_for_original_candidates (data);
5358
5359 /* Record the important candidates. */
5360 record_important_candidates (data);
5361 }
5362
5363 /* Determines costs of basing the use of the iv on an iv candidate. */
5364
5365 static void
5366 determine_use_iv_costs (struct ivopts_data *data)
5367 {
5368 unsigned i, j;
5369 struct iv_use *use;
5370 struct iv_cand *cand;
5371 bitmap to_clear = BITMAP_ALLOC (NULL);
5372
5373 alloc_use_cost_map (data);
5374
5375 for (i = 0; i < n_iv_uses (data); i++)
5376 {
5377 use = iv_use (data, i);
5378
5379 if (data->consider_all_candidates)
5380 {
5381 for (j = 0; j < n_iv_cands (data); j++)
5382 {
5383 cand = iv_cand (data, j);
5384 determine_use_iv_cost (data, use, cand);
5385 }
5386 }
5387 else
5388 {
5389 bitmap_iterator bi;
5390
5391 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5392 {
5393 cand = iv_cand (data, j);
5394 if (!determine_use_iv_cost (data, use, cand))
5395 bitmap_set_bit (to_clear, j);
5396 }
5397
5398 /* Remove the candidates for which the cost is infinite from
5399 the list of related candidates. */
5400 bitmap_and_compl_into (use->related_cands, to_clear);
5401 bitmap_clear (to_clear);
5402 }
5403 }
5404
5405 BITMAP_FREE (to_clear);
5406
5407 if (dump_file && (dump_flags & TDF_DETAILS))
5408 {
5409 fprintf (dump_file, "Use-candidate costs:\n");
5410
5411 for (i = 0; i < n_iv_uses (data); i++)
5412 {
5413 use = iv_use (data, i);
5414
5415 fprintf (dump_file, "Use %d:\n", i);
5416 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5417 for (j = 0; j < use->n_map_members; j++)
5418 {
5419 if (!use->cost_map[j].cand
5420 || infinite_cost_p (use->cost_map[j].cost))
5421 continue;
5422
5423 fprintf (dump_file, " %d\t%d\t%d\t",
5424 use->cost_map[j].cand->id,
5425 use->cost_map[j].cost.cost,
5426 use->cost_map[j].cost.complexity);
5427 if (use->cost_map[j].depends_on)
5428 bitmap_print (dump_file,
5429 use->cost_map[j].depends_on, "","");
5430 if (use->cost_map[j].inv_expr_id != -1)
5431 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5432 fprintf (dump_file, "\n");
5433 }
5434
5435 fprintf (dump_file, "\n");
5436 }
5437 fprintf (dump_file, "\n");
5438 }
5439 }
5440
5441 /* Determines cost of the candidate CAND. */
5442
5443 static void
5444 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5445 {
5446 comp_cost cost_base;
5447 unsigned cost, cost_step;
5448 tree base;
5449
5450 if (!cand->iv)
5451 {
5452 cand->cost = 0;
5453 return;
5454 }
5455
5456 /* There are two costs associated with the candidate -- its increment
5457 and its initialization. The second is almost negligible for any loop
5458 that rolls enough, so we give it only very little weight. */
5459
5460 base = cand->iv->base;
5461 cost_base = force_var_cost (data, base, NULL);
5462 /* It will be exceptional that the iv register happens to be initialized with
5463 the proper value at no cost. In general, there will at least be a regcopy
5464 or a const set. */
5465 if (cost_base.cost == 0)
5466 cost_base.cost = COSTS_N_INSNS (1);
5467 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5468
5469 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5470
5471 /* Prefer the original ivs unless we may gain something by replacing it.
5472 The reason is to make debugging simpler; so this is not relevant for
5473 artificial ivs created by other optimization passes. */
5474 if (cand->pos != IP_ORIGINAL
5475 || !SSA_NAME_VAR (cand->var_before)
5476 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5477 cost++;
5478
5479 /* Prefer not to insert statements into latch unless there are some
5480 already (so that we do not create unnecessary jumps). */
5481 if (cand->pos == IP_END
5482 && empty_block_p (ip_end_pos (data->current_loop)))
5483 cost++;
5484
5485 cand->cost = cost;
5486 cand->cost_step = cost_step;
5487 }
5488
5489 /* Determines costs of computation of the candidates. */
5490
5491 static void
5492 determine_iv_costs (struct ivopts_data *data)
5493 {
5494 unsigned i;
5495
5496 if (dump_file && (dump_flags & TDF_DETAILS))
5497 {
5498 fprintf (dump_file, "Candidate costs:\n");
5499 fprintf (dump_file, " cand\tcost\n");
5500 }
5501
5502 for (i = 0; i < n_iv_cands (data); i++)
5503 {
5504 struct iv_cand *cand = iv_cand (data, i);
5505
5506 determine_iv_cost (data, cand);
5507
5508 if (dump_file && (dump_flags & TDF_DETAILS))
5509 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5510 }
5511
5512 if (dump_file && (dump_flags & TDF_DETAILS))
5513 fprintf (dump_file, "\n");
5514 }
5515
5516 /* Calculates cost for having SIZE induction variables. */
5517
5518 static unsigned
5519 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5520 {
5521 /* We add size to the cost, so that we prefer eliminating ivs
5522 if possible. */
5523 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5524 data->body_includes_call);
5525 }
5526
5527 /* For each size of the induction variable set determine the penalty. */
5528
5529 static void
5530 determine_set_costs (struct ivopts_data *data)
5531 {
5532 unsigned j, n;
5533 gphi *phi;
5534 gphi_iterator psi;
5535 tree op;
5536 struct loop *loop = data->current_loop;
5537 bitmap_iterator bi;
5538
5539 if (dump_file && (dump_flags & TDF_DETAILS))
5540 {
5541 fprintf (dump_file, "Global costs:\n");
5542 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5543 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5544 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5545 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5546 }
5547
5548 n = 0;
5549 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5550 {
5551 phi = psi.phi ();
5552 op = PHI_RESULT (phi);
5553
5554 if (virtual_operand_p (op))
5555 continue;
5556
5557 if (get_iv (data, op))
5558 continue;
5559
5560 n++;
5561 }
5562
5563 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5564 {
5565 struct version_info *info = ver_info (data, j);
5566
5567 if (info->inv_id && info->has_nonlin_use)
5568 n++;
5569 }
5570
5571 data->regs_used = n;
5572 if (dump_file && (dump_flags & TDF_DETAILS))
5573 fprintf (dump_file, " regs_used %d\n", n);
5574
5575 if (dump_file && (dump_flags & TDF_DETAILS))
5576 {
5577 fprintf (dump_file, " cost for size:\n");
5578 fprintf (dump_file, " ivs\tcost\n");
5579 for (j = 0; j <= 2 * target_avail_regs; j++)
5580 fprintf (dump_file, " %d\t%d\n", j,
5581 ivopts_global_cost_for_size (data, j));
5582 fprintf (dump_file, "\n");
5583 }
5584 }
5585
5586 /* Returns true if A is a cheaper cost pair than B. */
5587
5588 static bool
5589 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5590 {
5591 int cmp;
5592
5593 if (!a)
5594 return false;
5595
5596 if (!b)
5597 return true;
5598
5599 cmp = compare_costs (a->cost, b->cost);
5600 if (cmp < 0)
5601 return true;
5602
5603 if (cmp > 0)
5604 return false;
5605
5606 /* In case the costs are the same, prefer the cheaper candidate. */
5607 if (a->cand->cost < b->cand->cost)
5608 return true;
5609
5610 return false;
5611 }
5612
5613
5614 /* Returns the candidate by which USE is expressed in IVS. */
5615
5616 static struct cost_pair *
5617 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5618 {
5619 return ivs->cand_for_use[use->id];
5620 }
5621
5622 /* Computes the cost field of IVS structure. */
5623
5624 static void
5625 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5626 {
5627 comp_cost cost = ivs->cand_use_cost;
5628
5629 cost.cost += ivs->cand_cost;
5630
5631 cost.cost += ivopts_global_cost_for_size (data,
5632 ivs->n_regs + ivs->num_used_inv_expr);
5633
5634 ivs->cost = cost;
5635 }
5636
5637 /* Remove the invariants in set INVS from set IVS. */
5638
5639 static void
5640 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5641 {
5642 bitmap_iterator bi;
5643 unsigned iid;
5644
5645 if (!invs)
5646 return;
5647
5648 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5649 {
5650 ivs->n_invariant_uses[iid]--;
5651 if (ivs->n_invariant_uses[iid] == 0)
5652 ivs->n_regs--;
5653 }
5654 }
5655
5656 /* Set USE not to be expressed by any candidate in IVS. */
5657
5658 static void
5659 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5660 struct iv_use *use)
5661 {
5662 unsigned uid = use->id, cid;
5663 struct cost_pair *cp;
5664
5665 cp = ivs->cand_for_use[uid];
5666 if (!cp)
5667 return;
5668 cid = cp->cand->id;
5669
5670 ivs->bad_uses++;
5671 ivs->cand_for_use[uid] = NULL;
5672 ivs->n_cand_uses[cid]--;
5673
5674 if (ivs->n_cand_uses[cid] == 0)
5675 {
5676 bitmap_clear_bit (ivs->cands, cid);
5677 /* Do not count the pseudocandidates. */
5678 if (cp->cand->iv)
5679 ivs->n_regs--;
5680 ivs->n_cands--;
5681 ivs->cand_cost -= cp->cand->cost;
5682
5683 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5684 }
5685
5686 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5687
5688 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5689
5690 if (cp->inv_expr_id != -1)
5691 {
5692 ivs->used_inv_expr[cp->inv_expr_id]--;
5693 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5694 ivs->num_used_inv_expr--;
5695 }
5696 iv_ca_recount_cost (data, ivs);
5697 }
5698
5699 /* Add invariants in set INVS to set IVS. */
5700
5701 static void
5702 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5703 {
5704 bitmap_iterator bi;
5705 unsigned iid;
5706
5707 if (!invs)
5708 return;
5709
5710 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5711 {
5712 ivs->n_invariant_uses[iid]++;
5713 if (ivs->n_invariant_uses[iid] == 1)
5714 ivs->n_regs++;
5715 }
5716 }
5717
5718 /* Set cost pair for USE in set IVS to CP. */
5719
5720 static void
5721 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5722 struct iv_use *use, struct cost_pair *cp)
5723 {
5724 unsigned uid = use->id, cid;
5725
5726 if (ivs->cand_for_use[uid] == cp)
5727 return;
5728
5729 if (ivs->cand_for_use[uid])
5730 iv_ca_set_no_cp (data, ivs, use);
5731
5732 if (cp)
5733 {
5734 cid = cp->cand->id;
5735
5736 ivs->bad_uses--;
5737 ivs->cand_for_use[uid] = cp;
5738 ivs->n_cand_uses[cid]++;
5739 if (ivs->n_cand_uses[cid] == 1)
5740 {
5741 bitmap_set_bit (ivs->cands, cid);
5742 /* Do not count the pseudocandidates. */
5743 if (cp->cand->iv)
5744 ivs->n_regs++;
5745 ivs->n_cands++;
5746 ivs->cand_cost += cp->cand->cost;
5747
5748 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5749 }
5750
5751 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5752 iv_ca_set_add_invariants (ivs, cp->depends_on);
5753
5754 if (cp->inv_expr_id != -1)
5755 {
5756 ivs->used_inv_expr[cp->inv_expr_id]++;
5757 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5758 ivs->num_used_inv_expr++;
5759 }
5760 iv_ca_recount_cost (data, ivs);
5761 }
5762 }
5763
5764 /* Extend set IVS by expressing USE by some of the candidates in it
5765 if possible. Consider all important candidates if candidates in
5766 set IVS don't give any result. */
5767
5768 static void
5769 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5770 struct iv_use *use)
5771 {
5772 struct cost_pair *best_cp = NULL, *cp;
5773 bitmap_iterator bi;
5774 unsigned i;
5775 struct iv_cand *cand;
5776
5777 gcc_assert (ivs->upto >= use->id);
5778 ivs->upto++;
5779 ivs->bad_uses++;
5780
5781 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5782 {
5783 cand = iv_cand (data, i);
5784 cp = get_use_iv_cost (data, use, cand);
5785 if (cheaper_cost_pair (cp, best_cp))
5786 best_cp = cp;
5787 }
5788
5789 if (best_cp == NULL)
5790 {
5791 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5792 {
5793 cand = iv_cand (data, i);
5794 cp = get_use_iv_cost (data, use, cand);
5795 if (cheaper_cost_pair (cp, best_cp))
5796 best_cp = cp;
5797 }
5798 }
5799
5800 iv_ca_set_cp (data, ivs, use, best_cp);
5801 }
5802
5803 /* Get cost for assignment IVS. */
5804
5805 static comp_cost
5806 iv_ca_cost (struct iv_ca *ivs)
5807 {
5808 /* This was a conditional expression but it triggered a bug in
5809 Sun C 5.5. */
5810 if (ivs->bad_uses)
5811 return infinite_cost;
5812 else
5813 return ivs->cost;
5814 }
5815
5816 /* Returns true if all dependences of CP are among invariants in IVS. */
5817
5818 static bool
5819 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5820 {
5821 unsigned i;
5822 bitmap_iterator bi;
5823
5824 if (!cp->depends_on)
5825 return true;
5826
5827 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5828 {
5829 if (ivs->n_invariant_uses[i] == 0)
5830 return false;
5831 }
5832
5833 return true;
5834 }
5835
5836 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5837 it before NEXT_CHANGE. */
5838
5839 static struct iv_ca_delta *
5840 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5841 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5842 {
5843 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5844
5845 change->use = use;
5846 change->old_cp = old_cp;
5847 change->new_cp = new_cp;
5848 change->next_change = next_change;
5849
5850 return change;
5851 }
5852
5853 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5854 are rewritten. */
5855
5856 static struct iv_ca_delta *
5857 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5858 {
5859 struct iv_ca_delta *last;
5860
5861 if (!l2)
5862 return l1;
5863
5864 if (!l1)
5865 return l2;
5866
5867 for (last = l1; last->next_change; last = last->next_change)
5868 continue;
5869 last->next_change = l2;
5870
5871 return l1;
5872 }
5873
5874 /* Reverse the list of changes DELTA, forming the inverse to it. */
5875
5876 static struct iv_ca_delta *
5877 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5878 {
5879 struct iv_ca_delta *act, *next, *prev = NULL;
5880
5881 for (act = delta; act; act = next)
5882 {
5883 next = act->next_change;
5884 act->next_change = prev;
5885 prev = act;
5886
5887 std::swap (act->old_cp, act->new_cp);
5888 }
5889
5890 return prev;
5891 }
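
/* For instance (a hypothetical chain), reversing

     [u1: a -> b] -> [u2: c -> d] -> NULL

   yields

     [u2: d -> c] -> [u1: b -> a] -> NULL

   -- the order of the changes is flipped and each old/new pair is
   swapped, so committing the result undoes the original delta.  */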
5892
5893 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5894 reverted instead. */
5895
5896 static void
5897 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5898 struct iv_ca_delta *delta, bool forward)
5899 {
5900 struct cost_pair *from, *to;
5901 struct iv_ca_delta *act;
5902
5903 if (!forward)
5904 delta = iv_ca_delta_reverse (delta);
5905
5906 for (act = delta; act; act = act->next_change)
5907 {
5908 from = act->old_cp;
5909 to = act->new_cp;
5910 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5911 iv_ca_set_cp (data, ivs, act->use, to);
5912 }
5913
5914 if (!forward)
5915 iv_ca_delta_reverse (delta);
5916 }
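
/* The typical calling pattern (as in iv_ca_extend below) applies a
   delta speculatively, measures the resulting cost, and reverts:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   which lets the search evaluate a change without copying the whole
   assignment.  */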
5917
5918 /* Returns true if CAND is used in IVS. */
5919
5920 static bool
5921 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5922 {
5923 return ivs->n_cand_uses[cand->id] > 0;
5924 }
5925
5926 /* Returns number of induction variable candidates in the set IVS. */
5927
5928 static unsigned
5929 iv_ca_n_cands (struct iv_ca *ivs)
5930 {
5931 return ivs->n_cands;
5932 }
5933
5934 /* Free the list of changes DELTA. */
5935
5936 static void
5937 iv_ca_delta_free (struct iv_ca_delta **delta)
5938 {
5939 struct iv_ca_delta *act, *next;
5940
5941 for (act = *delta; act; act = next)
5942 {
5943 next = act->next_change;
5944 free (act);
5945 }
5946
5947 *delta = NULL;
5948 }
5949
5950 /* Allocates a new iv candidate assignment. */
5951
5952 static struct iv_ca *
5953 iv_ca_new (struct ivopts_data *data)
5954 {
5955 struct iv_ca *nw = XNEW (struct iv_ca);
5956
5957 nw->upto = 0;
5958 nw->bad_uses = 0;
5959 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5960 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5961 nw->cands = BITMAP_ALLOC (NULL);
5962 nw->n_cands = 0;
5963 nw->n_regs = 0;
5964 nw->cand_use_cost = no_cost;
5965 nw->cand_cost = 0;
5966 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5967 nw->cost = no_cost;
5968 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5969 nw->num_used_inv_expr = 0;
5970
5971 return nw;
5972 }
5973
5974 /* Free memory occupied by the set IVS. */
5975
5976 static void
5977 iv_ca_free (struct iv_ca **ivs)
5978 {
5979 free ((*ivs)->cand_for_use);
5980 free ((*ivs)->n_cand_uses);
5981 BITMAP_FREE ((*ivs)->cands);
5982 free ((*ivs)->n_invariant_uses);
5983 free ((*ivs)->used_inv_expr);
5984 free (*ivs);
5985 *ivs = NULL;
5986 }
5987
5988 /* Dumps IVS to FILE. */
5989
5990 static void
5991 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5992 {
5993 const char *pref = " invariants ";
5994 unsigned i;
5995 comp_cost cost = iv_ca_cost (ivs);
5996
5997 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5998 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5999 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6000 bitmap_print (file, ivs->cands, " candidates: ","\n");
6001
6002 for (i = 0; i < ivs->upto; i++)
6003 {
6004 struct iv_use *use = iv_use (data, i);
6005 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6006 if (cp)
6007 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6008 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6009 else
6010 fprintf (file, " use:%d --> ??\n", use->id);
6011 }
6012
6013 for (i = 1; i <= data->max_inv_id; i++)
6014 if (ivs->n_invariant_uses[i])
6015 {
6016 fprintf (file, "%s%d", pref, i);
6017 pref = ", ";
6018 }
6019 fprintf (file, "\n\n");
6020 }
6021
6022 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6023 new set, and store differences in DELTA. Number of induction variables
6024 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6025 true, the function tries to find a solution with minimal iv candidates. */
6026
6027 static comp_cost
6028 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6029 struct iv_cand *cand, struct iv_ca_delta **delta,
6030 unsigned *n_ivs, bool min_ncand)
6031 {
6032 unsigned i;
6033 comp_cost cost;
6034 struct iv_use *use;
6035 struct cost_pair *old_cp, *new_cp;
6036
6037 *delta = NULL;
6038 for (i = 0; i < ivs->upto; i++)
6039 {
6040 use = iv_use (data, i);
6041 old_cp = iv_ca_cand_for_use (ivs, use);
6042
6043 if (old_cp
6044 && old_cp->cand == cand)
6045 continue;
6046
6047 new_cp = get_use_iv_cost (data, use, cand);
6048 if (!new_cp)
6049 continue;
6050
6051 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6052 continue;
6053
6054 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6055 continue;
6056
6057 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6058 }
6059
6060 iv_ca_delta_commit (data, ivs, *delta, true);
6061 cost = iv_ca_cost (ivs);
6062 if (n_ivs)
6063 *n_ivs = iv_ca_n_cands (ivs);
6064 iv_ca_delta_commit (data, ivs, *delta, false);
6065
6066 return cost;
6067 }
6068
6069 /* Try narrowing set IVS by removing CAND. Return the cost of
6070 the new set and store the differences in DELTA. START is
6071 the candidate with which we start narrowing. */
6072
6073 static comp_cost
6074 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6075 struct iv_cand *cand, struct iv_cand *start,
6076 struct iv_ca_delta **delta)
6077 {
6078 unsigned i, ci;
6079 struct iv_use *use;
6080 struct cost_pair *old_cp, *new_cp, *cp;
6081 bitmap_iterator bi;
6082 struct iv_cand *cnd;
6083 comp_cost cost, best_cost, acost;
6084
6085 *delta = NULL;
6086 for (i = 0; i < n_iv_uses (data); i++)
6087 {
6088 use = iv_use (data, i);
6089
6090 old_cp = iv_ca_cand_for_use (ivs, use);
6091 if (old_cp->cand != cand)
6092 continue;
6093
6094 best_cost = iv_ca_cost (ivs);
6095 /* Start narrowing with START. */
6096 new_cp = get_use_iv_cost (data, use, start);
6097
6098 if (data->consider_all_candidates)
6099 {
6100 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6101 {
6102 if (ci == cand->id || (start && ci == start->id))
6103 continue;
6104
6105 cnd = iv_cand (data, ci);
6106
6107 cp = get_use_iv_cost (data, use, cnd);
6108 if (!cp)
6109 continue;
6110
6111 iv_ca_set_cp (data, ivs, use, cp);
6112 acost = iv_ca_cost (ivs);
6113
6114 if (compare_costs (acost, best_cost) < 0)
6115 {
6116 best_cost = acost;
6117 new_cp = cp;
6118 }
6119 }
6120 }
6121 else
6122 {
6123 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6124 {
6125 if (ci == cand->id || (start && ci == start->id))
6126 continue;
6127
6128 cnd = iv_cand (data, ci);
6129
6130 cp = get_use_iv_cost (data, use, cnd);
6131 if (!cp)
6132 continue;
6133
6134 iv_ca_set_cp (data, ivs, use, cp);
6135 acost = iv_ca_cost (ivs);
6136
6137 if (compare_costs (acost, best_cost) < 0)
6138 {
6139 best_cost = acost;
6140 new_cp = cp;
6141 }
6142 }
6143 }
6144 /* Restore the old cp for this use. */
6145 iv_ca_set_cp (data, ivs, use, old_cp);
6146
6147 if (!new_cp)
6148 {
6149 iv_ca_delta_free (delta);
6150 return infinite_cost;
6151 }
6152
6153 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6154 }
6155
6156 iv_ca_delta_commit (data, ivs, *delta, true);
6157 cost = iv_ca_cost (ivs);
6158 iv_ca_delta_commit (data, ivs, *delta, false);
6159
6160 return cost;
6161 }
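
/* A hypothetical run: with ivs = {c1, c2, c3} and CAND = c2, every use
   currently served by c2 is re-assigned to whichever remaining
   candidate (trying START first) gives the lowest set cost.  If some
   use of c2 has no usable alternative, infinite_cost is returned and
   no delta is produced.  */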
6162
6163 /* Try optimizing the set of candidates IVS by removing candidates different
6164 from EXCEPT_CAND from it. Return cost of the new set, and store
6165 differences in DELTA. */
6166
6167 static comp_cost
6168 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6169 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6170 {
6171 bitmap_iterator bi;
6172 struct iv_ca_delta *act_delta, *best_delta;
6173 unsigned i;
6174 comp_cost best_cost, acost;
6175 struct iv_cand *cand;
6176
6177 best_delta = NULL;
6178 best_cost = iv_ca_cost (ivs);
6179
6180 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6181 {
6182 cand = iv_cand (data, i);
6183
6184 if (cand == except_cand)
6185 continue;
6186
6187 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6188
6189 if (compare_costs (acost, best_cost) < 0)
6190 {
6191 best_cost = acost;
6192 iv_ca_delta_free (&best_delta);
6193 best_delta = act_delta;
6194 }
6195 else
6196 iv_ca_delta_free (&act_delta);
6197 }
6198
6199 if (!best_delta)
6200 {
6201 *delta = NULL;
6202 return best_cost;
6203 }
6204
6205 /* Recurse to possibly remove other unnecessary ivs. */
6206 iv_ca_delta_commit (data, ivs, best_delta, true);
6207 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6208 iv_ca_delta_commit (data, ivs, best_delta, false);
6209 *delta = iv_ca_delta_join (best_delta, *delta);
6210 return best_cost;
6211 }
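
/* The recursion matters: removing one candidate re-homes its uses,
   which can in turn make another candidate removable.  E.g.
   (hypothetically) pruning c3 may move its only use onto c1, after
   which c2 serves no use that c1 cannot serve as cheaply, so the
   recursive call drops c2 as well.  */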
6212
6213 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6214 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6215 the corresponding cost_pair; otherwise just return BEST_CP. */
6216
6217 static struct cost_pair*
6218 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6219 unsigned int cand_idx, struct iv_cand *old_cand,
6220 struct cost_pair *best_cp)
6221 {
6222 struct iv_cand *cand;
6223 struct cost_pair *cp;
6224
6225 gcc_assert (old_cand != NULL && best_cp != NULL);
6226 if (cand_idx == old_cand->id)
6227 return best_cp;
6228
6229 cand = iv_cand (data, cand_idx);
6230 cp = get_use_iv_cost (data, use, cand);
6231 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6232 return cp;
6233
6234 return best_cp;
6235 }
6236
6237 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6238 which are used by more than one iv use. For each of those candidates,
6239 this function tries to represent iv uses under that candidate using
6240 other ones with lower local cost, then tries to prune the new set.
6241 If the new set has lower cost, it returns the new cost after recording
6242 candidate replacement in list DELTA. */
6243
6244 static comp_cost
6245 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6246 struct iv_ca_delta **delta)
6247 {
6248 bitmap_iterator bi, bj;
6249 unsigned int i, j, k;
6250 struct iv_use *use;
6251 struct iv_cand *cand;
6252 comp_cost orig_cost, acost;
6253 struct iv_ca_delta *act_delta, *tmp_delta;
6254 struct cost_pair *old_cp, *best_cp = NULL;
6255
6256 *delta = NULL;
6257 orig_cost = iv_ca_cost (ivs);
6258
6259 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6260 {
6261 if (ivs->n_cand_uses[i] == 1
6262 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6263 continue;
6264
6265 cand = iv_cand (data, i);
6266
6267 act_delta = NULL;
6268 /* Represent uses under current candidate using other ones with
6269 lower local cost. */
6270 for (j = 0; j < ivs->upto; j++)
6271 {
6272 use = iv_use (data, j);
6273 old_cp = iv_ca_cand_for_use (ivs, use);
6274
6275 if (old_cp->cand != cand)
6276 continue;
6277
6278 best_cp = old_cp;
6279 if (data->consider_all_candidates)
6280 for (k = 0; k < n_iv_cands (data); k++)
6281 best_cp = cheaper_cost_with_cand (data, use, k,
6282 old_cp->cand, best_cp);
6283 else
6284 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6285 best_cp = cheaper_cost_with_cand (data, use, k,
6286 old_cp->cand, best_cp);
6287
6288 if (best_cp == old_cp)
6289 continue;
6290
6291 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6292 }
6293 /* No need for further pruning. */
6294 if (!act_delta)
6295 continue;
6296
6297 /* Prune the new candidate set. */
6298 iv_ca_delta_commit (data, ivs, act_delta, true);
6299 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6300 iv_ca_delta_commit (data, ivs, act_delta, false);
6301 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6302
6303 if (compare_costs (acost, orig_cost) < 0)
6304 {
6305 *delta = act_delta;
6306 return acost;
6307 }
6308 else
6309 iv_ca_delta_free (&act_delta);
6310 }
6311
6312 return orig_cost;
6313 }
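
/* A hypothetical scenario this catches: candidate c serves uses u1 and
   u2, each of which has an individually cheaper candidate (c1 for u1,
   c2 for u2), yet neither single replacement beats the current set.
   Replacing both uses at once and then pruning c can yield a cheaper
   set that the one-change-at-a-time search in try_improve_iv_set
   would never reach.  */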
6314
6315 /* Tries to extend the set IVS in the best possible way in order
6316 to express the USE. If ORIGINALP is true, prefer candidates from
6317 the original set of IVs, otherwise favor important candidates not
6318 based on any memory object. */
6319
6320 static bool
6321 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6322 struct iv_use *use, bool originalp)
6323 {
6324 comp_cost best_cost, act_cost;
6325 unsigned i;
6326 bitmap_iterator bi;
6327 struct iv_cand *cand;
6328 struct iv_ca_delta *best_delta = NULL, *act_delta;
6329 struct cost_pair *cp;
6330
6331 iv_ca_add_use (data, ivs, use);
6332 best_cost = iv_ca_cost (ivs);
6333 cp = iv_ca_cand_for_use (ivs, use);
6334 if (cp)
6335 {
6336 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6337 iv_ca_set_no_cp (data, ivs, use);
6338 }
6339
6340 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6341 first try important candidates not based on any memory object. Only if
6342 this fails, try the specific ones. Rationale -- in loops with many
6343 variables the best choice often is to use just one generic biv. If we
6344 added here many ivs specific to the uses, the optimization algorithm later
6345 would be likely to get stuck in a local minimum, thus causing us to create
6346 too many ivs. The approach from few ivs to more seems more likely to be
6347 successful -- starting from few ivs, replacing an expensive use by a
6348 specific iv should always be a win. */
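  /* E.g. (hypothetically) in a loop walking a[i], b[i] and c[i], a single
     generic biv i with the three addresses expressed from it is usually
     cheaper than three separate pointer ivs, which is why the generic
     important candidates are tried first.  */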
6349 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6350 {
6351 cand = iv_cand (data, i);
6352
6353 if (originalp && cand->pos != IP_ORIGINAL)
6354 continue;
6355
6356 if (!originalp && cand->iv->base_object != NULL_TREE)
6357 continue;
6358
6359 if (iv_ca_cand_used_p (ivs, cand))
6360 continue;
6361
6362 cp = get_use_iv_cost (data, use, cand);
6363 if (!cp)
6364 continue;
6365
6366 iv_ca_set_cp (data, ivs, use, cp);
6367 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6368 true);
6369 iv_ca_set_no_cp (data, ivs, use);
6370 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6371
6372 if (compare_costs (act_cost, best_cost) < 0)
6373 {
6374 best_cost = act_cost;
6375
6376 iv_ca_delta_free (&best_delta);
6377 best_delta = act_delta;
6378 }
6379 else
6380 iv_ca_delta_free (&act_delta);
6381 }
6382
6383 if (infinite_cost_p (best_cost))
6384 {
6385 for (i = 0; i < use->n_map_members; i++)
6386 {
6387 cp = use->cost_map + i;
6388 cand = cp->cand;
6389 if (!cand)
6390 continue;
6391
6392 /* Already tried this. */
6393 if (cand->important)
6394 {
6395 if (originalp && cand->pos == IP_ORIGINAL)
6396 continue;
6397 if (!originalp && cand->iv->base_object == NULL_TREE)
6398 continue;
6399 }
6400
6401 if (iv_ca_cand_used_p (ivs, cand))
6402 continue;
6403
6404 act_delta = NULL;
6405 iv_ca_set_cp (data, ivs, use, cp);
6406 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6407 iv_ca_set_no_cp (data, ivs, use);
6408 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6409 cp, act_delta);
6410
6411 if (compare_costs (act_cost, best_cost) < 0)
6412 {
6413 best_cost = act_cost;
6414
6415 if (best_delta)
6416 iv_ca_delta_free (&best_delta);
6417 best_delta = act_delta;
6418 }
6419 else
6420 iv_ca_delta_free (&act_delta);
6421 }
6422 }
6423
6424 iv_ca_delta_commit (data, ivs, best_delta, true);
6425 iv_ca_delta_free (&best_delta);
6426
6427 return !infinite_cost_p (best_cost);
6428 }
6429
6430 /* Finds an initial assignment of candidates to uses. */
6431
6432 static struct iv_ca *
6433 get_initial_solution (struct ivopts_data *data, bool originalp)
6434 {
6435 struct iv_ca *ivs = iv_ca_new (data);
6436 unsigned i;
6437
6438 for (i = 0; i < n_iv_uses (data); i++)
6439 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6440 {
6441 iv_ca_free (&ivs);
6442 return NULL;
6443 }
6444
6445 return ivs;
6446 }
6447
6448 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6449 points to a bool variable; if it is true, this function tries to break
6450 the local optimal fixed-point by replacing candidates in IVS. */
6451
6452 static bool
6453 try_improve_iv_set (struct ivopts_data *data,
6454 struct iv_ca *ivs, bool *try_replace_p)
6455 {
6456 unsigned i, n_ivs;
6457 comp_cost acost, best_cost = iv_ca_cost (ivs);
6458 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6459 struct iv_cand *cand;
6460
6461 /* Try extending the set of induction variables by one. */
6462 for (i = 0; i < n_iv_cands (data); i++)
6463 {
6464 cand = iv_cand (data, i);
6465
6466 if (iv_ca_cand_used_p (ivs, cand))
6467 continue;
6468
6469 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6470 if (!act_delta)
6471 continue;
6472
6473 /* If we successfully added the candidate and the set is small enough,
6474 try optimizing it by removing other candidates. */
6475 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6476 {
6477 iv_ca_delta_commit (data, ivs, act_delta, true);
6478 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6479 iv_ca_delta_commit (data, ivs, act_delta, false);
6480 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6481 }
6482
6483 if (compare_costs (acost, best_cost) < 0)
6484 {
6485 best_cost = acost;
6486 iv_ca_delta_free (&best_delta);
6487 best_delta = act_delta;
6488 }
6489 else
6490 iv_ca_delta_free (&act_delta);
6491 }
6492
6493 if (!best_delta)
6494 {
6495 /* Try removing the candidates from the set instead. */
6496 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6497
6498 if (!best_delta && *try_replace_p)
6499 {
6500 *try_replace_p = false;
6501 /* So far the candidate selecting algorithm tends to choose fewer
6502 IVs so that it can handle loops that have many variables but for
6503 which the best choice is often to use only one general biv. One
6504 weakness is that it can't handle the opposite case, in which
6505 different candidates should be chosen with respect to each use.
6506 To solve the problem, we replace candidates in the manner described
6507 in the comments of iv_ca_replace, thus giving the general algorithm
6508 a chance to break the local optimal fixed-point in these cases. */
6509 best_cost = iv_ca_replace (data, ivs, &best_delta);
6510 }
6511
6512 if (!best_delta)
6513 return false;
6514 }
6515
6516 iv_ca_delta_commit (data, ivs, best_delta, true);
6517 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6518 iv_ca_delta_free (&best_delta);
6519 return true;
6520 }
6521
6522 /* Attempts to find the optimal set of induction variables. We use a simple
6523 greedy heuristic -- we try to replace at most one candidate in the selected
6524 solution and remove the unused ivs while this improves the cost. */
6525
6526 static struct iv_ca *
6527 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6528 {
6529 struct iv_ca *set;
6530 bool try_replace_p = true;
6531
6532 /* Get the initial solution. */
6533 set = get_initial_solution (data, originalp);
6534 if (!set)
6535 {
6536 if (dump_file && (dump_flags & TDF_DETAILS))
6537 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6538 return NULL;
6539 }
6540
6541 if (dump_file && (dump_flags & TDF_DETAILS))
6542 {
6543 fprintf (dump_file, "Initial set of candidates:\n");
6544 iv_ca_dump (data, dump_file, set);
6545 }
6546
6547 while (try_improve_iv_set (data, set, &try_replace_p))
6548 {
6549 if (dump_file && (dump_flags & TDF_DETAILS))
6550 {
6551 fprintf (dump_file, "Improved to:\n");
6552 iv_ca_dump (data, dump_file, set);
6553 }
6554 }
6555
6556 return set;
6557 }
6558
6559 static struct iv_ca *
6560 find_optimal_iv_set (struct ivopts_data *data)
6561 {
6562 unsigned i;
6563 struct iv_ca *set, *origset;
6564 struct iv_use *use;
6565 comp_cost cost, origcost;
6566
6567 /* Determine the cost based on a strategy that starts with original IVs,
6568 and try again using a strategy that prefers candidates not based
6569 on any memory object. */
6570 origset = find_optimal_iv_set_1 (data, true);
6571 set = find_optimal_iv_set_1 (data, false);
6572
6573 if (!origset && !set)
6574 return NULL;
6575
6576 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6577 cost = set ? iv_ca_cost (set) : infinite_cost;
6578
6579 if (dump_file && (dump_flags & TDF_DETAILS))
6580 {
6581 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6582 origcost.cost, origcost.complexity);
6583 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6584 cost.cost, cost.complexity);
6585 }
6586
6587 /* Choose the one with the best cost. */
6588 if (compare_costs (origcost, cost) <= 0)
6589 {
6590 if (set)
6591 iv_ca_free (&set);
6592 set = origset;
6593 }
6594 else if (origset)
6595 iv_ca_free (&origset);
6596
6597 for (i = 0; i < n_iv_uses (data); i++)
6598 {
6599 use = iv_use (data, i);
6600 use->selected = iv_ca_cand_for_use (set, use)->cand;
6601 }
6602
6603 return set;
6604 }
6605
6606 /* Creates a new induction variable corresponding to CAND. */
6607
6608 static void
6609 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6610 {
6611 gimple_stmt_iterator incr_pos;
6612 tree base;
6613 bool after = false;
6614
6615 if (!cand->iv)
6616 return;
6617
6618 switch (cand->pos)
6619 {
6620 case IP_NORMAL:
6621 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6622 break;
6623
6624 case IP_END:
6625 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6626 after = true;
6627 break;
6628
6629 case IP_AFTER_USE:
6630 after = true;
6631 /* fall through */
6632 case IP_BEFORE_USE:
6633 incr_pos = gsi_for_stmt (cand->incremented_at);
6634 break;
6635
6636 case IP_ORIGINAL:
6637 /* Mark that the iv is preserved. */
6638 name_info (data, cand->var_before)->preserve_biv = true;
6639 name_info (data, cand->var_after)->preserve_biv = true;
6640
6641 /* Rewrite the increment so that it uses var_before directly. */
6642 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6643 return;
6644 }
6645
6646 gimple_add_tmp_var (cand->var_before);
6647
6648 base = unshare_expr (cand->iv->base);
6649
6650 create_iv (base, unshare_expr (cand->iv->step),
6651 cand->var_before, data->current_loop,
6652 &incr_pos, after, &cand->var_before, &cand->var_after);
6653 }
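
/* For a candidate with base 0 and step 4 at IP_NORMAL, create_iv emits
   GIMPLE along these lines (a sketch; the names are made up):

     # ivtmp_1 = PHI <0(preheader), ivtmp_2(latch)>
     ...
     ivtmp_2 = ivtmp_1 + 4;

   and stores the names before and after the increment back into
   cand->var_before and cand->var_after.  */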
6654
6655 /* Creates new induction variables described in SET. */
6656
6657 static void
6658 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6659 {
6660 unsigned i;
6661 struct iv_cand *cand;
6662 bitmap_iterator bi;
6663
6664 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6665 {
6666 cand = iv_cand (data, i);
6667 create_new_iv (data, cand);
6668 }
6669
6670 if (dump_file && (dump_flags & TDF_DETAILS))
6671 {
6672 fprintf (dump_file, "Selected IV set for loop %d",
6673 data->current_loop->num);
6674 if (data->loop_loc != UNKNOWN_LOCATION)
6675 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6676 LOCATION_LINE (data->loop_loc));
6677 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6678 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6679 {
6680 cand = iv_cand (data, i);
6681 dump_cand (dump_file, cand);
6682 }
6683 fprintf (dump_file, "\n");
6684 }
6685 }
6686
6687 /* Rewrites USE (definition of iv used in a nonlinear expression)
6688 using candidate CAND. */
6689
6690 static void
6691 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6692 struct iv_use *use, struct iv_cand *cand)
6693 {
6694 tree comp;
6695 tree op, tgt;
6696 gassign *ass;
6697 gimple_stmt_iterator bsi;
6698
6699 /* An important special case -- if we are asked to express value of
6700 the original iv by itself, just exit; there is no need to
6701 introduce a new computation (that might also need casting the
6702 variable to unsigned and back). */
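  /* Concretely (a hypothetical case): if CAND is the original biv i and
     USE is its own increment i_4 = i_1 + 1, rewriting would only recreate
     the same statement, so we return early below when the other operand
     is loop invariant.  */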
6703 if (cand->pos == IP_ORIGINAL
6704 && cand->incremented_at == use->stmt)
6705 {
6706 enum tree_code stmt_code;
6707
6708 gcc_assert (is_gimple_assign (use->stmt));
6709 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6710
6711 /* Check whether we may leave the computation unchanged.
6712 This is the case only if it does not rely on other
6713 computations in the loop -- otherwise, the computation
6714 we rely upon may be removed in remove_unused_ivs,
6715 thus leading to ICE. */
6716 stmt_code = gimple_assign_rhs_code (use->stmt);
6717 if (stmt_code == PLUS_EXPR
6718 || stmt_code == MINUS_EXPR
6719 || stmt_code == POINTER_PLUS_EXPR)
6720 {
6721 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6722 op = gimple_assign_rhs2 (use->stmt);
6723 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6724 op = gimple_assign_rhs1 (use->stmt);
6725 else
6726 op = NULL_TREE;
6727 }
6728 else
6729 op = NULL_TREE;
6730
6731 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6732 return;
6733 }
6734
6735 comp = get_computation (data->current_loop, use, cand);
6736 gcc_assert (comp != NULL_TREE);
6737
6738 switch (gimple_code (use->stmt))
6739 {
6740 case GIMPLE_PHI:
6741 tgt = PHI_RESULT (use->stmt);
6742
6743 /* If we should keep the biv, do not replace it. */
6744 if (name_info (data, tgt)->preserve_biv)
6745 return;
6746
6747 bsi = gsi_after_labels (gimple_bb (use->stmt));
6748 break;
6749
6750 case GIMPLE_ASSIGN:
6751 tgt = gimple_assign_lhs (use->stmt);
6752 bsi = gsi_for_stmt (use->stmt);
6753 break;
6754
6755 default:
6756 gcc_unreachable ();
6757 }
6758
6759 if (!valid_gimple_rhs_p (comp)
6760 || (gimple_code (use->stmt) != GIMPLE_PHI
6761 /* We can't allow re-allocating the stmt as it might be pointed
6762 to still. */
6763 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6764 >= gimple_num_ops (gsi_stmt (bsi)))))
6765 {
6766 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6767 true, GSI_SAME_STMT);
6768 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6769 {
6770 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6771 /* As this isn't a plain copy we have to reset alignment
6772 information. */
6773 if (SSA_NAME_PTR_INFO (comp))
6774 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6775 }
6776 }
6777
6778 if (gimple_code (use->stmt) == GIMPLE_PHI)
6779 {
6780 ass = gimple_build_assign (tgt, comp);
6781 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6782
6783 bsi = gsi_for_stmt (use->stmt);
6784 remove_phi_node (&bsi, false);
6785 }
6786 else
6787 {
6788 gimple_assign_set_rhs_from_tree (&bsi, comp);
6789 use->stmt = gsi_stmt (bsi);
6790 }
6791 }
6792
6793 /* Performs a peephole optimization to reorder the iv update statement with
6794 a mem ref to enable instruction combining in later phases. The mem ref uses
6795 the iv value before the update, so the reordering transformation requires
6796 adjustment of the offset. CAND is the selected IV_CAND.
6797
6798 Example:
6799
6800 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6801 iv2 = iv1 + 1;
6802
6803 if (t < val) (1)
6804 goto L;
6805 goto Head;
6806
6807
6808 directly propagating t over to (1) will introduce an overlapping live range,
6809 thus increasing register pressure. This peephole transforms it into:
6810
6811
6812 iv2 = iv1 + 1;
6813 t = MEM_REF (base, iv2, 8, 8);
6814 if (t < val)
6815 goto L;
6816 goto Head;
6817 */
6818
6819 static void
6820 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6821 {
6822 tree var_after;
6823 gimple iv_update, stmt;
6824 basic_block bb;
6825 gimple_stmt_iterator gsi, gsi_iv;
6826
6827 if (cand->pos != IP_NORMAL)
6828 return;
6829
6830 var_after = cand->var_after;
6831 iv_update = SSA_NAME_DEF_STMT (var_after);
6832
6833 bb = gimple_bb (iv_update);
6834 gsi = gsi_last_nondebug_bb (bb);
6835 stmt = gsi_stmt (gsi);
6836
6837 /* Only handle conditional statements for now. */
6838 if (gimple_code (stmt) != GIMPLE_COND)
6839 return;
6840
6841 gsi_prev_nondebug (&gsi);
6842 stmt = gsi_stmt (gsi);
6843 if (stmt != iv_update)
6844 return;
6845
6846 gsi_prev_nondebug (&gsi);
6847 if (gsi_end_p (gsi))
6848 return;
6849
6850 stmt = gsi_stmt (gsi);
6851 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6852 return;
6853
6854 if (stmt != use->stmt)
6855 return;
6856
6857 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6858 return;
6859
6860 if (dump_file && (dump_flags & TDF_DETAILS))
6861 {
6862 fprintf (dump_file, "Reordering \n");
6863 print_gimple_stmt (dump_file, iv_update, 0, 0);
6864 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6865 fprintf (dump_file, "\n");
6866 }
6867
6868 gsi = gsi_for_stmt (use->stmt);
6869 gsi_iv = gsi_for_stmt (iv_update);
6870 gsi_move_before (&gsi_iv, &gsi);
6871
6872 cand->pos = IP_BEFORE_USE;
6873 cand->incremented_at = use->stmt;
6874 }
6875
6876 /* Rewrites USE (address that is an iv) using candidate CAND. */
6877
6878 static void
6879 rewrite_use_address_1 (struct ivopts_data *data,
6880 struct iv_use *use, struct iv_cand *cand)
6881 {
6882 aff_tree aff;
6883 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6884 tree base_hint = NULL_TREE;
6885 tree ref, iv;
6886 bool ok;
6887
6888 adjust_iv_update_pos (cand, use);
6889 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6890 gcc_assert (ok);
6891 unshare_aff_combination (&aff);
6892
6893 /* To avoid undefined overflow problems, all IV candidates use unsigned
6894 integer types. The drawback is that this makes it impossible for
6895 create_mem_ref to distinguish an IV that is based on a memory object
6896 from one that represents simply an offset.
6897
6898 To work around this problem, we pass a hint to create_mem_ref that
6899 indicates which variable (if any) in aff is an IV based on a memory
6900 object. Note that we only consider the candidate. If this is not
6901 based on an object, the base of the reference is in some subexpression
6902 of the use -- but these will use pointer types, so they are recognized
6903 by the create_mem_ref heuristics anyway. */
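  /* E.g. (hypothetically) for a use *p_5 where the candidate represents
     (unsigned long) base_3 + 4 * i, the affine combination alone cannot
     show that base_3 is the memory object; passing the candidate's
     variable as the hint lets create_mem_ref build a valid memory
     reference based on it.  */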
6904 if (cand->iv->base_object)
6905 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6906
6907 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6908 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6909 reference_alias_ptr_type (*use->op_p),
6910 iv, base_hint, data->speed);
6911 copy_ref_info (ref, *use->op_p);
6912 *use->op_p = ref;
6913 }
6914
6915 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6916 first use of a group, rewrites sub uses in the group too. */
6917
6918 static void
6919 rewrite_use_address (struct ivopts_data *data,
6920 struct iv_use *use, struct iv_cand *cand)
6921 {
6922 struct iv_use *next;
6923
6924 gcc_assert (use->sub_id == 0);
6925 rewrite_use_address_1 (data, use, cand);
6926 update_stmt (use->stmt);
6927
6928 for (next = use->next; next != NULL; next = next->next)
6929 {
6930 rewrite_use_address_1 (data, next, cand);
6931 update_stmt (next->stmt);
6932 }
6933
6934 return;
6935 }
6936
6937 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6938 candidate CAND. */
6939
6940 static void
6941 rewrite_use_compare (struct ivopts_data *data,
6942 struct iv_use *use, struct iv_cand *cand)
6943 {
6944 tree comp, *var_p, op, bound;
6945 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6946 enum tree_code compare;
6947 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6948 bool ok;
6949
6950 bound = cp->value;
6951 if (bound)
6952 {
6953 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6954 tree var_type = TREE_TYPE (var);
6955 gimple_seq stmts;
6956
6957 if (dump_file && (dump_flags & TDF_DETAILS))
6958 {
6959 fprintf (dump_file, "Replacing exit test: ");
6960 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6961 }
6962 compare = cp->comp;
6963 bound = unshare_expr (fold_convert (var_type, bound));
6964 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6965 if (stmts)
6966 gsi_insert_seq_on_edge_immediate (
6967 loop_preheader_edge (data->current_loop),
6968 stmts);
6969
6970 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6971 gimple_cond_set_lhs (cond_stmt, var);
6972 gimple_cond_set_code (cond_stmt, compare);
6973 gimple_cond_set_rhs (cond_stmt, op);
6974 return;
6975 }
6976
6977 /* The induction variable elimination failed; just express the original
6978 giv. */
6979 comp = get_computation (data->current_loop, use, cand);
6980 gcc_assert (comp != NULL_TREE);
6981
6982 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6983 gcc_assert (ok);
6984
6985 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6986 true, GSI_SAME_STMT);
6987 }
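
/* For example (a sketch, not from a real dump), an exit test

     if (i_3 < n_7)

   may be replaced by

     if (ivtmp_9 != _11)

   where _11 is computed once in the preheader and the comparison code
   and bound come from CP->comp and CP->value above.  */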
6988
6989 /* Rewrites USE using candidate CAND. */
6990
6991 static void
6992 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6993 {
6994 switch (use->type)
6995 {
6996 case USE_NONLINEAR_EXPR:
6997 rewrite_use_nonlinear_expr (data, use, cand);
6998 break;
6999
7000 case USE_ADDRESS:
7001 rewrite_use_address (data, use, cand);
7002 break;
7003
7004 case USE_COMPARE:
7005 rewrite_use_compare (data, use, cand);
7006 break;
7007
7008 default:
7009 gcc_unreachable ();
7010 }
7011
7012 update_stmt (use->stmt);
7013 }
7014
7015 /* Rewrite the uses using the selected induction variables. */
7016
7017 static void
7018 rewrite_uses (struct ivopts_data *data)
7019 {
7020 unsigned i;
7021 struct iv_cand *cand;
7022 struct iv_use *use;
7023
7024 for (i = 0; i < n_iv_uses (data); i++)
7025 {
7026 use = iv_use (data, i);
7027 cand = use->selected;
7028 gcc_assert (cand);
7029
7030 rewrite_use (data, use, cand);
7031 }
7032 }
7033
7034 /* Removes the ivs that are not used after rewriting. */
7035
7036 static void
7037 remove_unused_ivs (struct ivopts_data *data)
7038 {
7039 unsigned j;
7040 bitmap_iterator bi;
7041 bitmap toremove = BITMAP_ALLOC (NULL);
7042
7043 /* Figure out an order in which to release SSA DEFs so that we don't
7044 release something that we'd have to propagate into a debug stmt
7045 afterwards. */
7046 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7047 {
7048 struct version_info *info;
7049
7050 info = ver_info (data, j);
7051 if (info->iv
7052 && !integer_zerop (info->iv->step)
7053 && !info->inv_id
7054 && !info->iv->have_use_for
7055 && !info->preserve_biv)
7056 {
7057 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7058
7059 tree def = info->iv->ssa_name;
7060
7061 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7062 {
7063 imm_use_iterator imm_iter;
7064 use_operand_p use_p;
7065 gimple stmt;
7066 int count = 0;
7067
7068 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7069 {
7070 if (!gimple_debug_bind_p (stmt))
7071 continue;
7072
7073 /* We just want to determine whether to do nothing
7074 (count == 0), to substitute the computed
7075 expression into a single use of the SSA DEF by
7076 itself (count == 1), or to use a debug temp
7077 because the SSA DEF is used multiple times or as
7078 part of a larger expression (count > 1). */
7079 count++;
7080 if (gimple_debug_bind_get_value (stmt) != def)
7081 count++;
7082
7083 if (count > 1)
7084 BREAK_FROM_IMM_USE_STMT (imm_iter);
7085 }
7086
7087 if (!count)
7088 continue;
7089
7090 struct iv_use dummy_use;
7091 struct iv_cand *best_cand = NULL, *cand;
7092 unsigned i, best_pref = 0, cand_pref;
7093
7094 memset (&dummy_use, 0, sizeof (dummy_use));
7095 dummy_use.iv = info->iv;
7096 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7097 {
7098 cand = iv_use (data, i)->selected;
7099 if (cand == best_cand)
7100 continue;
7101 cand_pref = operand_equal_p (cand->iv->step,
7102 info->iv->step, 0)
7103 ? 4 : 0;
7104 cand_pref
7105 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7106 == TYPE_MODE (TREE_TYPE (info->iv->base))
7107 ? 2 : 0;
7108 cand_pref
7109 += TREE_CODE (cand->iv->base) == INTEGER_CST
7110 ? 1 : 0;
7111 if (best_cand == NULL || best_pref < cand_pref)
7112 {
7113 best_cand = cand;
7114 best_pref = cand_pref;
7115 }
7116 }
7117
7118 if (!best_cand)
7119 continue;
7120
7121 tree comp = get_computation_at (data->current_loop,
7122 &dummy_use, best_cand,
7123 SSA_NAME_DEF_STMT (def));
7124 if (!comp)
7125 continue;
7126
7127 if (count > 1)
7128 {
7129 tree vexpr = make_node (DEBUG_EXPR_DECL);
7130 DECL_ARTIFICIAL (vexpr) = 1;
7131 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7132 if (SSA_NAME_VAR (def))
7133 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7134 else
7135 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7136 gdebug *def_temp
7137 = gimple_build_debug_bind (vexpr, comp, NULL);
7138 gimple_stmt_iterator gsi;
7139
7140 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7141 gsi = gsi_after_labels (gimple_bb
7142 (SSA_NAME_DEF_STMT (def)));
7143 else
7144 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7145
7146 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7147 comp = vexpr;
7148 }
7149
7150 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7151 {
7152 if (!gimple_debug_bind_p (stmt))
7153 continue;
7154
7155 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7156 SET_USE (use_p, comp);
7157
7158 update_stmt (stmt);
7159 }
7160 }
7161 }
7162 }
7163
7164 release_defs_bitset (toremove);
7165
7166 BITMAP_FREE (toremove);
7167 }
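
/* Debug-info illustration (hypothetical): if a removed iv had a single
   debug bind such as  # DEBUG i => i_3,  the computed replacement
   expression is substituted into it directly (the count == 1 case).
   With several binds, a shared temporary is introduced instead:

     DEBUG D#1 => ivtmp_7 * 4 + base_5
     # DEBUG i => D#1

   matching the count > 1 path above.  */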
7168
7169 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7170 for hash_map::traverse. */
7171
7172 bool
7173 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7174 {
7175 free (value);
7176 return true;
7177 }
7178
7179 /* Frees data allocated by the optimization of a single loop. */
7180
7181 static void
7182 free_loop_data (struct ivopts_data *data)
7183 {
7184 unsigned i, j;
7185 bitmap_iterator bi;
7186 tree obj;
7187
7188 if (data->niters)
7189 {
7190 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7191 delete data->niters;
7192 data->niters = NULL;
7193 }
7194
7195 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7196 {
7197 struct version_info *info;
7198
7199 info = ver_info (data, i);
7200 free (info->iv);
7201 info->iv = NULL;
7202 info->has_nonlin_use = false;
7203 info->preserve_biv = false;
7204 info->inv_id = 0;
7205 }
7206 bitmap_clear (data->relevant);
7207 bitmap_clear (data->important_candidates);
7208
7209 for (i = 0; i < n_iv_uses (data); i++)
7210 {
7211 struct iv_use *use = iv_use (data, i);
7212 struct iv_use *pre = use, *sub = use->next;
7213
7214 while (sub)
7215 {
7216 gcc_assert (sub->related_cands == NULL);
7217 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7218
7219 free (sub->iv);
7220 pre = sub;
7221 sub = sub->next;
7222 free (pre);
7223 }
7224
7225 free (use->iv);
7226 BITMAP_FREE (use->related_cands);
7227 for (j = 0; j < use->n_map_members; j++)
7228 if (use->cost_map[j].depends_on)
7229 BITMAP_FREE (use->cost_map[j].depends_on);
7230 free (use->cost_map);
7231 free (use);
7232 }
7233 data->iv_uses.truncate (0);
7234
7235 for (i = 0; i < n_iv_cands (data); i++)
7236 {
7237 struct iv_cand *cand = iv_cand (data, i);
7238
7239 free (cand->iv);
7240 if (cand->depends_on)
7241 BITMAP_FREE (cand->depends_on);
7242 free (cand);
7243 }
7244 data->iv_candidates.truncate (0);
7245
7246 if (data->version_info_size < num_ssa_names)
7247 {
7248 data->version_info_size = 2 * num_ssa_names;
7249 free (data->version_info);
7250 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7251 }
7252
7253 data->max_inv_id = 0;
7254
7255 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7256 SET_DECL_RTL (obj, NULL_RTX);
7257
7258 decl_rtl_to_reset.truncate (0);
7259
7260 data->inv_expr_tab->empty ();
7261 data->inv_expr_id = 0;
7262 }
7263
7264 /* Finalizes data structures used by the iv optimization pass, releasing
7265 all memory held in DATA. */
7266
7267 static void
7268 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7269 {
7270 free_loop_data (data);
7271 free (data->version_info);
7272 BITMAP_FREE (data->relevant);
7273 BITMAP_FREE (data->important_candidates);
7274
7275 decl_rtl_to_reset.release ();
7276 data->iv_uses.release ();
7277 data->iv_candidates.release ();
7278 delete data->inv_expr_tab;
7279 data->inv_expr_tab = NULL;
7280 free_affine_expand_cache (&data->name_expansion_cache);
7281 }
7282
7283 /* Returns true if the loop body BODY includes any function calls. */
7284
7285 static bool
7286 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7287 {
7288 gimple_stmt_iterator gsi;
7289 unsigned i;
7290
7291 for (i = 0; i < num_nodes; i++)
7292 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7293 {
7294 gimple stmt = gsi_stmt (gsi);
7295 if (is_gimple_call (stmt)
7296 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7297 return true;
7298 }
7299 return false;
7300 }
7301
7302 /* Optimizes the LOOP. Returns true if anything changed. */
7303
7304 static bool
7305 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7306 {
7307 bool changed = false;
7308 struct iv_ca *iv_ca;
7309 edge exit = single_dom_exit (loop);
7310 basic_block *body;
7311
7312 gcc_assert (!data->niters);
7313 data->current_loop = loop;
7314 data->loop_loc = find_loop_location (loop);
7315 data->speed = optimize_loop_for_speed_p (loop);
7316
7317 if (dump_file && (dump_flags & TDF_DETAILS))
7318 {
7319 fprintf (dump_file, "Processing loop %d", loop->num);
7320 if (data->loop_loc != UNKNOWN_LOCATION)
7321 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7322 LOCATION_LINE (data->loop_loc));
7323 fprintf (dump_file, "\n");
7324
7325 if (exit)
7326 {
7327 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7328 exit->src->index, exit->dest->index);
7329 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7330 fprintf (dump_file, "\n");
7331 }
7332
7333 fprintf (dump_file, "\n");
7334 }
7335
7336 body = get_loop_body (loop);
7337 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7338 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7339 free (body);
7340
7341 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7342
7343 /* For each ssa name determines whether it behaves as an induction variable
7344 in some loop. */
7345 if (!find_induction_variables (data))
7346 goto finish;
7347
7348 /* Finds interesting uses (item 1). */
7349 find_interesting_uses (data);
7350 group_address_uses (data);
7351 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7352 goto finish;
7353
7354 /* Finds candidates for the induction variables (item 2). */
7355 find_iv_candidates (data);
7356
7357 /* Calculates the costs (item 3, part 1). */
7358 determine_iv_costs (data);
7359 determine_use_iv_costs (data);
7360 determine_set_costs (data);
7361
7362 /* Find the optimal set of induction variables (item 3, part 2). */
7363 iv_ca = find_optimal_iv_set (data);
7364 if (!iv_ca)
7365 goto finish;
7366 changed = true;
7367
7368 /* Create the new induction variables (item 4, part 1). */
7369 create_new_ivs (data, iv_ca);
7370 iv_ca_free (&iv_ca);
7371
7372 /* Rewrite the uses (item 4, part 2). */
7373 rewrite_uses (data);
7374
7375 /* Remove the ivs that are unused after rewriting. */
7376 remove_unused_ivs (data);
7377
7378 /* We have changed the structure of induction variables; it might happen
7379 that definitions in the scev database refer to some of them that were
7380 eliminated. */
7381 scev_reset ();
7382
7383 finish:
7384 free_loop_data (data);
7385
7386 return changed;
7387 }
7388
7389 /* Main entry point. Optimizes induction variables in loops. */
7390
7391 void
7392 tree_ssa_iv_optimize (void)
7393 {
7394 struct loop *loop;
7395 struct ivopts_data data;
7396
7397 tree_ssa_iv_optimize_init (&data);
7398
7399 /* Optimize the loops starting with the innermost ones. */
7400 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7401 {
7402 if (dump_file && (dump_flags & TDF_DETAILS))
7403 flow_loop_dump (loop, dump_file, NULL, 1);
7404
7405 tree_ssa_iv_optimize_loop (&data, loop);
7406 }
7407
7408 tree_ssa_iv_optimize_finalize (&data);
7409 }