re PR tree-optimization/69042 (Missed optimization in ivopts)
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (with respect to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, and it might give better performance and enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
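/* As a rough illustration (a hypothetical example, not taken from any
   testcase), consider a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   The address of a[i] is an interesting address use, the exit test i < n
   is a compare use, and i itself is the original biv.  A further candidate
   is a pointer that starts at &a[0] and is advanced by the element size
   each iteration.  Selecting that candidate yields the classical
   strength-reduced form

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   while keeping i may be preferable on targets with base + index * scale
   addressing modes; the cost model below is what decides between such
   alternatives.  */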
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "rtl.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "cfghooks.h"
72 #include "tree-pass.h"
73 #include "tm_p.h"
74 #include "ssa.h"
75 #include "expmed.h"
76 #include "insn-config.h"
77 #include "emit-rtl.h"
78 #include "recog.h"
79 #include "cgraph.h"
80 #include "gimple-pretty-print.h"
81 #include "alias.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
84 #include "tree-eh.h"
85 #include "gimplify.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
88 #include "tree-cfg.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
93 #include "explow.h"
94 #include "expr.h"
95 #include "tree-dfa.h"
96 #include "tree-ssa.h"
97 #include "cfgloop.h"
98 #include "tree-scalar-evolution.h"
99 #include "params.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
105
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
109
110 /* The infinite cost. */
111 #define INFTY 10000000
112
113 #define AVG_LOOP_NITER(LOOP) 5
114
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
117 exists. */
118
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop *loop)
121 {
122 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
123 if (niter == -1)
124 return AVG_LOOP_NITER (loop);
125
126 return niter;
127 }
128
129 /* Representation of the induction variable. */
130 struct iv
131 {
132 tree base; /* Initial value of the iv. */
133 tree base_object; /* A memory object to which the induction variable points. */
134 tree step; /* Step of the iv (constant only). */
135 tree ssa_name; /* The ssa name with the value. */
136 unsigned use_id; /* The identifier of the use this iv has been recorded for, if any. */
137 bool biv_p; /* Is it a biv? */
138 bool have_use_for; /* Do we already have a use for it? */
139 bool no_overflow; /* True if the iv doesn't overflow. */
140 bool have_address_use;/* For biv, indicate if it's used in any address
141 type use. */
142 };
143
144 /* Per-ssa version information (induction variable descriptions, etc.). */
145 struct version_info
146 {
147 tree name; /* The ssa name. */
148 struct iv *iv; /* Induction variable description. */
149 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv; /* For the original biv, whether to preserve it. */
152 unsigned inv_id; /* Id of an invariant. */
153 };
154
155 /* Types of uses. */
156 enum use_type
157 {
158 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
159 USE_ADDRESS, /* Use in an address. */
160 USE_COMPARE /* Use is a compare. */
161 };
162
163 /* Cost of a computation. */
164 struct comp_cost
165 {
166 int cost; /* The runtime cost. */
167 unsigned complexity; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
171 };
172
173 static const comp_cost no_cost = {0, 0};
174 static const comp_cost infinite_cost = {INFTY, INFTY};
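/* Presumably (judging from how the pass uses these values later), costs
   are compared primarily by the runtime COST field, with COMPLEXITY used
   to break ties so that simpler expressions and addressing modes are
   preferred among alternatives of equal cost.  */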
175
176 /* The candidate - cost pair. */
177 struct cost_pair
178 {
179 struct iv_cand *cand; /* The candidate. */
180 comp_cost cost; /* The cost. */
181 bitmap depends_on; /* The list of invariants that have to be
182 preserved. */
183 tree value; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp; /* For iv elimination, the comparison. */
187 int inv_expr_id; /* Loop invariant expression id. */
188 };
189
190 /* Use. */
191 struct iv_use
192 {
193 unsigned id; /* The id of the use. */
194 unsigned sub_id; /* The id of the sub use. */
195 enum use_type type; /* Type of the use. */
196 struct iv *iv; /* The induction variable it is based on. */
197 gimple *stmt; /* Statement in which it occurs. */
198 tree *op_p; /* The place where it occurs. */
199 bitmap related_cands; /* The set of "related" iv candidates, plus the common
200 important ones. */
201
202 unsigned n_map_members; /* Number of candidates in the cost_map list. */
203 struct cost_pair *cost_map;
204 /* The costs with respect to the iv candidates. */
205
206 struct iv_cand *selected;
207 /* The selected candidate. */
208
209 struct iv_use *next; /* The next sub use. */
210 tree addr_base; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset;
212 /* Const offset stripped from base address. */
213 };
214
215 /* The position where the iv is computed. */
216 enum iv_position
217 {
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
223 };
224
225 /* The induction variable candidate. */
226 struct iv_cand
227 {
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple *incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on; /* The list of invariants that are used in step of the
245 biv. */
246 struct iv *orig_iv; /* The original iv if this cand is added from biv with
247 smaller type. */
248 };
249
250 /* Hashtable entry for common candidate derived from iv uses. */
251 struct iv_common_cand
252 {
253 tree base;
254 tree step;
255 /* IV uses from which this common candidate is derived. */
256 auto_vec<iv_use *> uses;
257 hashval_t hash;
258 };
259
260 /* Hashtable helpers. */
261
262 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
263 {
264 static inline hashval_t hash (const iv_common_cand *);
265 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
266 };
267
268 /* Hash function for possible common candidates. */
269
270 inline hashval_t
271 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
272 {
273 return ccand->hash;
274 }
275
276 /* Hash table equality function for common candidates. */
277
278 inline bool
279 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
280 const iv_common_cand *ccand2)
281 {
282 return (ccand1->hash == ccand2->hash
283 && operand_equal_p (ccand1->base, ccand2->base, 0)
284 && operand_equal_p (ccand1->step, ccand2->step, 0)
285 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
286 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
287 }
288
289 /* Loop invariant expression hashtable entry. */
290 struct iv_inv_expr_ent
291 {
292 tree expr;
293 int id;
294 hashval_t hash;
295 };
296
297 /* Hashtable helpers. */
298
299 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
300 {
301 static inline hashval_t hash (const iv_inv_expr_ent *);
302 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
303 };
304
305 /* Hash function for loop invariant expressions. */
306
307 inline hashval_t
308 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
309 {
310 return expr->hash;
311 }
312
313 /* Hash table equality function for expressions. */
314
315 inline bool
316 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
317 const iv_inv_expr_ent *expr2)
318 {
319 return expr1->hash == expr2->hash
320 && operand_equal_p (expr1->expr, expr2->expr, 0);
321 }
322
323 struct ivopts_data
324 {
325 /* The currently optimized loop. */
326 struct loop *current_loop;
327 source_location loop_loc;
328
329 /* Numbers of iterations for all exits of the current loop. */
330 hash_map<edge, tree_niter_desc *> *niters;
331
332 /* Number of registers used in it. */
333 unsigned regs_used;
334
335 /* The size of version_info array allocated. */
336 unsigned version_info_size;
337
338 /* The array of information for the ssa names. */
339 struct version_info *version_info;
340
341 /* The hashtable of loop invariant expressions created
342 by ivopts. */
343 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
344
345 /* Loop invariant expression id. */
346 int inv_expr_id;
347
348 /* The bitmap of indices in version_info whose value was changed. */
349 bitmap relevant;
350
351 /* The uses of induction variables. */
352 vec<iv_use *> iv_uses;
353
354 /* The candidates. */
355 vec<iv_cand *> iv_candidates;
356
357 /* A bitmap of important candidates. */
358 bitmap important_candidates;
359
360 /* Cache used by tree_to_aff_combination_expand. */
361 hash_map<tree, name_expansion *> *name_expansion_cache;
362
363 /* The hashtable of common candidates derived from iv uses. */
364 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
365
366 /* The common candidates. */
367 vec<iv_common_cand *> iv_common_cands;
368
369 /* The maximum invariant id. */
370 unsigned max_inv_id;
371
372 /* Number of no_overflow BIVs which are not used in memory address. */
373 unsigned bivs_not_used_in_addr;
374
375 /* Obstack for iv structure. */
376 struct obstack iv_obstack;
377
378 /* Whether to consider just related and important candidates when replacing a
379 use. */
380 bool consider_all_candidates;
381
382 /* Are we optimizing for speed? */
383 bool speed;
384
385 /* Whether the loop body includes any function calls. */
386 bool body_includes_call;
387
388 /* Whether the loop body can only be exited via single exit. */
389 bool loop_single_exit_p;
390 };
391
392 /* An assignment of iv candidates to uses. */
393
394 struct iv_ca
395 {
396 /* The number of uses covered by the assignment. */
397 unsigned upto;
398
399 /* Number of uses that cannot be expressed by the candidates in the set. */
400 unsigned bad_uses;
401
402 /* Candidate assigned to a use, together with the related costs. */
403 struct cost_pair **cand_for_use;
404
405 /* Number of times each candidate is used. */
406 unsigned *n_cand_uses;
407
408 /* The candidates used. */
409 bitmap cands;
410
411 /* The number of candidates in the set. */
412 unsigned n_cands;
413
414 /* Total number of registers needed. */
415 unsigned n_regs;
416
417 /* Total cost of expressing uses. */
418 comp_cost cand_use_cost;
419
420 /* Total cost of candidates. */
421 unsigned cand_cost;
422
423 /* Number of times each invariant is used. */
424 unsigned *n_invariant_uses;
425
426 /* The array holding the number of uses of each loop
427 invariant expression created by ivopts. */
428 unsigned *used_inv_expr;
429
430 /* The number of created loop invariants. */
431 unsigned num_used_inv_expr;
432
433 /* Total cost of the assignment. */
434 comp_cost cost;
435 };
436
437 /* Difference of two iv candidate assignments. */
438
439 struct iv_ca_delta
440 {
441 /* Changed use. */
442 struct iv_use *use;
443
444 /* An old assignment (for rollback purposes). */
445 struct cost_pair *old_cp;
446
447 /* A new assignment. */
448 struct cost_pair *new_cp;
449
450 /* Next change in the list. */
451 struct iv_ca_delta *next_change;
452 };
453
454 /* Bound on the number of candidates below which all candidates are considered. */
455
456 #define CONSIDER_ALL_CANDIDATES_BOUND \
457 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
458
459 /* If there are more iv occurrences, we just give up (it is quite unlikely that
460 optimizing such a loop would help, and it would take ages). */
461
462 #define MAX_CONSIDERED_USES \
463 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
464
465 /* If there are at most this number of ivs in the set, always try removing
466 unnecessary ivs from the set. */
467
468 #define ALWAYS_PRUNE_CAND_SET_BOUND \
469 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
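/* These bounds are controlled by --param knobs; assuming the usual names
   from params.def they can be tuned on the command line, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=60 \
             --param iv-max-considered-uses=400 file.c

   (illustrative invocation only).  */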
470
471 /* The list of trees for which the decl_rtl field must be reset is stored
472 here. */
473
474 static vec<tree> decl_rtl_to_reset;
475
476 static comp_cost force_expr_to_var_cost (tree, bool);
477
478 /* Number of uses recorded in DATA. */
479
480 static inline unsigned
481 n_iv_uses (struct ivopts_data *data)
482 {
483 return data->iv_uses.length ();
484 }
485
486 /* Ith use recorded in DATA. */
487
488 static inline struct iv_use *
489 iv_use (struct ivopts_data *data, unsigned i)
490 {
491 return data->iv_uses[i];
492 }
493
494 /* Number of candidates recorded in DATA. */
495
496 static inline unsigned
497 n_iv_cands (struct ivopts_data *data)
498 {
499 return data->iv_candidates.length ();
500 }
501
502 /* Ith candidate recorded in DATA. */
503
504 static inline struct iv_cand *
505 iv_cand (struct ivopts_data *data, unsigned i)
506 {
507 return data->iv_candidates[i];
508 }
509
510 /* The single loop exit if it dominates the latch, NULL otherwise. */
511
512 edge
513 single_dom_exit (struct loop *loop)
514 {
515 edge exit = single_exit (loop);
516
517 if (!exit)
518 return NULL;
519
520 if (!just_once_each_iteration_p (loop, exit->src))
521 return NULL;
522
523 return exit;
524 }
525
526 /* Dumps information about the induction variable IV to FILE. */
527
528 void
529 dump_iv (FILE *file, struct iv *iv, bool dump_name)
530 {
531 if (iv->ssa_name && dump_name)
532 {
533 fprintf (file, "ssa name ");
534 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
535 fprintf (file, "\n");
536 }
537
538 fprintf (file, " type ");
539 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
540 fprintf (file, "\n");
541
542 if (iv->step)
543 {
544 fprintf (file, " base ");
545 print_generic_expr (file, iv->base, TDF_SLIM);
546 fprintf (file, "\n");
547
548 fprintf (file, " step ");
549 print_generic_expr (file, iv->step, TDF_SLIM);
550 fprintf (file, "\n");
551 }
552 else
553 {
554 fprintf (file, " invariant ");
555 print_generic_expr (file, iv->base, TDF_SLIM);
556 fprintf (file, "\n");
557 }
558
559 if (iv->base_object)
560 {
561 fprintf (file, " base object ");
562 print_generic_expr (file, iv->base_object, TDF_SLIM);
563 fprintf (file, "\n");
564 }
565
566 if (iv->biv_p)
567 fprintf (file, " is a biv\n");
568
569 if (iv->no_overflow)
570 fprintf (file, " iv doesn't overflow wrto loop niter\n");
571 }
572
573 /* Dumps information about the USE to FILE. */
574
575 void
576 dump_use (FILE *file, struct iv_use *use)
577 {
578 fprintf (file, "use %d", use->id);
579 if (use->sub_id)
580 fprintf (file, ".%d", use->sub_id);
581
582 fprintf (file, "\n");
583
584 switch (use->type)
585 {
586 case USE_NONLINEAR_EXPR:
587 fprintf (file, " generic\n");
588 break;
589
590 case USE_ADDRESS:
591 fprintf (file, " address\n");
592 break;
593
594 case USE_COMPARE:
595 fprintf (file, " compare\n");
596 break;
597
598 default:
599 gcc_unreachable ();
600 }
601
602 fprintf (file, " in statement ");
603 print_gimple_stmt (file, use->stmt, 0, 0);
604 fprintf (file, "\n");
605
606 fprintf (file, " at position ");
607 if (use->op_p)
608 print_generic_expr (file, *use->op_p, TDF_SLIM);
609 fprintf (file, "\n");
610
611 dump_iv (file, use->iv, false);
612
613 if (use->related_cands)
614 {
615 fprintf (file, " related candidates ");
616 dump_bitmap (file, use->related_cands);
617 }
618 }
619
620 /* Dumps information about the uses to FILE. */
621
622 void
623 dump_uses (FILE *file, struct ivopts_data *data)
624 {
625 unsigned i;
626 struct iv_use *use;
627
628 for (i = 0; i < n_iv_uses (data); i++)
629 {
630 use = iv_use (data, i);
631 do
632 {
633 dump_use (file, use);
634 use = use->next;
635 }
636 while (use);
637 fprintf (file, "\n");
638 }
639 }
640
641 /* Dumps information about induction variable candidate CAND to FILE. */
642
643 void
644 dump_cand (FILE *file, struct iv_cand *cand)
645 {
646 struct iv *iv = cand->iv;
647
648 fprintf (file, "candidate %d%s\n",
649 cand->id, cand->important ? " (important)" : "");
650
651 if (cand->depends_on)
652 {
653 fprintf (file, " depends on ");
654 dump_bitmap (file, cand->depends_on);
655 }
656
657 if (!iv)
658 {
659 fprintf (file, " final value replacement\n");
660 return;
661 }
662
663 if (cand->var_before)
664 {
665 fprintf (file, " var_before ");
666 print_generic_expr (file, cand->var_before, TDF_SLIM);
667 fprintf (file, "\n");
668 }
669 if (cand->var_after)
670 {
671 fprintf (file, " var_after ");
672 print_generic_expr (file, cand->var_after, TDF_SLIM);
673 fprintf (file, "\n");
674 }
675
676 switch (cand->pos)
677 {
678 case IP_NORMAL:
679 fprintf (file, " incremented before exit test\n");
680 break;
681
682 case IP_BEFORE_USE:
683 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
684 break;
685
686 case IP_AFTER_USE:
687 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
688 break;
689
690 case IP_END:
691 fprintf (file, " incremented at end\n");
692 break;
693
694 case IP_ORIGINAL:
695 fprintf (file, " original biv\n");
696 break;
697 }
698
699 dump_iv (file, iv, false);
700 }
701
702 /* Returns the info for ssa version VER. */
703
704 static inline struct version_info *
705 ver_info (struct ivopts_data *data, unsigned ver)
706 {
707 return data->version_info + ver;
708 }
709
710 /* Returns the info for ssa name NAME. */
711
712 static inline struct version_info *
713 name_info (struct ivopts_data *data, tree name)
714 {
715 return ver_info (data, SSA_NAME_VERSION (name));
716 }
717
718 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
719 emitted in LOOP. */
720
721 static bool
722 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
723 {
724 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
725
726 gcc_assert (bb);
727
728 if (sbb == loop->latch)
729 return true;
730
731 if (sbb != bb)
732 return false;
733
734 return stmt == last_stmt (bb);
735 }
736
737 /* Returns true if STMT is after the place where the original induction
738 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
739 if the positions are identical. */
740
741 static bool
742 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
743 {
744 basic_block cand_bb = gimple_bb (cand->incremented_at);
745 basic_block stmt_bb = gimple_bb (stmt);
746
747 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
748 return false;
749
750 if (stmt_bb != cand_bb)
751 return true;
752
753 if (true_if_equal
754 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
755 return true;
756 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
757 }
758
759 /* Returns true if STMT is after the place where the induction variable
760 CAND is incremented in LOOP. */
761
762 static bool
763 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
764 {
765 switch (cand->pos)
766 {
767 case IP_END:
768 return false;
769
770 case IP_NORMAL:
771 return stmt_after_ip_normal_pos (loop, stmt);
772
773 case IP_ORIGINAL:
774 case IP_AFTER_USE:
775 return stmt_after_inc_pos (cand, stmt, false);
776
777 case IP_BEFORE_USE:
778 return stmt_after_inc_pos (cand, stmt, true);
779
780 default:
781 gcc_unreachable ();
782 }
783 }
784
785 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
786
787 static bool
788 abnormal_ssa_name_p (tree exp)
789 {
790 if (!exp)
791 return false;
792
793 if (TREE_CODE (exp) != SSA_NAME)
794 return false;
795
796 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
797 }
798
799 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
800 abnormal phi node. Callback for for_each_index. */
801
802 static bool
803 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
804 void *data ATTRIBUTE_UNUSED)
805 {
806 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
807 {
808 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
809 return false;
810 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
811 return false;
812 }
813
814 return !abnormal_ssa_name_p (*index);
815 }
816
817 /* Returns true if EXPR contains a ssa name that occurs in an
818 abnormal phi node. */
819
820 bool
821 contains_abnormal_ssa_name_p (tree expr)
822 {
823 enum tree_code code;
824 enum tree_code_class codeclass;
825
826 if (!expr)
827 return false;
828
829 code = TREE_CODE (expr);
830 codeclass = TREE_CODE_CLASS (code);
831
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
834
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
838
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
843
844 if (code == COND_EXPR)
845 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
846 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
847 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
848
849 switch (codeclass)
850 {
851 case tcc_binary:
852 case tcc_comparison:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
854 return true;
855
856 /* Fallthru. */
857 case tcc_unary:
858 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
859 return true;
860
861 break;
862
863 default:
864 gcc_unreachable ();
865 }
866
867 return false;
868 }
869
870 /* Returns the structure describing number of iterations determined from
871 EXIT of DATA->current_loop, or NULL if something goes wrong. */
872
873 static struct tree_niter_desc *
874 niter_for_exit (struct ivopts_data *data, edge exit)
875 {
876 struct tree_niter_desc *desc;
877 tree_niter_desc **slot;
878
879 if (!data->niters)
880 {
881 data->niters = new hash_map<edge, tree_niter_desc *>;
882 slot = NULL;
883 }
884 else
885 slot = data->niters->get (exit);
886
887 if (!slot)
888 {
889 /* Try to determine the number of iterations. We cannot safely work with ssa
890 names that appear in phi nodes on abnormal edges, as that could
891 create overlapping life ranges for them (PR 27283). */
892 desc = XNEW (struct tree_niter_desc);
893 if (!number_of_iterations_exit (data->current_loop,
894 exit, desc, true)
895 || contains_abnormal_ssa_name_p (desc->niter))
896 {
897 XDELETE (desc);
898 desc = NULL;
899 }
900 data->niters->put (exit, desc);
901 }
902 else
903 desc = *slot;
904
905 return desc;
906 }
907
908 /* Returns the structure describing number of iterations determined from
909 single dominating exit of DATA->current_loop, or NULL if something
910 goes wrong. */
911
912 static struct tree_niter_desc *
913 niter_for_single_dom_exit (struct ivopts_data *data)
914 {
915 edge exit = single_dom_exit (data->current_loop);
916
917 if (!exit)
918 return NULL;
919
920 return niter_for_exit (data, exit);
921 }
922
923 /* Initializes data structures used by the iv optimization pass, stored
924 in DATA. */
925
926 static void
927 tree_ssa_iv_optimize_init (struct ivopts_data *data)
928 {
929 data->version_info_size = 2 * num_ssa_names;
930 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
931 data->relevant = BITMAP_ALLOC (NULL);
932 data->important_candidates = BITMAP_ALLOC (NULL);
933 data->max_inv_id = 0;
934 data->niters = NULL;
935 data->iv_uses.create (20);
936 data->iv_candidates.create (20);
937 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
938 data->inv_expr_id = 0;
939 data->name_expansion_cache = NULL;
940 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
941 data->iv_common_cands.create (20);
942 decl_rtl_to_reset.create (20);
943 gcc_obstack_init (&data->iv_obstack);
944 }
945
946 /* Returns a memory object to which EXPR points. In case we are able to
947 determine that it does not point to any such object, NULL is returned. */
948
949 static tree
950 determine_base_object (tree expr)
951 {
952 enum tree_code code = TREE_CODE (expr);
953 tree base, obj;
954
955 /* If this is a pointer casted to any type, we need to determine
956 the base object for the pointer; so handle conversions before
957 throwing away non-pointer expressions. */
958 if (CONVERT_EXPR_P (expr))
959 return determine_base_object (TREE_OPERAND (expr, 0));
960
961 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
962 return NULL_TREE;
963
964 switch (code)
965 {
966 case INTEGER_CST:
967 return NULL_TREE;
968
969 case ADDR_EXPR:
970 obj = TREE_OPERAND (expr, 0);
971 base = get_base_address (obj);
972
973 if (!base)
974 return expr;
975
976 if (TREE_CODE (base) == MEM_REF)
977 return determine_base_object (TREE_OPERAND (base, 0));
978
979 return fold_convert (ptr_type_node,
980 build_fold_addr_expr (base));
981
982 case POINTER_PLUS_EXPR:
983 return determine_base_object (TREE_OPERAND (expr, 0));
984
985 case PLUS_EXPR:
986 case MINUS_EXPR:
987 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
988 gcc_unreachable ();
989
990 default:
991 return fold_convert (ptr_type_node, expr);
992 }
993 }
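/* Illustrative examples (assumed, not exhaustive): for &a[0] or for
   &a[0] + 4 * i the base object is the address of the declaration a; for
   an integer constant cast to a pointer NULL_TREE is returned; for an
   arbitrary pointer ssa name p_1 the expression itself, converted to
   ptr_type_node, serves as the base object.  */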
994
995 /* Return true if an address expression with a non-DECL_P operand appears
996 in EXPR. */
997
998 static bool
999 contain_complex_addr_expr (tree expr)
1000 {
1001 bool res = false;
1002
1003 STRIP_NOPS (expr);
1004 switch (TREE_CODE (expr))
1005 {
1006 case POINTER_PLUS_EXPR:
1007 case PLUS_EXPR:
1008 case MINUS_EXPR:
1009 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1010 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1011 break;
1012
1013 case ADDR_EXPR:
1014 return (!DECL_P (TREE_OPERAND (expr, 0)));
1015
1016 default:
1017 return false;
1018 }
1019
1020 return res;
1021 }
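/* For example (hypothetical cases): &a and p_1 + 4 contain no complex
   address expression, since the ADDR_EXPR operand is a DECL resp. there
   is no ADDR_EXPR at all, while &a[0] and &s->f + i_2 do, because the
   operand of the ADDR_EXPR is an ARRAY_REF or COMPONENT_REF.  */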
1022
1023 /* Allocates an induction variable with given initial value BASE and step STEP
1024 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1025
1026 static struct iv *
1027 alloc_iv (struct ivopts_data *data, tree base, tree step,
1028 bool no_overflow = false)
1029 {
1030 tree expr = base;
1031 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1032 sizeof (struct iv));
1033 gcc_assert (step != NULL_TREE);
1034
1035 /* Lower address expressions in the base, except ones with a DECL_P operand.
1036 By doing this:
1037 1) More accurate costs can be computed for address expressions;
1038 2) Duplicate candidates won't be created for bases in different
1039 forms, like &a[0] and &a. */
1040 STRIP_NOPS (expr);
1041 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1042 || contain_complex_addr_expr (expr))
1043 {
1044 aff_tree comb;
1045 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1046 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1047 }
1048
1049 iv->base = base;
1050 iv->base_object = determine_base_object (base);
1051 iv->step = step;
1052 iv->biv_p = false;
1053 iv->have_use_for = false;
1054 iv->use_id = 0;
1055 iv->ssa_name = NULL_TREE;
1056 iv->no_overflow = no_overflow;
1057 iv->have_address_use = false;
1058
1059 return iv;
1060 }
1061
1062 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1063 doesn't overflow. */
1064
1065 static void
1066 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1067 bool no_overflow)
1068 {
1069 struct version_info *info = name_info (data, iv);
1070
1071 gcc_assert (!info->iv);
1072
1073 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1074 info->iv = alloc_iv (data, base, step, no_overflow);
1075 info->iv->ssa_name = iv;
1076 }
1077
1078 /* Finds induction variable declaration for VAR. */
1079
1080 static struct iv *
1081 get_iv (struct ivopts_data *data, tree var)
1082 {
1083 basic_block bb;
1084 tree type = TREE_TYPE (var);
1085
1086 if (!POINTER_TYPE_P (type)
1087 && !INTEGRAL_TYPE_P (type))
1088 return NULL;
1089
1090 if (!name_info (data, var)->iv)
1091 {
1092 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1093
1094 if (!bb
1095 || !flow_bb_inside_loop_p (data->current_loop, bb))
1096 set_iv (data, var, var, build_int_cst (type, 0), true);
1097 }
1098
1099 return name_info (data, var)->iv;
1100 }
1101
1102 /* Return the first non-invariant ssa var found in EXPR. */
1103
1104 static tree
1105 extract_single_var_from_expr (tree expr)
1106 {
1107 int i, n;
1108 tree tmp;
1109 enum tree_code code;
1110
1111 if (!expr || is_gimple_min_invariant (expr))
1112 return NULL;
1113
1114 code = TREE_CODE (expr);
1115 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1116 {
1117 n = TREE_OPERAND_LENGTH (expr);
1118 for (i = 0; i < n; i++)
1119 {
1120 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1121
1122 if (tmp)
1123 return tmp;
1124 }
1125 }
1126 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1127 }
1128
1129 /* Finds basic ivs. */
1130
1131 static bool
1132 find_bivs (struct ivopts_data *data)
1133 {
1134 gphi *phi;
1135 affine_iv iv;
1136 tree step, type, base, stop;
1137 bool found = false;
1138 struct loop *loop = data->current_loop;
1139 gphi_iterator psi;
1140
1141 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1142 {
1143 phi = psi.phi ();
1144
1145 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1146 continue;
1147
1148 if (virtual_operand_p (PHI_RESULT (phi)))
1149 continue;
1150
1151 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1152 continue;
1153
1154 if (integer_zerop (iv.step))
1155 continue;
1156
1157 step = iv.step;
1158 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1159 /* Stop expanding the iv base at the first ssa var referred to by the iv
1160 step. Ideally we would stop at any such ssa var, but since that is
1161 expensive and the case is unusual, we only do it for the first one.
1162
1163 See PR64705 for the rationale. */
1164 stop = extract_single_var_from_expr (step);
1165 base = expand_simple_operations (base, stop);
1166 if (contains_abnormal_ssa_name_p (base)
1167 || contains_abnormal_ssa_name_p (step))
1168 continue;
1169
1170 type = TREE_TYPE (PHI_RESULT (phi));
1171 base = fold_convert (type, base);
1172 if (step)
1173 {
1174 if (POINTER_TYPE_P (type))
1175 step = convert_to_ptrofftype (step);
1176 else
1177 step = fold_convert (type, step);
1178 }
1179
1180 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1181 found = true;
1182 }
1183
1184 return found;
1185 }
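/* For instance (an illustrative case), for a header phi
   i_1 = PHI <0(preheader), i_2(latch)> with i_2 = i_1 + 1 in the body,
   simple_iv recognizes i_1 as an iv with base 0 and step 1, and set_iv
   records it; mark_bivs below then flags both i_1 and i_2 as bivs.  */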
1186
1187 /* Marks basic ivs. */
1188
1189 static void
1190 mark_bivs (struct ivopts_data *data)
1191 {
1192 gphi *phi;
1193 gimple *def;
1194 tree var;
1195 struct iv *iv, *incr_iv;
1196 struct loop *loop = data->current_loop;
1197 basic_block incr_bb;
1198 gphi_iterator psi;
1199
1200 data->bivs_not_used_in_addr = 0;
1201 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1202 {
1203 phi = psi.phi ();
1204
1205 iv = get_iv (data, PHI_RESULT (phi));
1206 if (!iv)
1207 continue;
1208
1209 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1210 def = SSA_NAME_DEF_STMT (var);
1211 /* Don't mark an iv peeled from another one as a biv. */
1212 if (def
1213 && gimple_code (def) == GIMPLE_PHI
1214 && gimple_bb (def) == loop->header)
1215 continue;
1216
1217 incr_iv = get_iv (data, var);
1218 if (!incr_iv)
1219 continue;
1220
1221 /* If the increment is in the subloop, ignore it. */
1222 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1223 if (incr_bb->loop_father != data->current_loop
1224 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1225 continue;
1226
1227 iv->biv_p = true;
1228 incr_iv->biv_p = true;
1229 if (iv->no_overflow)
1230 data->bivs_not_used_in_addr++;
1231 if (incr_iv->no_overflow)
1232 data->bivs_not_used_in_addr++;
1233 }
1234 }
1235
1236 /* Checks whether STMT defines a linear induction variable and stores its
1237 parameters to IV. */
1238
1239 static bool
1240 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1241 {
1242 tree lhs, stop;
1243 struct loop *loop = data->current_loop;
1244
1245 iv->base = NULL_TREE;
1246 iv->step = NULL_TREE;
1247
1248 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1249 return false;
1250
1251 lhs = gimple_assign_lhs (stmt);
1252 if (TREE_CODE (lhs) != SSA_NAME)
1253 return false;
1254
1255 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1256 return false;
1257
1258 /* Stop expanding the iv base at the first ssa var referred to by the iv
1259 step. Ideally we would stop at any such ssa var, but since that is
1260 expensive and the case is unusual, we only do it for the first one.
1261
1262 See PR64705 for the rationale. */
1263 stop = extract_single_var_from_expr (iv->step);
1264 iv->base = expand_simple_operations (iv->base, stop);
1265 if (contains_abnormal_ssa_name_p (iv->base)
1266 || contains_abnormal_ssa_name_p (iv->step))
1267 return false;
1268
1269 /* If STMT could throw, then do not consider STMT as defining a GIV.
1270 While this will suppress optimizations, we cannot safely delete this
1271 GIV and associated statements, even if it appears it is not used. */
1272 if (stmt_could_throw_p (stmt))
1273 return false;
1274
1275 return true;
1276 }
1277
1278 /* Finds general ivs in statement STMT. */
1279
1280 static void
1281 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1282 {
1283 affine_iv iv;
1284
1285 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1286 return;
1287
1288 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1289 }
1290
1291 /* Finds general ivs in basic block BB. */
1292
1293 static void
1294 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1295 {
1296 gimple_stmt_iterator bsi;
1297
1298 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1299 find_givs_in_stmt (data, gsi_stmt (bsi));
1300 }
1301
1302 /* Finds general ivs. */
1303
1304 static void
1305 find_givs (struct ivopts_data *data)
1306 {
1307 struct loop *loop = data->current_loop;
1308 basic_block *body = get_loop_body_in_dom_order (loop);
1309 unsigned i;
1310
1311 for (i = 0; i < loop->num_nodes; i++)
1312 find_givs_in_bb (data, body[i]);
1313 free (body);
1314 }
1315
1316 /* For each ssa name defined in LOOP determines whether it is an induction
1317 variable and if so, its initial value and step. */
1318
1319 static bool
1320 find_induction_variables (struct ivopts_data *data)
1321 {
1322 unsigned i;
1323 bitmap_iterator bi;
1324
1325 if (!find_bivs (data))
1326 return false;
1327
1328 find_givs (data);
1329 mark_bivs (data);
1330
1331 if (dump_file && (dump_flags & TDF_DETAILS))
1332 {
1333 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1334
1335 if (niter)
1336 {
1337 fprintf (dump_file, " number of iterations ");
1338 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1339 if (!integer_zerop (niter->may_be_zero))
1340 {
1341 fprintf (dump_file, "; zero if ");
1342 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1343 }
1344 fprintf (dump_file, "\n\n");
1345 };
1346
1347 fprintf (dump_file, "Induction variables:\n\n");
1348
1349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1350 {
1351 if (ver_info (data, i)->iv)
1352 dump_iv (dump_file, ver_info (data, i)->iv, true);
1353 }
1354 }
1355
1356 return true;
1357 }
1358
1359 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1360 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1361 is the const offset stripped from IV base. For uses of other types,
1362 ADDR_BASE and ADDR_OFFSET are zero by default. */
1363
1364 static struct iv_use *
1365 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1366 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1367 unsigned HOST_WIDE_INT addr_offset = 0)
1368 {
1369 struct iv_use *use = XCNEW (struct iv_use);
1370
1371 use->id = n_iv_uses (data);
1372 use->sub_id = 0;
1373 use->type = use_type;
1374 use->iv = iv;
1375 use->stmt = stmt;
1376 use->op_p = use_p;
1377 use->related_cands = BITMAP_ALLOC (NULL);
1378 use->next = NULL;
1379 use->addr_base = addr_base;
1380 use->addr_offset = addr_offset;
1381
1382 data->iv_uses.safe_push (use);
1383
1384 return use;
1385 }
1386
1387 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1388 The sub use is recorded under the one whose use id is ID_GROUP. */
1389
1390 static struct iv_use *
1391 record_sub_use (struct ivopts_data *data, tree *use_p,
1392 struct iv *iv, gimple *stmt, enum use_type use_type,
1393 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1394 unsigned int id_group)
1395 {
1396 struct iv_use *use = XCNEW (struct iv_use);
1397 struct iv_use *group = iv_use (data, id_group);
1398
1399 use->id = group->id;
1400 use->sub_id = 0;
1401 use->type = use_type;
1402 use->iv = iv;
1403 use->stmt = stmt;
1404 use->op_p = use_p;
1405 use->related_cands = NULL;
1406 use->addr_base = addr_base;
1407 use->addr_offset = addr_offset;
1408
1409 /* The sub use list is maintained in ascending order of offset. */
1410 if (addr_offset <= group->addr_offset)
1411 {
1412 use->related_cands = group->related_cands;
1413 group->related_cands = NULL;
1414 use->next = group;
1415 data->iv_uses[id_group] = use;
1416 }
1417 else
1418 {
1419 struct iv_use *pre;
1420 do
1421 {
1422 pre = group;
1423 group = group->next;
1424 }
1425 while (group && addr_offset > group->addr_offset);
1426 use->next = pre->next;
1427 pre->next = use;
1428 }
1429
1430 return use;
1431 }
1432
1433 /* Checks whether OP is a loop-level invariant and if so, records it.
1434 NONLINEAR_USE is true if the invariant is used in a way we do not
1435 handle specially. */
1436
1437 static void
1438 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1439 {
1440 basic_block bb;
1441 struct version_info *info;
1442
1443 if (TREE_CODE (op) != SSA_NAME
1444 || virtual_operand_p (op))
1445 return;
1446
1447 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1448 if (bb
1449 && flow_bb_inside_loop_p (data->current_loop, bb))
1450 return;
1451
1452 info = name_info (data, op);
1453 info->name = op;
1454 info->has_nonlin_use |= nonlinear_use;
1455 if (!info->inv_id)
1456 info->inv_id = ++data->max_inv_id;
1457 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1458 }
1459
1460 /* Checks whether the use OP is interesting and if so, records it. */
1461
1462 static struct iv_use *
1463 find_interesting_uses_op (struct ivopts_data *data, tree op)
1464 {
1465 struct iv *iv;
1466 gimple *stmt;
1467 struct iv_use *use;
1468
1469 if (TREE_CODE (op) != SSA_NAME)
1470 return NULL;
1471
1472 iv = get_iv (data, op);
1473 if (!iv)
1474 return NULL;
1475
1476 if (iv->have_use_for)
1477 {
1478 use = iv_use (data, iv->use_id);
1479
1480 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1481 return use;
1482 }
1483
1484 if (integer_zerop (iv->step))
1485 {
1486 record_invariant (data, op, true);
1487 return NULL;
1488 }
1489 iv->have_use_for = true;
1490
1491 stmt = SSA_NAME_DEF_STMT (op);
1492 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1493 || is_gimple_assign (stmt));
1494
1495 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1496 iv->use_id = use->id;
1497
1498 return use;
1499 }
1500
1501 /* Given a condition in statement STMT, checks whether it is a compare
1502 of an induction variable and an invariant. If this is the case,
1503 CONTROL_VAR is set to location of the iv, BOUND to the location of
1504 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1505 induction variable descriptions, and true is returned. If this is not
1506 the case, CONTROL_VAR and BOUND are set to the arguments of the
1507 condition and false is returned. */
1508
1509 static bool
1510 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1511 tree **control_var, tree **bound,
1512 struct iv **iv_var, struct iv **iv_bound)
1513 {
1514 /* The objects returned when COND has constant operands. */
1515 static struct iv const_iv;
1516 static tree zero;
1517 tree *op0 = &zero, *op1 = &zero;
1518 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1519 bool ret = false;
1520
1521 if (gimple_code (stmt) == GIMPLE_COND)
1522 {
1523 gcond *cond_stmt = as_a <gcond *> (stmt);
1524 op0 = gimple_cond_lhs_ptr (cond_stmt);
1525 op1 = gimple_cond_rhs_ptr (cond_stmt);
1526 }
1527 else
1528 {
1529 op0 = gimple_assign_rhs1_ptr (stmt);
1530 op1 = gimple_assign_rhs2_ptr (stmt);
1531 }
1532
1533 zero = integer_zero_node;
1534 const_iv.step = integer_zero_node;
1535
1536 if (TREE_CODE (*op0) == SSA_NAME)
1537 iv0 = get_iv (data, *op0);
1538 if (TREE_CODE (*op1) == SSA_NAME)
1539 iv1 = get_iv (data, *op1);
1540
1541 /* Exactly one of the compared values must be an iv, and the other one must
1542 be an invariant. */
1543 if (!iv0 || !iv1)
1544 goto end;
1545
1546 if (integer_zerop (iv0->step))
1547 {
1548 /* Control variable may be on the other side. */
1549 std::swap (op0, op1);
1550 std::swap (iv0, iv1);
1551 }
1552 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1553
1554 end:
1555 if (control_var)
1556 *control_var = op0;
1557 if (iv_var)
1558 *iv_var = iv0;
1559 if (bound)
1560 *bound = op1;
1561 if (iv_bound)
1562 *iv_bound = iv1;
1563
1564 return ret;
1565 }
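/* For example (illustrative), for an exit test i_1 < n_2 where i_1 is an
   iv with nonzero step and n_2 is loop invariant, *CONTROL_VAR is set to
   the location of i_1, *BOUND to that of n_2, and true is returned; when
   both operands are invariant, false is returned.  */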
1566
1567 /* Checks whether the condition in STMT is interesting and if so,
1568 records it. */
1569
1570 static void
1571 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1572 {
1573 tree *var_p, *bound_p;
1574 struct iv *var_iv;
1575
1576 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1577 {
1578 find_interesting_uses_op (data, *var_p);
1579 find_interesting_uses_op (data, *bound_p);
1580 return;
1581 }
1582
1583 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1584 }
1585
1586 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1587 obviously invariant, i.e. such that all operands of EXPR are defined
1588 outside of the returned loop. Returns NULL if EXPR is not
1589 even obviously invariant in LOOP. */
1590
1591 struct loop *
1592 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1593 {
1594 basic_block def_bb;
1595 unsigned i, len;
1596
1597 if (is_gimple_min_invariant (expr))
1598 return current_loops->tree_root;
1599
1600 if (TREE_CODE (expr) == SSA_NAME)
1601 {
1602 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1603 if (def_bb)
1604 {
1605 if (flow_bb_inside_loop_p (loop, def_bb))
1606 return NULL;
1607 return superloop_at_depth (loop,
1608 loop_depth (def_bb->loop_father) + 1);
1609 }
1610
1611 return current_loops->tree_root;
1612 }
1613
1614 if (!EXPR_P (expr))
1615 return NULL;
1616
1617 unsigned maxdepth = 0;
1618 len = TREE_OPERAND_LENGTH (expr);
1619 for (i = 0; i < len; i++)
1620 {
1621 struct loop *ivloop;
1622 if (!TREE_OPERAND (expr, i))
1623 continue;
1624
1625 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1626 if (!ivloop)
1627 return NULL;
1628 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1629 }
1630
1631 return superloop_at_depth (loop, maxdepth);
1632 }
1633
1634 /* Returns true if expression EXPR is obviously invariant in LOOP,
1635 i.e. if all its operands are defined outside of the LOOP. LOOP
1636 should not be the function body. */
1637
1638 bool
1639 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1640 {
1641 basic_block def_bb;
1642 unsigned i, len;
1643
1644 gcc_assert (loop_depth (loop) > 0);
1645
1646 if (is_gimple_min_invariant (expr))
1647 return true;
1648
1649 if (TREE_CODE (expr) == SSA_NAME)
1650 {
1651 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1652 if (def_bb
1653 && flow_bb_inside_loop_p (loop, def_bb))
1654 return false;
1655
1656 return true;
1657 }
1658
1659 if (!EXPR_P (expr))
1660 return false;
1661
1662 len = TREE_OPERAND_LENGTH (expr);
1663 for (i = 0; i < len; i++)
1664 if (TREE_OPERAND (expr, i)
1665 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1666 return false;
1667
1668 return true;
1669 }
1670
1671 /* Given expression EXPR which computes inductive values with respect
1672 to the loop recorded in DATA, this function returns the biv from which
1673 EXPR is derived by tracing definition chains of ssa variables in EXPR. */
1674
1675 static struct iv*
1676 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1677 {
1678 struct iv *iv;
1679 unsigned i, n;
1680 tree e2, e1;
1681 enum tree_code code;
1682 gimple *stmt;
1683
1684 if (expr == NULL_TREE)
1685 return NULL;
1686
1687 if (is_gimple_min_invariant (expr))
1688 return NULL;
1689
1690 code = TREE_CODE (expr);
1691 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1692 {
1693 n = TREE_OPERAND_LENGTH (expr);
1694 for (i = 0; i < n; i++)
1695 {
1696 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1697 if (iv)
1698 return iv;
1699 }
1700 }
1701
1702 /* Stop if it's not ssa name. */
1703 if (code != SSA_NAME)
1704 return NULL;
1705
1706 iv = get_iv (data, expr);
1707 if (!iv || integer_zerop (iv->step))
1708 return NULL;
1709 else if (iv->biv_p)
1710 return iv;
1711
1712 stmt = SSA_NAME_DEF_STMT (expr);
1713 if (gphi *phi = dyn_cast <gphi *> (stmt))
1714 {
1715 ssa_op_iter iter;
1716 use_operand_p use_p;
1717
1718 if (virtual_operand_p (gimple_phi_result (phi)))
1719 return NULL;
1720
1721 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1722 {
1723 tree use = USE_FROM_PTR (use_p);
1724 iv = find_deriving_biv_for_expr (data, use);
1725 if (iv)
1726 return iv;
1727 }
1728 return NULL;
1729 }
1730 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1731 return NULL;
1732
1733 e1 = gimple_assign_rhs1 (stmt);
1734 code = gimple_assign_rhs_code (stmt);
1735 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1736 return find_deriving_biv_for_expr (data, e1);
1737
1738 switch (code)
1739 {
1740 case MULT_EXPR:
1741 case PLUS_EXPR:
1742 case MINUS_EXPR:
1743 case POINTER_PLUS_EXPR:
1744 /* Increments, decrements and multiplications by a constant
1745 are simple. */
1746 e2 = gimple_assign_rhs2 (stmt);
1747 iv = find_deriving_biv_for_expr (data, e2);
1748 if (iv)
1749 return iv;
1750
1751 /* Fallthru. */
1752 CASE_CONVERT:
1753 /* Casts are simple. */
1754 return find_deriving_biv_for_expr (data, e1);
1755
1756 default:
1757 break;
1758 }
1759
1760 return NULL;
1761 }
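/* For example (illustrative), if EXPR is i_7 defined by i_7 = i_2 * 4
   where i_2 is a biv, the recursion through the operands of the defining
   statement reaches i_2 and returns its iv.  */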
1762
1763 /* Record that BIV and the bivs that are its predecessor or successor are
1764 used in address type uses. */
1765
1766 static void
1767 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1768 {
1769 unsigned i;
1770 tree type, base_1, base_2;
1771 bitmap_iterator bi;
1772
1773 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1774 || biv->have_address_use || !biv->no_overflow)
1775 return;
1776
1777 type = TREE_TYPE (biv->base);
1778 if (!INTEGRAL_TYPE_P (type))
1779 return;
1780
1781 biv->have_address_use = true;
1782 data->bivs_not_used_in_addr--;
1783 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1784 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1785 {
1786 struct iv *iv = ver_info (data, i)->iv;
1787
1788 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1789 || iv->have_address_use || !iv->no_overflow)
1790 continue;
1791
1792 if (type != TREE_TYPE (iv->base)
1793 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1794 continue;
1795
1796 if (!operand_equal_p (biv->step, iv->step, 0))
1797 continue;
1798
1799 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1800 if (operand_equal_p (base_1, iv->base, 0)
1801 || operand_equal_p (base_2, biv->base, 0))
1802 {
1803 iv->have_address_use = true;
1804 data->bivs_not_used_in_addr--;
1805 }
1806 }
1807 }
1808
1809 /* Accumulates the steps of indices into DATA and replaces their values with
1810 the initial ones. Returns false when the value of the index cannot be determined.
1811 Callback for for_each_index. */
1812
1813 struct ifs_ivopts_data
1814 {
1815 struct ivopts_data *ivopts_data;
1816 gimple *stmt;
1817 tree step;
1818 };
1819
1820 static bool
1821 idx_find_step (tree base, tree *idx, void *data)
1822 {
1823 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1824 struct iv *iv;
1825 bool use_overflow_semantics = false;
1826 tree step, iv_base, iv_step, lbound, off;
1827 struct loop *loop = dta->ivopts_data->current_loop;
1828
1829 /* If base is a component ref, require that the offset of the reference
1830 be invariant. */
1831 if (TREE_CODE (base) == COMPONENT_REF)
1832 {
1833 off = component_ref_field_offset (base);
1834 return expr_invariant_in_loop_p (loop, off);
1835 }
1836
1837 /* If base is an array reference, first check whether we will be able to move the
1838 reference out of the loop (in order to take its address in strength
1839 reduction). In order for this to work we need both lower bound
1840 and step to be loop invariants. */
1841 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1842 {
1843 /* Moreover, for a range, the size needs to be invariant as well. */
1844 if (TREE_CODE (base) == ARRAY_RANGE_REF
1845 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1846 return false;
1847
1848 step = array_ref_element_size (base);
1849 lbound = array_ref_low_bound (base);
1850
1851 if (!expr_invariant_in_loop_p (loop, step)
1852 || !expr_invariant_in_loop_p (loop, lbound))
1853 return false;
1854 }
1855
1856 if (TREE_CODE (*idx) != SSA_NAME)
1857 return true;
1858
1859 iv = get_iv (dta->ivopts_data, *idx);
1860 if (!iv)
1861 return false;
1862
1863 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1864 *&x[0], which is not folded and does not trigger the
1865 ARRAY_REF path below. */
1866 *idx = iv->base;
1867
1868 if (integer_zerop (iv->step))
1869 return true;
1870
1871 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1872 {
1873 step = array_ref_element_size (base);
1874
1875 /* We only handle addresses whose step is an integer constant. */
1876 if (TREE_CODE (step) != INTEGER_CST)
1877 return false;
1878 }
1879 else
1880 /* The step for pointer arithmetic is already 1 byte. */
1881 step = size_one_node;
1882
1883 iv_base = iv->base;
1884 iv_step = iv->step;
1885 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1886 use_overflow_semantics = true;
1887
1888 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1889 sizetype, &iv_base, &iv_step, dta->stmt,
1890 use_overflow_semantics))
1891 {
1892 /* The index might wrap. */
1893 return false;
1894 }
1895
1896 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1897 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1898
1899 if (dta->ivopts_data->bivs_not_used_in_addr)
1900 {
1901 if (!iv->biv_p)
1902 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1903
1904 record_biv_for_address_use (dta->ivopts_data, iv);
1905 }
1906 return true;
1907 }
1908
1909 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1910 object is passed to it in DATA. */
1911
1912 static bool
1913 idx_record_use (tree base, tree *idx,
1914 void *vdata)
1915 {
1916 struct ivopts_data *data = (struct ivopts_data *) vdata;
1917 find_interesting_uses_op (data, *idx);
1918 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1919 {
1920 find_interesting_uses_op (data, array_ref_element_size (base));
1921 find_interesting_uses_op (data, array_ref_low_bound (base));
1922 }
1923 return true;
1924 }
1925
1926 /* If we can prove that TOP = cst * BOT for some constant cst,
1927 store cst to MUL and return true. Otherwise return false.
1928 The returned value is always sign-extended, regardless of the
1929 signedness of TOP and BOT. */
1930
1931 static bool
1932 constant_multiple_of (tree top, tree bot, widest_int *mul)
1933 {
1934 tree mby;
1935 enum tree_code code;
1936 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1937 widest_int res, p0, p1;
1938
1939 STRIP_NOPS (top);
1940 STRIP_NOPS (bot);
1941
1942 if (operand_equal_p (top, bot, 0))
1943 {
1944 *mul = 1;
1945 return true;
1946 }
1947
1948 code = TREE_CODE (top);
1949 switch (code)
1950 {
1951 case MULT_EXPR:
1952 mby = TREE_OPERAND (top, 1);
1953 if (TREE_CODE (mby) != INTEGER_CST)
1954 return false;
1955
1956 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1957 return false;
1958
1959 *mul = wi::sext (res * wi::to_widest (mby), precision);
1960 return true;
1961
1962 case PLUS_EXPR:
1963 case MINUS_EXPR:
1964 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1965 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1966 return false;
1967
1968 if (code == MINUS_EXPR)
1969 p1 = -p1;
1970 *mul = wi::sext (p0 + p1, precision);
1971 return true;
1972
1973 case INTEGER_CST:
1974 if (TREE_CODE (bot) != INTEGER_CST)
1975 return false;
1976
1977 p0 = widest_int::from (top, SIGNED);
1978 p1 = widest_int::from (bot, SIGNED);
1979 if (p1 == 0)
1980 return false;
1981 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1982 return res == 0;
1983
1984 default:
1985 return false;
1986 }
1987 }
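/* Examples (illustrative): for TOP = (i_1 + 4) * 8 and BOT = i_1 + 4 the
   function stores 8 to *MUL and returns true; for TOP = 12 and BOT = 8 it
   returns false, since 12 is not an integral multiple of 8.  */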
1988
1989 /* Return true if memory reference REF with step STEP may be unaligned. */
1990
1991 static bool
1992 may_be_unaligned_p (tree ref, tree step)
1993 {
1994 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1995 thus they are not misaligned. */
1996 if (TREE_CODE (ref) == TARGET_MEM_REF)
1997 return false;
1998
1999 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2000 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2001 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2002
2003 unsigned HOST_WIDE_INT bitpos;
2004 unsigned int ref_align;
2005 get_object_alignment_1 (ref, &ref_align, &bitpos);
2006 if (ref_align < align
2007 || (bitpos % align) != 0
2008 || (bitpos % BITS_PER_UNIT) != 0)
2009 return true;
2010
2011 unsigned int trailing_zeros = tree_ctz (step);
2012 if (trailing_zeros < HOST_BITS_PER_INT
2013 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2014 return true;
2015
2016 return false;
2017 }
2018
2019 /* Return true if EXPR may be non-addressable. */
2020
2021 bool
2022 may_be_nonaddressable_p (tree expr)
2023 {
2024 switch (TREE_CODE (expr))
2025 {
2026 case TARGET_MEM_REF:
2027 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2028 target, thus they are always addressable. */
2029 return false;
2030
2031 case MEM_REF:
2032 /* Likewise for MEM_REFs, modulo the storage order. */
2033 return REF_REVERSE_STORAGE_ORDER (expr);
2034
2035 case BIT_FIELD_REF:
2036 if (REF_REVERSE_STORAGE_ORDER (expr))
2037 return true;
2038 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2039
2040 case COMPONENT_REF:
2041 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2042 return true;
2043 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2044 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2045
2046 case ARRAY_REF:
2047 case ARRAY_RANGE_REF:
2048 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2049 return true;
2050 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2051
2052 case VIEW_CONVERT_EXPR:
2053 /* This kind of view-conversion may wrap non-addressable objects
2054 and make them look addressable. After some processing the
2055 non-addressability may be uncovered again, causing ADDR_EXPRs
2056 of inappropriate objects to be built. */
2057 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2058 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2059 return true;
2060 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2061
2062 CASE_CONVERT:
2063 return true;
2064
2065 default:
2066 break;
2067 }
2068
2069 return false;
2070 }
2071
2072 static tree
2073 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2074
2075 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2076 If there is an existing use which has the same stripped iv base and step,
2077 this function records this one as a sub use of that one; otherwise it
2078 records it as a normal use. */
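/* A sketch with hypothetical ivs: two address uses whose ivs are
   {base p_1 + 4*i, step 4} and {base p_1 + 4*i + 16, step 4} share the
   same base object, step and stripped base, so the second one is
   recorded as a sub use of the first; only the constant offsets
   (0 and 16) differ.  */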
2079
2080 static struct iv_use *
2081 record_group_use (struct ivopts_data *data, tree *use_p,
2082 struct iv *iv, gimple *stmt, enum use_type use_type)
2083 {
2084 unsigned int i;
2085 struct iv_use *use;
2086 tree addr_base;
2087 unsigned HOST_WIDE_INT addr_offset;
2088
2089 /* Only support sub uses for address type uses, that is, uses with a
2090 base object. */
2091 if (!iv->base_object)
2092 return record_use (data, use_p, iv, stmt, use_type);
2093
2094 addr_base = strip_offset (iv->base, &addr_offset);
2095 for (i = 0; i < n_iv_uses (data); i++)
2096 {
2097 use = iv_use (data, i);
2098 if (use->type != USE_ADDRESS || !use->iv->base_object)
2099 continue;
2100
2101 /* Check if it has the same stripped base and step. */
2102 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2103 && operand_equal_p (iv->step, use->iv->step, 0)
2104 && operand_equal_p (addr_base, use->addr_base, 0))
2105 break;
2106 }
2107
2108 if (i == n_iv_uses (data))
2109 return record_use (data, use_p, iv, stmt,
2110 use_type, addr_base, addr_offset);
2111 else
2112 return record_sub_use (data, use_p, iv, stmt,
2113 use_type, addr_base, addr_offset, i);
2114 }
2115
2116 /* Finds addresses in *OP_P inside STMT. */
2117
2118 static void
2119 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2120 tree *op_p)
2121 {
2122 tree base = *op_p, step = size_zero_node;
2123 struct iv *civ;
2124 struct ifs_ivopts_data ifs_ivopts_data;
2125
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt))
2129 goto fail;
2130
2131 /* Ignore bitfields for now. Not really something terribly complicated
2132 to handle. TODO. */
2133 if (TREE_CODE (base) == BIT_FIELD_REF)
2134 goto fail;
2135
2136 base = unshare_expr (base);
2137
2138 if (TREE_CODE (base) == TARGET_MEM_REF)
2139 {
2140 tree type = build_pointer_type (TREE_TYPE (base));
2141 tree astep;
2142
2143 if (TMR_BASE (base)
2144 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2145 {
2146 civ = get_iv (data, TMR_BASE (base));
2147 if (!civ)
2148 goto fail;
2149
2150 TMR_BASE (base) = civ->base;
2151 step = civ->step;
2152 }
2153 if (TMR_INDEX2 (base)
2154 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2155 {
2156 civ = get_iv (data, TMR_INDEX2 (base));
2157 if (!civ)
2158 goto fail;
2159
2160 TMR_INDEX2 (base) = civ->base;
2161 step = civ->step;
2162 }
2163 if (TMR_INDEX (base)
2164 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2165 {
2166 civ = get_iv (data, TMR_INDEX (base));
2167 if (!civ)
2168 goto fail;
2169
2170 TMR_INDEX (base) = civ->base;
2171 astep = civ->step;
2172
2173 if (astep)
2174 {
2175 if (TMR_STEP (base))
2176 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2177
2178 step = fold_build2 (PLUS_EXPR, type, step, astep);
2179 }
2180 }
2181
2182 if (integer_zerop (step))
2183 goto fail;
2184 base = tree_mem_ref_addr (type, base);
2185 }
2186 else
2187 {
2188 ifs_ivopts_data.ivopts_data = data;
2189 ifs_ivopts_data.stmt = stmt;
2190 ifs_ivopts_data.step = size_zero_node;
2191 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2192 || integer_zerop (ifs_ivopts_data.step))
2193 goto fail;
2194 step = ifs_ivopts_data.step;
2195
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base))
2199 goto fail;
2200
2201 /* Moreover, on strict alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2204 goto fail;
2205
2206 base = build_fold_addr_expr (base);
2207
2208 /* Substituting bases of IVs into the base expression might
2209 have caused folding opportunities. */
2210 if (TREE_CODE (base) == ADDR_EXPR)
2211 {
2212 tree *ref = &TREE_OPERAND (base, 0);
2213 while (handled_component_p (*ref))
2214 ref = &TREE_OPERAND (*ref, 0);
2215 if (TREE_CODE (*ref) == MEM_REF)
2216 {
2217 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2218 TREE_OPERAND (*ref, 0),
2219 TREE_OPERAND (*ref, 1));
2220 if (tem)
2221 *ref = tem;
2222 }
2223 }
2224 }
2225
2226 civ = alloc_iv (data, base, step);
2227 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2228 return;
2229
2230 fail:
2231 for_each_index (op_p, idx_record_use, data);
2232 }
2233
2234 /* Finds and records invariants used in STMT. */
2235
2236 static void
2237 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2238 {
2239 ssa_op_iter iter;
2240 use_operand_p use_p;
2241 tree op;
2242
2243 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2244 {
2245 op = USE_FROM_PTR (use_p);
2246 record_invariant (data, op, false);
2247 }
2248 }
2249
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2251
2252 static void
2253 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2254 {
2255 struct iv *iv;
2256 tree op, *lhs, *rhs;
2257 ssa_op_iter iter;
2258 use_operand_p use_p;
2259 enum tree_code code;
2260
2261 find_invariants_stmt (data, stmt);
2262
2263 if (gimple_code (stmt) == GIMPLE_COND)
2264 {
2265 find_interesting_uses_cond (data, stmt);
2266 return;
2267 }
2268
2269 if (is_gimple_assign (stmt))
2270 {
2271 lhs = gimple_assign_lhs_ptr (stmt);
2272 rhs = gimple_assign_rhs1_ptr (stmt);
2273
2274 if (TREE_CODE (*lhs) == SSA_NAME)
2275 {
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2278
2279 iv = get_iv (data, *lhs);
2280
2281 if (iv && !integer_zerop (iv->step))
2282 return;
2283 }
2284
2285 code = gimple_assign_rhs_code (stmt);
2286 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs)
2288 || is_gimple_val (*rhs)))
2289 {
2290 if (REFERENCE_CLASS_P (*rhs))
2291 find_interesting_uses_address (data, stmt, rhs);
2292 else
2293 find_interesting_uses_op (data, *rhs);
2294
2295 if (REFERENCE_CLASS_P (*lhs))
2296 find_interesting_uses_address (data, stmt, lhs);
2297 return;
2298 }
2299 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2300 {
2301 find_interesting_uses_cond (data, stmt);
2302 return;
2303 }
2304
2305 /* TODO -- we should also handle address uses of type
2306
2307 memory = call (whatever);
2308
2309 and
2310
2311 call (memory). */
2312 }
2313
2314 if (gimple_code (stmt) == GIMPLE_PHI
2315 && gimple_bb (stmt) == data->current_loop->header)
2316 {
2317 iv = get_iv (data, PHI_RESULT (stmt));
2318
2319 if (iv && !integer_zerop (iv->step))
2320 return;
2321 }
2322
2323 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2324 {
2325 op = USE_FROM_PTR (use_p);
2326
2327 if (TREE_CODE (op) != SSA_NAME)
2328 continue;
2329
2330 iv = get_iv (data, op);
2331 if (!iv)
2332 continue;
2333
2334 find_interesting_uses_op (data, op);
2335 }
2336 }
2337
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2340
2341 static void
2342 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2343 {
2344 gphi *phi;
2345 gphi_iterator psi;
2346 tree def;
2347
2348 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2349 {
2350 phi = psi.phi ();
2351 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2352 if (!virtual_operand_p (def))
2353 find_interesting_uses_op (data, def);
2354 }
2355 }
2356
2357 /* Finds uses of the induction variables that are interesting. */
2358
2359 static void
2360 find_interesting_uses (struct ivopts_data *data)
2361 {
2362 basic_block bb;
2363 gimple_stmt_iterator bsi;
2364 basic_block *body = get_loop_body (data->current_loop);
2365 unsigned i;
2366 struct version_info *info;
2367 edge e;
2368
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 fprintf (dump_file, "Uses:\n\n");
2371
2372 for (i = 0; i < data->current_loop->num_nodes; i++)
2373 {
2374 edge_iterator ei;
2375 bb = body[i];
2376
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2380 find_interesting_uses_outside (data, e);
2381
2382 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2383 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2384 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2385 if (!is_gimple_debug (gsi_stmt (bsi)))
2386 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2387 }
2388
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 {
2391 bitmap_iterator bi;
2392
2393 fprintf (dump_file, "\n");
2394
2395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2396 {
2397 info = ver_info (data, i);
2398 if (info->inv_id)
2399 {
2400 fprintf (dump_file, " ");
2401 print_generic_expr (dump_file, info->name, TDF_SLIM);
2402 fprintf (dump_file, " is invariant (%d)%s\n",
2403 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2404 }
2405 }
2406
2407 fprintf (dump_file, "\n");
2408 }
2409
2410 free (body);
2411 }
2412
2413 /* Compute maximum offset of [base + offset] addressing mode
2414 for memory reference represented by USE. */
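/* For example, on a hypothetical target whose [base + offset] form
   accepts 13-bit signed offsets for the given mode, the search below
   stops at i = 12 and caches 4095.  Results are remembered per address
   space and memory mode in the static max_offset_list vector.  */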
2415
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use *use)
2418 {
2419 int width;
2420 rtx reg, addr;
2421 HOST_WIDE_INT i, off;
2422 unsigned list_index, num;
2423 addr_space_t as;
2424 machine_mode mem_mode, addr_mode;
2425 static vec<HOST_WIDE_INT> max_offset_list;
2426
2427 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2428 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2429
2430 num = max_offset_list.length ();
2431 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2432 if (list_index >= num)
2433 {
2434 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2435 for (; num < max_offset_list.length (); num++)
2436 max_offset_list[num] = -1;
2437 }
2438
2439 off = max_offset_list[list_index];
2440 if (off != -1)
2441 return off;
2442
2443 addr_mode = targetm.addr_space.address_mode (as);
2444 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2445 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2446
2447 width = GET_MODE_BITSIZE (addr_mode) - 1;
2448 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2449 width = HOST_BITS_PER_WIDE_INT - 1;
2450
2451 for (i = width; i > 0; i--)
2452 {
2453 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2454 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2455 if (memory_address_addr_space_p (mem_mode, addr, as))
2456 break;
2457
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off = ((unsigned HOST_WIDE_INT) 1 << i);
2461 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2462 {
2463 off -= GET_MODE_SIZE (mem_mode);
2464 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2465 if (memory_address_addr_space_p (mem_mode, addr, as))
2466 break;
2467 }
2468 }
2469 if (i == 0)
2470 off = 0;
2471
2472 max_offset_list[list_index] = off;
2473 return off;
2474 }
2475
2476 /* Check if all small groups should be split. Return true if and
2477 only if:
2478
2479 1) At least one group contains two uses with different offsets.
2480 2) No group contains more than two uses with different offsets.
2481
2482 Return false otherwise. We want to split such groups because:
2483
2484 1) Small groups don't have much benefit and may interfere with
2485 general candidate selection.
2486 2) The problem size with only small groups is usually small and the
2487 general algorithm can handle it well.
2488
2489 TODO -- Above claim may not hold when auto increment is supported. */
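/* For instance, a group whose sub uses have offsets {0, 8} yields
   distinct == 2 and votes for splitting, while one with offsets
   {0, 8, 16} yields distinct == 3 and makes the function return
   false immediately.  */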
2490
2491 static bool
2492 split_all_small_groups (struct ivopts_data *data)
2493 {
2494 bool split_p = false;
2495 unsigned int i, n, distinct;
2496 struct iv_use *pre, *use;
2497
2498 n = n_iv_uses (data);
2499 for (i = 0; i < n; i++)
2500 {
2501 use = iv_use (data, i);
2502 if (!use->next)
2503 continue;
2504
2505 distinct = 1;
2506 gcc_assert (use->type == USE_ADDRESS);
2507 for (pre = use, use = use->next; use; pre = use, use = use->next)
2508 {
2509 if (pre->addr_offset != use->addr_offset)
2510 distinct++;
2511
2512 if (distinct > 2)
2513 return false;
2514 }
2515 if (distinct == 2)
2516 split_p = true;
2517 }
2518
2519 return split_p;
2520 }
2521
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by target's
2524 [base + offset] addressing mode. */
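/* A possible outcome, assuming a maximum supported offset of 4095 and
   sub uses listed in increasing offset order: uses at offsets 0, 16 and
   8192 relative to the first use end up in two groups -- {0, 16} stays
   together, while the use at offset 8192 starts a new group with a
   fresh id.  */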
2525
2526 static void
2527 group_address_uses (struct ivopts_data *data)
2528 {
2529 HOST_WIDE_INT max_offset = -1;
2530 unsigned int i, n, sub_id;
2531 struct iv_use *pre, *use;
2532 unsigned HOST_WIDE_INT addr_offset_first;
2533
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data))
2536 max_offset = 0;
2537
2538 n = n_iv_uses (data);
2539 for (i = 0; i < n; i++)
2540 {
2541 use = iv_use (data, i);
2542 if (!use->next)
2543 continue;
2544
2545 gcc_assert (use->type == USE_ADDRESS);
2546 if (max_offset != 0)
2547 max_offset = compute_max_addr_offset (use);
2548
2549 while (use)
2550 {
2551 sub_id = 0;
2552 addr_offset_first = use->addr_offset;
2553 /* Only uses whose offset relative to the first use fits in the
2554 offset part of the addressing mode can be grouped together. */
2555 for (pre = use, use = use->next;
2556 use && (use->addr_offset - addr_offset_first
2557 <= (unsigned HOST_WIDE_INT) max_offset);
2558 pre = use, use = use->next)
2559 {
2560 use->id = pre->id;
2561 use->sub_id = ++sub_id;
2562 }
2563
2564 /* Break the list and create new group. */
2565 if (use)
2566 {
2567 pre->next = NULL;
2568 use->id = n_iv_uses (data);
2569 use->related_cands = BITMAP_ALLOC (NULL);
2570 data->iv_uses.safe_push (use);
2571 }
2572 }
2573 }
2574
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 dump_uses (dump_file, data);
2577 }
2578
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
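/* Illustrative examples (exact trees depend on the front end):
     strip_offset_1 (p_1 + 20, false, false, &off)  yields p_1, off = 20;
     strip_offset_1 (&a[i_2 + 3], false, false, &off)  with 4-byte array
     elements yields &a[i_2], off = 12.  */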
2582
2583 static tree
2584 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2585 HOST_WIDE_INT *offset)
2586 {
2587 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2588 enum tree_code code;
2589 tree type, orig_type = TREE_TYPE (expr);
2590 HOST_WIDE_INT off0, off1, st;
2591 tree orig_expr = expr;
2592
2593 STRIP_NOPS (expr);
2594
2595 type = TREE_TYPE (expr);
2596 code = TREE_CODE (expr);
2597 *offset = 0;
2598
2599 switch (code)
2600 {
2601 case INTEGER_CST:
2602 if (!cst_and_fits_in_hwi (expr)
2603 || integer_zerop (expr))
2604 return orig_expr;
2605
2606 *offset = int_cst_value (expr);
2607 return build_int_cst (orig_type, 0);
2608
2609 case POINTER_PLUS_EXPR:
2610 case PLUS_EXPR:
2611 case MINUS_EXPR:
2612 op0 = TREE_OPERAND (expr, 0);
2613 op1 = TREE_OPERAND (expr, 1);
2614
2615 op0 = strip_offset_1 (op0, false, false, &off0);
2616 op1 = strip_offset_1 (op1, false, false, &off1);
2617
2618 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2619 if (op0 == TREE_OPERAND (expr, 0)
2620 && op1 == TREE_OPERAND (expr, 1))
2621 return orig_expr;
2622
2623 if (integer_zerop (op1))
2624 expr = op0;
2625 else if (integer_zerop (op0))
2626 {
2627 if (code == MINUS_EXPR)
2628 expr = fold_build1 (NEGATE_EXPR, type, op1);
2629 else
2630 expr = op1;
2631 }
2632 else
2633 expr = fold_build2 (code, type, op0, op1);
2634
2635 return fold_convert (orig_type, expr);
2636
2637 case MULT_EXPR:
2638 op1 = TREE_OPERAND (expr, 1);
2639 if (!cst_and_fits_in_hwi (op1))
2640 return orig_expr;
2641
2642 op0 = TREE_OPERAND (expr, 0);
2643 op0 = strip_offset_1 (op0, false, false, &off0);
2644 if (op0 == TREE_OPERAND (expr, 0))
2645 return orig_expr;
2646
2647 *offset = off0 * int_cst_value (op1);
2648 if (integer_zerop (op0))
2649 expr = op0;
2650 else
2651 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2652
2653 return fold_convert (orig_type, expr);
2654
2655 case ARRAY_REF:
2656 case ARRAY_RANGE_REF:
2657 if (!inside_addr)
2658 return orig_expr;
2659
2660 step = array_ref_element_size (expr);
2661 if (!cst_and_fits_in_hwi (step))
2662 break;
2663
2664 st = int_cst_value (step);
2665 op1 = TREE_OPERAND (expr, 1);
2666 op1 = strip_offset_1 (op1, false, false, &off1);
2667 *offset = off1 * st;
2668
2669 if (top_compref
2670 && integer_zerop (op1))
2671 {
2672 /* Strip the component reference completely. */
2673 op0 = TREE_OPERAND (expr, 0);
2674 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2675 *offset += off0;
2676 return op0;
2677 }
2678 break;
2679
2680 case COMPONENT_REF:
2681 {
2682 tree field;
2683
2684 if (!inside_addr)
2685 return orig_expr;
2686
2687 tmp = component_ref_field_offset (expr);
2688 field = TREE_OPERAND (expr, 1);
2689 if (top_compref
2690 && cst_and_fits_in_hwi (tmp)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2692 {
2693 HOST_WIDE_INT boffset, abs_off;
2694
2695 /* Strip the component reference completely. */
2696 op0 = TREE_OPERAND (expr, 0);
2697 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2698 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2699 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2700 if (boffset < 0)
2701 abs_off = -abs_off;
2702
2703 *offset = off0 + int_cst_value (tmp) + abs_off;
2704 return op0;
2705 }
2706 }
2707 break;
2708
2709 case ADDR_EXPR:
2710 op0 = TREE_OPERAND (expr, 0);
2711 op0 = strip_offset_1 (op0, true, true, &off0);
2712 *offset += off0;
2713
2714 if (op0 == TREE_OPERAND (expr, 0))
2715 return orig_expr;
2716
2717 expr = build_fold_addr_expr (op0);
2718 return fold_convert (orig_type, expr);
2719
2720 case MEM_REF:
2721 /* ??? Offset operand? */
2722 inside_addr = false;
2723 break;
2724
2725 default:
2726 return orig_expr;
2727 }
2728
2729 /* Default handling of expressions for which we want to recurse into
2730 the first operand. */
2731 op0 = TREE_OPERAND (expr, 0);
2732 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2733 *offset += off0;
2734
2735 if (op0 == TREE_OPERAND (expr, 0)
2736 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2737 return orig_expr;
2738
2739 expr = copy_node (expr);
2740 TREE_OPERAND (expr, 0) = op0;
2741 if (op1)
2742 TREE_OPERAND (expr, 1) = op1;
2743
2744 /* Inside address, we might strip the top level component references,
2745 thus changing type of the expression. Handling of ADDR_EXPR
2746 will fix that. */
2747 expr = fold_convert (orig_type, expr);
2748
2749 return expr;
2750 }
2751
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2753
2754 static tree
2755 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2756 {
2757 HOST_WIDE_INT off;
2758 tree core = strip_offset_1 (expr, false, false, &off);
2759 *offset = off;
2760 return core;
2761 }
2762
2763 /* Returns a variant of TYPE that can be used as the base for different uses.
2764 We return an unsigned type with the same precision, which avoids problems
2765 with overflows. */
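/* E.g. "int" becomes "unsigned int" and "unsigned long" is returned
   unchanged; for pointer types, unsigned_type_for is expected to give
   an unsigned integer type of the same precision.  */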
2766
2767 static tree
2768 generic_type_for (tree type)
2769 {
2770 if (POINTER_TYPE_P (type))
2771 return unsigned_type_for (type);
2772
2773 if (TYPE_UNSIGNED (type))
2774 return type;
2775
2776 return unsigned_type_for (type);
2777 }
2778
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap in which we should store them. */
2781
2782 static struct ivopts_data *fd_ivopts_data;
2783 static tree
2784 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2785 {
2786 bitmap *depends_on = (bitmap *) data;
2787 struct version_info *info;
2788
2789 if (TREE_CODE (*expr_p) != SSA_NAME)
2790 return NULL_TREE;
2791 info = name_info (fd_ivopts_data, *expr_p);
2792
2793 if (!info->inv_id || info->has_nonlin_use)
2794 return NULL_TREE;
2795
2796 if (!*depends_on)
2797 *depends_on = BITMAP_ALLOC (NULL);
2798 bitmap_set_bit (*depends_on, info->inv_id);
2799
2800 return NULL_TREE;
2801 }
2802
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2807
2808 static struct iv_cand *
2809 add_candidate_1 (struct ivopts_data *data,
2810 tree base, tree step, bool important, enum iv_position pos,
2811 struct iv_use *use, gimple *incremented_at,
2812 struct iv *orig_iv = NULL)
2813 {
2814 unsigned i;
2815 struct iv_cand *cand = NULL;
2816 tree type, orig_type;
2817
2818 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2819 live, but the ivopts code may replace a real pointer with one
2820 pointing before or after the memory block that is then adjusted
2821 into the memory block during the loop. FIXME: It would likely be
2822 better to actually force the pointer live and still use ivopts;
2823 for example, it would be enough to write the pointer into memory
2824 and keep it there until after the loop. */
2825 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2826 return NULL;
2827
2828 /* For non-original variables, make sure their values are computed in a type
2829 that does not invoke undefined behavior on overflows (since in general,
2830 we cannot prove that these induction variables are non-wrapping). */
2831 if (pos != IP_ORIGINAL)
2832 {
2833 orig_type = TREE_TYPE (base);
2834 type = generic_type_for (orig_type);
2835 if (type != orig_type)
2836 {
2837 base = fold_convert (type, base);
2838 step = fold_convert (type, step);
2839 }
2840 }
2841
2842 for (i = 0; i < n_iv_cands (data); i++)
2843 {
2844 cand = iv_cand (data, i);
2845
2846 if (cand->pos != pos)
2847 continue;
2848
2849 if (cand->incremented_at != incremented_at
2850 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2851 && cand->ainc_use != use))
2852 continue;
2853
2854 if (!cand->iv)
2855 {
2856 if (!base && !step)
2857 break;
2858
2859 continue;
2860 }
2861
2862 if (!base && !step)
2863 continue;
2864
2865 if (operand_equal_p (base, cand->iv->base, 0)
2866 && operand_equal_p (step, cand->iv->step, 0)
2867 && (TYPE_PRECISION (TREE_TYPE (base))
2868 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2869 break;
2870 }
2871
2872 if (i == n_iv_cands (data))
2873 {
2874 cand = XCNEW (struct iv_cand);
2875 cand->id = i;
2876
2877 if (!base && !step)
2878 cand->iv = NULL;
2879 else
2880 cand->iv = alloc_iv (data, base, step);
2881
2882 cand->pos = pos;
2883 if (pos != IP_ORIGINAL && cand->iv)
2884 {
2885 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2886 cand->var_after = cand->var_before;
2887 }
2888 cand->important = important;
2889 cand->incremented_at = incremented_at;
2890 data->iv_candidates.safe_push (cand);
2891
2892 if (step
2893 && TREE_CODE (step) != INTEGER_CST)
2894 {
2895 fd_ivopts_data = data;
2896 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2897 }
2898
2899 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2900 cand->ainc_use = use;
2901 else
2902 cand->ainc_use = NULL;
2903
2904 cand->orig_iv = orig_iv;
2905 if (dump_file && (dump_flags & TDF_DETAILS))
2906 dump_cand (dump_file, cand);
2907 }
2908
2909 if (important && !cand->important)
2910 {
2911 cand->important = true;
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2914 }
2915
2916 if (use)
2917 {
2918 bitmap_set_bit (use->related_cands, i);
2919 if (dump_file && (dump_flags & TDF_DETAILS))
2920 fprintf (dump_file, "Candidate %d is related to use %d\n",
2921 cand->id, use->id);
2922 }
2923
2924 return cand;
2925 }
2926
2927 /* Returns true if incrementing the induction variable at the end of the LOOP
2928 is allowed.
2929
2930 The purpose is to avoid splitting latch edge with a biv increment, thus
2931 creating a jump, possibly confusing other optimization passes and leaving
2932 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2933 is not available (so we do not have a better alternative), or if the latch
2934 edge is already nonempty. */
2935
2936 static bool
2937 allow_ip_end_pos_p (struct loop *loop)
2938 {
2939 if (!ip_normal_pos (loop))
2940 return true;
2941
2942 if (!empty_block_p (ip_end_pos (loop)))
2943 return true;
2944
2945 return false;
2946 }
2947
2948 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2949 Important field is set to IMPORTANT. */
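/* A sketch of the effect on a hypothetical target with post-increment
   addressing: for an address use *p_1 whose step equals the mode size
   of the access, a candidate with base p_1 and position IP_AFTER_USE is
   added, so that the increment can later be combined with the memory
   access.  */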
2950
2951 static void
2952 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2953 bool important, struct iv_use *use)
2954 {
2955 basic_block use_bb = gimple_bb (use->stmt);
2956 machine_mode mem_mode;
2957 unsigned HOST_WIDE_INT cstepi;
2958
2959 /* If we insert the increment in any position other than the standard
2960 ones, we must ensure that it is incremented once per iteration.
2961 It must not be in an inner nested loop, or one side of an if
2962 statement. */
2963 if (use_bb->loop_father != data->current_loop
2964 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2965 || stmt_could_throw_p (use->stmt)
2966 || !cst_and_fits_in_hwi (step))
2967 return;
2968
2969 cstepi = int_cst_value (step);
2970
2971 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2972 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2973 || USE_STORE_PRE_INCREMENT (mem_mode))
2974 && GET_MODE_SIZE (mem_mode) == cstepi)
2975 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2976 || USE_STORE_PRE_DECREMENT (mem_mode))
2977 && GET_MODE_SIZE (mem_mode) == -cstepi))
2978 {
2979 enum tree_code code = MINUS_EXPR;
2980 tree new_base;
2981 tree new_step = step;
2982
2983 if (POINTER_TYPE_P (TREE_TYPE (base)))
2984 {
2985 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2986 code = POINTER_PLUS_EXPR;
2987 }
2988 else
2989 new_step = fold_convert (TREE_TYPE (base), new_step);
2990 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2991 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2992 use->stmt);
2993 }
2994 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2995 || USE_STORE_POST_INCREMENT (mem_mode))
2996 && GET_MODE_SIZE (mem_mode) == cstepi)
2997 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2998 || USE_STORE_POST_DECREMENT (mem_mode))
2999 && GET_MODE_SIZE (mem_mode) == -cstepi))
3000 {
3001 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3002 use->stmt);
3003 }
3004 }
3005
3006 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3007 position to POS. If USE is not NULL, the candidate is set as related to
3008 it. The candidate computation is scheduled before exit condition and at
3009 the end of loop. */
3010
3011 static void
3012 add_candidate (struct ivopts_data *data,
3013 tree base, tree step, bool important, struct iv_use *use,
3014 struct iv *orig_iv = NULL)
3015 {
3016 gcc_assert (use == NULL || use->sub_id == 0);
3017
3018 if (ip_normal_pos (data->current_loop))
3019 add_candidate_1 (data, base, step, important,
3020 IP_NORMAL, use, NULL, orig_iv);
3021 if (ip_end_pos (data->current_loop)
3022 && allow_ip_end_pos_p (data->current_loop))
3023 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3024 }
3025
3026 /* Adds standard iv candidates. */
3027
3028 static void
3029 add_standard_iv_candidates (struct ivopts_data *data)
3030 {
3031 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3032
3033 /* The same for the "long" type if it is still fast enough. */
3034 if (TYPE_PRECISION
3035 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3036 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3037 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3038 build_int_cst (long_integer_type_node, 1), true, NULL);
3039
3040 /* The same for the "long long" type if it is still fast enough. */
3041 if (TYPE_PRECISION
3042 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3043 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3044 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3045 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3046 }
3047
3048
3049 /* Adds candidates based on the old induction variable IV. */
3050
3051 static void
3052 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3053 {
3054 gimple *phi;
3055 tree def;
3056 struct iv_cand *cand;
3057
3058 /* Check if this biv is used in an address type use. */
3059 if (iv->no_overflow && iv->have_address_use
3060 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3061 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3062 {
3063 tree base = fold_convert (sizetype, iv->base);
3064 tree step = fold_convert (sizetype, iv->step);
3065
3066 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3067 add_candidate (data, base, step, true, NULL, iv);
3068 /* Add iv cand of the original type only if it has nonlinear use. */
3069 if (iv->have_use_for)
3070 add_candidate (data, iv->base, iv->step, true, NULL);
3071 }
3072 else
3073 add_candidate (data, iv->base, iv->step, true, NULL);
3074
3075 /* The same, but with initial value zero. */
3076 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3077 add_candidate (data, size_int (0), iv->step, true, NULL);
3078 else
3079 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3080 iv->step, true, NULL);
3081
3082 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3083 if (gimple_code (phi) == GIMPLE_PHI)
3084 {
3085 /* Additionally record the possibility of leaving the original iv
3086 untouched. */
3087 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3088 /* Don't add candidate if it's from another PHI node because
3089 it's an affine iv appearing in the form of PEELED_CHREC. */
3090 phi = SSA_NAME_DEF_STMT (def);
3091 if (gimple_code (phi) != GIMPLE_PHI)
3092 {
3093 cand = add_candidate_1 (data,
3094 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3095 SSA_NAME_DEF_STMT (def));
3096 if (cand)
3097 {
3098 cand->var_before = iv->ssa_name;
3099 cand->var_after = def;
3100 }
3101 }
3102 else
3103 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3104 }
3105 }
3106
3107 /* Adds candidates based on the old induction variables. */
3108
3109 static void
3110 add_iv_candidate_for_bivs (struct ivopts_data *data)
3111 {
3112 unsigned i;
3113 struct iv *iv;
3114 bitmap_iterator bi;
3115
3116 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3117 {
3118 iv = ver_info (data, i)->iv;
3119 if (iv && iv->biv_p && !integer_zerop (iv->step))
3120 add_iv_candidate_for_biv (data, iv);
3121 }
3122 }
3123
3124 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3125
3126 static void
3127 record_common_cand (struct ivopts_data *data, tree base,
3128 tree step, struct iv_use *use)
3129 {
3130 struct iv_common_cand ent;
3131 struct iv_common_cand **slot;
3132
3133 gcc_assert (use != NULL);
3134
3135 ent.base = base;
3136 ent.step = step;
3137 ent.hash = iterative_hash_expr (base, 0);
3138 ent.hash = iterative_hash_expr (step, ent.hash);
3139
3140 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3141 if (*slot == NULL)
3142 {
3143 *slot = new iv_common_cand ();
3144 (*slot)->base = base;
3145 (*slot)->step = step;
3146 (*slot)->uses.create (8);
3147 (*slot)->hash = ent.hash;
3148 data->iv_common_cands.safe_push ((*slot));
3149 }
3150 (*slot)->uses.safe_push (use);
3151 return;
3152 }
3153
3154 /* Comparison function used to sort common candidates. */
3155
3156 static int
3157 common_cand_cmp (const void *p1, const void *p2)
3158 {
3159 unsigned n1, n2;
3160 const struct iv_common_cand *const *const ccand1
3161 = (const struct iv_common_cand *const *)p1;
3162 const struct iv_common_cand *const *const ccand2
3163 = (const struct iv_common_cand *const *)p2;
3164
3165 n1 = (*ccand1)->uses.length ();
3166 n2 = (*ccand2)->uses.length ();
3167 return n2 - n1;
3168 }
3169
3170 /* Adds IV candidates based on the common candidates recorded. */
3171
3172 static void
3173 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3174 {
3175 unsigned i, j;
3176 struct iv_cand *cand_1, *cand_2;
3177
3178 data->iv_common_cands.qsort (common_cand_cmp);
3179 for (i = 0; i < data->iv_common_cands.length (); i++)
3180 {
3181 struct iv_common_cand *ptr = data->iv_common_cands[i];
3182
3183 /* Only add IV candidate if it's derived from multiple uses. */
3184 if (ptr->uses.length () <= 1)
3185 break;
3186
3187 cand_1 = NULL;
3188 cand_2 = NULL;
3189 if (ip_normal_pos (data->current_loop))
3190 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3191 false, IP_NORMAL, NULL, NULL);
3192
3193 if (ip_end_pos (data->current_loop)
3194 && allow_ip_end_pos_p (data->current_loop))
3195 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3196 false, IP_END, NULL, NULL);
3197
3198 /* Bind deriving uses and the new candidates. */
3199 for (j = 0; j < ptr->uses.length (); j++)
3200 {
3201 struct iv_use *use = ptr->uses[j];
3202 if (cand_1)
3203 bitmap_set_bit (use->related_cands, cand_1->id);
3204 if (cand_2)
3205 bitmap_set_bit (use->related_cands, cand_2->id);
3206 }
3207 }
3208
3209 /* Release data since it is useless from this point. */
3210 data->iv_common_cand_tab->empty ();
3211 data->iv_common_cands.truncate (0);
3212 }
3213
3214 /* Adds candidates based on the value of USE's iv. */
3215
3216 static void
3217 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3218 {
3219 unsigned HOST_WIDE_INT offset;
3220 tree base;
3221 tree basetype;
3222 struct iv *iv = use->iv;
3223
3224 add_candidate (data, iv->base, iv->step, false, use);
3225
3226 /* Record common candidate for use in case it can be shared by others. */
3227 record_common_cand (data, iv->base, iv->step, use);
3228
3229 /* Record common candidate with initial value zero. */
3230 basetype = TREE_TYPE (iv->base);
3231 if (POINTER_TYPE_P (basetype))
3232 basetype = sizetype;
3233 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3234
3235 /* Record common candidate with constant offset stripped in base.
3236 Like the use itself, we also add candidate directly for it. */
3237 base = strip_offset (iv->base, &offset);
3238 if (offset || base != iv->base)
3239 {
3240 record_common_cand (data, base, iv->step, use);
3241 add_candidate (data, base, iv->step, false, use);
3242 }
3243
3244 /* Record common candidate with base_object removed in base. */
3245 if (iv->base_object != NULL)
3246 {
3247 unsigned i;
3248 aff_tree aff_base;
3249 tree step, base_object = iv->base_object;
3250
3251 base = iv->base;
3252 step = iv->step;
3253 STRIP_NOPS (base);
3254 STRIP_NOPS (step);
3255 STRIP_NOPS (base_object);
3256 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3257 for (i = 0; i < aff_base.n; i++)
3258 {
3259 if (aff_base.elts[i].coef != 1)
3260 continue;
3261
3262 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3263 break;
3264 }
3265 if (i < aff_base.n)
3266 {
3267 aff_combination_remove_elt (&aff_base, i);
3268 base = aff_combination_to_tree (&aff_base);
3269 basetype = TREE_TYPE (base);
3270 if (POINTER_TYPE_P (basetype))
3271 basetype = sizetype;
3272
3273 step = fold_convert (basetype, step);
3274 record_common_cand (data, base, step, use);
3275 /* Also record common candidate with offset stripped. */
3276 base = strip_offset (base, &offset);
3277 if (offset)
3278 record_common_cand (data, base, step, use);
3279 }
3280 }
3281
3282 /* Finally, add autoincrement candidates. Make such variables
3283 important since other iv uses with the same base object may be based
3284 on them. */
3285 if (use != NULL && use->type == USE_ADDRESS)
3286 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3287 }
3288
3289 /* Adds candidates based on the uses. */
3290
3291 static void
3292 add_iv_candidate_for_uses (struct ivopts_data *data)
3293 {
3294 unsigned i;
3295
3296 for (i = 0; i < n_iv_uses (data); i++)
3297 {
3298 struct iv_use *use = iv_use (data, i);
3299
3300 if (!use)
3301 continue;
3302
3303 switch (use->type)
3304 {
3305 case USE_NONLINEAR_EXPR:
3306 case USE_COMPARE:
3307 case USE_ADDRESS:
3308 /* Just add the ivs based on the value of the iv used here. */
3309 add_iv_candidate_for_use (data, use);
3310 break;
3311
3312 default:
3313 gcc_unreachable ();
3314 }
3315 }
3316 add_iv_candidate_derived_from_uses (data);
3317 }
3318
3319 /* Record important candidates and add them to related_cands bitmaps. */
3320
3321 static void
3322 record_important_candidates (struct ivopts_data *data)
3323 {
3324 unsigned i;
3325 struct iv_use *use;
3326
3327 for (i = 0; i < n_iv_cands (data); i++)
3328 {
3329 struct iv_cand *cand = iv_cand (data, i);
3330
3331 if (cand->important)
3332 bitmap_set_bit (data->important_candidates, i);
3333 }
3334
3335 data->consider_all_candidates = (n_iv_cands (data)
3336 <= CONSIDER_ALL_CANDIDATES_BOUND);
3337
3338 /* Add important candidates to uses' related_cands bitmaps. */
3339 for (i = 0; i < n_iv_uses (data); i++)
3340 {
3341 use = iv_use (data, i);
3342 bitmap_ior_into (use->related_cands, data->important_candidates);
3343 }
3344 }
3345
3346 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3347 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3348 we allocate a simple list to every use. */
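/* E.g. a use related to 5 candidates gets a cost map of size 8; the
   power-of-two size lets set_use_iv_cost and get_use_iv_cost find the
   slot with "cand->id & (n_map_members - 1)".  */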
3349
3350 static void
3351 alloc_use_cost_map (struct ivopts_data *data)
3352 {
3353 unsigned i, size, s;
3354
3355 for (i = 0; i < n_iv_uses (data); i++)
3356 {
3357 struct iv_use *use = iv_use (data, i);
3358
3359 if (data->consider_all_candidates)
3360 size = n_iv_cands (data);
3361 else
3362 {
3363 s = bitmap_count_bits (use->related_cands);
3364
3365 /* Round up to a power of two, so that computing the modulus is fast. */
3366 size = s ? (1 << ceil_log2 (s)) : 1;
3367 }
3368
3369 use->n_map_members = size;
3370 use->cost_map = XCNEWVEC (struct cost_pair, size);
3371 }
3372 }
3373
3374 /* Returns description of computation cost of expression whose runtime
3375 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3376
3377 static comp_cost
3378 new_cost (unsigned runtime, unsigned complexity)
3379 {
3380 comp_cost cost;
3381
3382 cost.cost = runtime;
3383 cost.complexity = complexity;
3384
3385 return cost;
3386 }
3387
3388 /* Returns true if COST is infinite. */
3389
3390 static bool
3391 infinite_cost_p (comp_cost cost)
3392 {
3393 return cost.cost == INFTY;
3394 }
3395
3396 /* Adds costs COST1 and COST2. */
3397
3398 static comp_cost
3399 add_costs (comp_cost cost1, comp_cost cost2)
3400 {
3401 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3402 return infinite_cost;
3403
3404 cost1.cost += cost2.cost;
3405 cost1.complexity += cost2.complexity;
3406
3407 return cost1;
3408 }
3409 /* Subtracts costs COST1 and COST2. */
3410
3411 static comp_cost
3412 sub_costs (comp_cost cost1, comp_cost cost2)
3413 {
3414 cost1.cost -= cost2.cost;
3415 cost1.complexity -= cost2.complexity;
3416
3417 return cost1;
3418 }
3419
3420 /* Returns a negative number if COST1 < COST2, a positive number if
3421 COST1 > COST2, and 0 if COST1 = COST2. */
3422
3423 static int
3424 compare_costs (comp_cost cost1, comp_cost cost2)
3425 {
3426 if (cost1.cost == cost2.cost)
3427 return cost1.complexity - cost2.complexity;
3428
3429 return cost1.cost - cost2.cost;
3430 }
3431
3432 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3433 on invariants DEPENDS_ON and that the value used in expressing it
3434 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3435
3436 static void
3437 set_use_iv_cost (struct ivopts_data *data,
3438 struct iv_use *use, struct iv_cand *cand,
3439 comp_cost cost, bitmap depends_on, tree value,
3440 enum tree_code comp, int inv_expr_id)
3441 {
3442 unsigned i, s;
3443
3444 if (infinite_cost_p (cost))
3445 {
3446 BITMAP_FREE (depends_on);
3447 return;
3448 }
3449
3450 if (data->consider_all_candidates)
3451 {
3452 use->cost_map[cand->id].cand = cand;
3453 use->cost_map[cand->id].cost = cost;
3454 use->cost_map[cand->id].depends_on = depends_on;
3455 use->cost_map[cand->id].value = value;
3456 use->cost_map[cand->id].comp = comp;
3457 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3458 return;
3459 }
3460
3461 /* n_map_members is a power of two, so this computes modulo. */
3462 s = cand->id & (use->n_map_members - 1);
3463 for (i = s; i < use->n_map_members; i++)
3464 if (!use->cost_map[i].cand)
3465 goto found;
3466 for (i = 0; i < s; i++)
3467 if (!use->cost_map[i].cand)
3468 goto found;
3469
3470 gcc_unreachable ();
3471
3472 found:
3473 use->cost_map[i].cand = cand;
3474 use->cost_map[i].cost = cost;
3475 use->cost_map[i].depends_on = depends_on;
3476 use->cost_map[i].value = value;
3477 use->cost_map[i].comp = comp;
3478 use->cost_map[i].inv_expr_id = inv_expr_id;
3479 }
3480
3481 /* Gets cost of (USE, CANDIDATE) pair. */
3482
3483 static struct cost_pair *
3484 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3485 struct iv_cand *cand)
3486 {
3487 unsigned i, s;
3488 struct cost_pair *ret;
3489
3490 if (!cand)
3491 return NULL;
3492
3493 if (data->consider_all_candidates)
3494 {
3495 ret = use->cost_map + cand->id;
3496 if (!ret->cand)
3497 return NULL;
3498
3499 return ret;
3500 }
3501
3502 /* n_map_members is a power of two, so this computes modulo. */
3503 s = cand->id & (use->n_map_members - 1);
3504 for (i = s; i < use->n_map_members; i++)
3505 if (use->cost_map[i].cand == cand)
3506 return use->cost_map + i;
3507 else if (use->cost_map[i].cand == NULL)
3508 return NULL;
3509 for (i = 0; i < s; i++)
3510 if (use->cost_map[i].cand == cand)
3511 return use->cost_map + i;
3512 else if (use->cost_map[i].cand == NULL)
3513 return NULL;
3514
3515 return NULL;
3516 }
3517
3518 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3519 static rtx
3520 produce_memory_decl_rtl (tree obj, int *regno)
3521 {
3522 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3523 machine_mode address_mode = targetm.addr_space.address_mode (as);
3524 rtx x;
3525
3526 gcc_assert (obj);
3527 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3528 {
3529 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3530 x = gen_rtx_SYMBOL_REF (address_mode, name);
3531 SET_SYMBOL_REF_DECL (x, obj);
3532 x = gen_rtx_MEM (DECL_MODE (obj), x);
3533 set_mem_addr_space (x, as);
3534 targetm.encode_section_info (obj, x, true);
3535 }
3536 else
3537 {
3538 x = gen_raw_REG (address_mode, (*regno)++);
3539 x = gen_rtx_MEM (DECL_MODE (obj), x);
3540 set_mem_addr_space (x, as);
3541 }
3542
3543 return x;
3544 }
3545
3546 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3547 walk_tree. DATA contains the actual fake register number. */
3548
3549 static tree
3550 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3551 {
3552 tree obj = NULL_TREE;
3553 rtx x = NULL_RTX;
3554 int *regno = (int *) data;
3555
3556 switch (TREE_CODE (*expr_p))
3557 {
3558 case ADDR_EXPR:
3559 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3560 handled_component_p (*expr_p);
3561 expr_p = &TREE_OPERAND (*expr_p, 0))
3562 continue;
3563 obj = *expr_p;
3564 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3565 x = produce_memory_decl_rtl (obj, regno);
3566 break;
3567
3568 case SSA_NAME:
3569 *ws = 0;
3570 obj = SSA_NAME_VAR (*expr_p);
3571 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3572 if (!obj)
3573 return NULL_TREE;
3574 if (!DECL_RTL_SET_P (obj))
3575 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3576 break;
3577
3578 case VAR_DECL:
3579 case PARM_DECL:
3580 case RESULT_DECL:
3581 *ws = 0;
3582 obj = *expr_p;
3583
3584 if (DECL_RTL_SET_P (obj))
3585 break;
3586
3587 if (DECL_MODE (obj) == BLKmode)
3588 x = produce_memory_decl_rtl (obj, regno);
3589 else
3590 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3591
3592 break;
3593
3594 default:
3595 break;
3596 }
3597
3598 if (x)
3599 {
3600 decl_rtl_to_reset.safe_push (obj);
3601 SET_DECL_RTL (obj, x);
3602 }
3603
3604 return NULL_TREE;
3605 }
3606
3607 /* Determines cost of the computation of EXPR. */
3608
3609 static unsigned
3610 computation_cost (tree expr, bool speed)
3611 {
3612 rtx_insn *seq;
3613 rtx rslt;
3614 tree type = TREE_TYPE (expr);
3615 unsigned cost;
3616 /* Avoid using hard regs in ways which may be unsupported. */
3617 int regno = LAST_VIRTUAL_REGISTER + 1;
3618 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3619 enum node_frequency real_frequency = node->frequency;
3620
3621 node->frequency = NODE_FREQUENCY_NORMAL;
3622 crtl->maybe_hot_insn_p = speed;
3623 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3624 start_sequence ();
3625 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3626 seq = get_insns ();
3627 end_sequence ();
3628 default_rtl_profile ();
3629 node->frequency = real_frequency;
3630
3631 cost = seq_cost (seq, speed);
3632 if (MEM_P (rslt))
3633 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3634 TYPE_ADDR_SPACE (type), speed);
3635 else if (!REG_P (rslt))
3636 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3637
3638 return cost;
3639 }
3640
3641 /* Returns variable containing the value of candidate CAND at statement AT. */
3642
3643 static tree
3644 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3645 {
3646 if (stmt_after_increment (loop, cand, stmt))
3647 return cand->var_after;
3648 else
3649 return cand->var_before;
3650 }
3651
3652 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3653 same precision that is at least as wide as the precision of TYPE, stores
3654 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3655 type of A and B. */
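/* For instance, on a target where "long" is wider than "unsigned int",
   if *A is (unsigned int) a_1 and *B is (unsigned int) b_2 with a_1 and
   b_2 of type "long", the function stores a_1 to *A, b_2 to *B and
   returns "long"; otherwise both are left untouched and the type of *A
   is returned.  */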
3656
3657 static tree
3658 determine_common_wider_type (tree *a, tree *b)
3659 {
3660 tree wider_type = NULL;
3661 tree suba, subb;
3662 tree atype = TREE_TYPE (*a);
3663
3664 if (CONVERT_EXPR_P (*a))
3665 {
3666 suba = TREE_OPERAND (*a, 0);
3667 wider_type = TREE_TYPE (suba);
3668 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3669 return atype;
3670 }
3671 else
3672 return atype;
3673
3674 if (CONVERT_EXPR_P (*b))
3675 {
3676 subb = TREE_OPERAND (*b, 0);
3677 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3678 return atype;
3679 }
3680 else
3681 return atype;
3682
3683 *a = suba;
3684 *b = subb;
3685 return wider_type;
3686 }
3687
3688 /* Determines the expression by that USE is expressed from induction variable
3689 CAND at statement AT in LOOP. The expression is stored in a decomposed
3690 form into AFF. Returns false if USE cannot be expressed using CAND. */
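/* With ratio = ustep / cstep the value of the use is rebuilt as

     use = ubase - ratio * cbase + ratio * var

   e.g. for a use {base 16, step 4} and a candidate {base 0, step 2}
   the ratio is 2 and the use is expressed as 16 + 2 * var.  */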
3691
3692 static bool
3693 get_computation_aff (struct loop *loop,
3694 struct iv_use *use, struct iv_cand *cand, gimple *at,
3695 struct aff_tree *aff)
3696 {
3697 tree ubase = use->iv->base;
3698 tree ustep = use->iv->step;
3699 tree cbase = cand->iv->base;
3700 tree cstep = cand->iv->step, cstep_common;
3701 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3702 tree common_type, var;
3703 tree uutype;
3704 aff_tree cbase_aff, var_aff;
3705 widest_int rat;
3706
3707 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3708 {
3709 /* We do not have enough precision to express the values of the use. */
3710 return false;
3711 }
3712
3713 var = var_at_stmt (loop, cand, at);
3714 uutype = unsigned_type_for (utype);
3715
3716 /* If the conversion is not noop, perform it. */
3717 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3718 {
3719 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3720 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3721 {
3722 tree inner_base, inner_step, inner_type;
3723 inner_base = TREE_OPERAND (cbase, 0);
3724 if (CONVERT_EXPR_P (cstep))
3725 inner_step = TREE_OPERAND (cstep, 0);
3726 else
3727 inner_step = cstep;
3728
3729 inner_type = TREE_TYPE (inner_base);
3730 /* If candidate is added from a biv whose type is smaller than
3731 ctype, we know both candidate and the biv won't overflow.
3732 In this case, it's safe to skip the conversion in the candidate.
3733 As an example, (unsigned short)((unsigned long)A) equals
3734 (unsigned short)A, if A has a type no larger than short. */
3735 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3736 {
3737 cbase = inner_base;
3738 cstep = inner_step;
3739 }
3740 }
3741 cstep = fold_convert (uutype, cstep);
3742 cbase = fold_convert (uutype, cbase);
3743 var = fold_convert (uutype, var);
3744 }
3745
3746 /* Ratio is 1 when computing the value of biv cand by itself.
3747 We can't rely on constant_multiple_of in this case because the
3748 use is created after the original biv is selected. The call
3749 could fail because of inconsistent fold behavior. See PR68021
3750 for more information. */
3751 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3752 {
3753 gcc_assert (is_gimple_assign (use->stmt));
3754 gcc_assert (use->iv->ssa_name == cand->var_after);
3755 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3756 rat = 1;
3757 }
3758 else if (!constant_multiple_of (ustep, cstep, &rat))
3759 return false;
3760
3761 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3762 type, we achieve better folding by computing their difference in this
3763 wider type, and cast the result to UUTYPE. We do not need to worry about
3764 overflows, as all the arithmetic will in the end be performed in UUTYPE
3765 anyway. */
3766 common_type = determine_common_wider_type (&ubase, &cbase);
3767
3768 /* use = ubase - ratio * cbase + ratio * var. */
3769 tree_to_aff_combination (ubase, common_type, aff);
3770 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3771 tree_to_aff_combination (var, uutype, &var_aff);
3772
3773 /* We need to shift the value if we are after the increment. */
3774 if (stmt_after_increment (loop, cand, at))
3775 {
3776 aff_tree cstep_aff;
3777
3778 if (common_type != uutype)
3779 cstep_common = fold_convert (common_type, cstep);
3780 else
3781 cstep_common = cstep;
3782
3783 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3784 aff_combination_add (&cbase_aff, &cstep_aff);
3785 }
3786
3787 aff_combination_scale (&cbase_aff, -rat);
3788 aff_combination_add (aff, &cbase_aff);
3789 if (common_type != uutype)
3790 aff_combination_convert (aff, uutype);
3791
3792 aff_combination_scale (&var_aff, rat);
3793 aff_combination_add (aff, &var_aff);
3794
3795 return true;
3796 }
3797
3798 /* Return the type of USE. */
3799
3800 static tree
3801 get_use_type (struct iv_use *use)
3802 {
3803 tree base_type = TREE_TYPE (use->iv->base);
3804 tree type;
3805
3806 if (use->type == USE_ADDRESS)
3807 {
3808 /* The base_type may be a void pointer. Create a pointer type based on
3809 the mem_ref instead. */
3810 type = build_pointer_type (TREE_TYPE (*use->op_p));
3811 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3812 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3813 }
3814 else
3815 type = base_type;
3816
3817 return type;
3818 }
3819
3820 /* Determines the expression by that USE is expressed from induction variable
3821 CAND at statement AT in LOOP. The computation is unshared. */
3822
3823 static tree
3824 get_computation_at (struct loop *loop,
3825 struct iv_use *use, struct iv_cand *cand, gimple *at)
3826 {
3827 aff_tree aff;
3828 tree type = get_use_type (use);
3829
3830 if (!get_computation_aff (loop, use, cand, at, &aff))
3831 return NULL_TREE;
3832 unshare_aff_combination (&aff);
3833 return fold_convert (type, aff_combination_to_tree (&aff));
3834 }
3835
3836 /* Determines the expression by that USE is expressed from induction variable
3837 CAND in LOOP. The computation is unshared. */
3838
3839 static tree
3840 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3841 {
3842 return get_computation_at (loop, use, cand, use->stmt);
3843 }
3844
3845 /* Adjust the cost COST for being in loop setup rather than loop body.
3846 If we're optimizing for space, the loop setup overhead is constant;
3847 if we're optimizing for speed, amortize it over the per-iteration cost. */
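/* E.g. a setup cost of 20 in a loop averaging 10 iterations contributes
   only 2 when optimizing for speed, but the full 20 when optimizing
   for size.  */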
3848 static unsigned
3849 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3850 {
3851 if (cost == INFTY)
3852 return cost;
3853 else if (optimize_loop_for_speed_p (data->current_loop))
3854 return cost / avg_loop_niter (data->current_loop);
3855 else
3856 return cost;
3857 }
3858
3859 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3860 validity for a memory reference accessing memory of mode MODE in
3861 address space AS. */
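/* A hypothetical example: on a target whose scaled addressing accepts
   index scales of 1, 2, 4 and 8 for the given mode, only those ratios
   (within [-MAX_RATIO, MAX_RATIO]) are marked valid in the cached
   bitmap below.  */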
3862
3863
3864 bool
3865 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3866 addr_space_t as)
3867 {
3868 #define MAX_RATIO 128
3869 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3870 static vec<sbitmap> valid_mult_list;
3871 sbitmap valid_mult;
3872
3873 if (data_index >= valid_mult_list.length ())
3874 valid_mult_list.safe_grow_cleared (data_index + 1);
3875
3876 valid_mult = valid_mult_list[data_index];
3877 if (!valid_mult)
3878 {
3879 machine_mode address_mode = targetm.addr_space.address_mode (as);
3880 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3881 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3882 rtx addr, scaled;
3883 HOST_WIDE_INT i;
3884
3885 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3886 bitmap_clear (valid_mult);
3887 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3888 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3889 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3890 {
3891 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3892 if (memory_address_addr_space_p (mode, addr, as)
3893 || memory_address_addr_space_p (mode, scaled, as))
3894 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3895 }
3896
3897 if (dump_file && (dump_flags & TDF_DETAILS))
3898 {
3899 fprintf (dump_file, " allowed multipliers:");
3900 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3901 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3902 fprintf (dump_file, " %d", (int) i);
3903 fprintf (dump_file, "\n");
3904 fprintf (dump_file, "\n");
3905 }
3906
3907 valid_mult_list[data_index] = valid_mult;
3908 }
3909
3910 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3911 return false;
3912
3913 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3914 }
3915
3916 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3917 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3918 variable is omitted. Compute the cost for a memory reference that accesses
3919 a memory location of mode MEM_MODE in address space AS.
3920
3921 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3922 size of MEM_MODE / RATIO) is available. To make this determination, we
3923 look at the size of the increment to be made, which is given in CSTEP.
3924 CSTEP may be zero if the step is unknown.
3925 STMT_AFTER_INC is true iff the statement we're looking at is after the
3926 increment of the original biv.
3927
3928 TODO -- there must be some better way. This all is quite crude. */
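/* As a concrete, purely illustrative example: for a use a[i] on a target
   with base + index * scale + displacement addressing, the address splits
   into symbol = &a, index = i, RATIO = sizeof (*a) and OFFSET = 0; the cost
   computed below estimates how expensive such an address is to form.  */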
3929
3930 enum ainc_type
3931 {
3932 AINC_PRE_INC, /* Pre increment. */
3933 AINC_PRE_DEC, /* Pre decrement. */
3934 AINC_POST_INC, /* Post increment. */
3935 AINC_POST_DEC, /* Post decrement. */
3936 AINC_NONE /* Also the number of auto increment types. */
3937 };
3938
3939 struct address_cost_data
3940 {
3941 HOST_WIDE_INT min_offset, max_offset;
3942 unsigned costs[2][2][2][2];
3943 unsigned ainc_costs[AINC_NONE];
3944 };
3945
3946
3947 static comp_cost
3948 get_address_cost (bool symbol_present, bool var_present,
3949 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3950 HOST_WIDE_INT cstep, machine_mode mem_mode,
3951 addr_space_t as, bool speed,
3952 bool stmt_after_inc, bool *may_autoinc)
3953 {
3954 machine_mode address_mode = targetm.addr_space.address_mode (as);
3955 static vec<address_cost_data *> address_cost_data_list;
3956 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3957 address_cost_data *data;
3958 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3959 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3960 unsigned cost, acost, complexity;
3961 enum ainc_type autoinc_type;
3962 bool offset_p, ratio_p, autoinc;
3963 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3964 unsigned HOST_WIDE_INT mask;
3965 unsigned bits;
3966
3967 if (data_index >= address_cost_data_list.length ())
3968 address_cost_data_list.safe_grow_cleared (data_index + 1);
3969
3970 data = address_cost_data_list[data_index];
3971 if (!data)
3972 {
3973 HOST_WIDE_INT i;
3974 HOST_WIDE_INT rat, off = 0;
3975 int old_cse_not_expected, width;
3976 unsigned sym_p, var_p, off_p, rat_p, add_c;
3977 rtx_insn *seq;
3978 rtx addr, base;
3979 rtx reg0, reg1;
3980
3981 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3982
3983 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3984
3985 width = GET_MODE_BITSIZE (address_mode) - 1;
3986 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3987 width = HOST_BITS_PER_WIDE_INT - 1;
3988 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3989
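      /* Find the most negative power-of-two displacement that is still
	 accepted in a "reg + offset" address; it serves as a conservative
	 lower bound on the offsets usable in addresses of MEM_MODE.  */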
3990 for (i = width; i >= 0; i--)
3991 {
3992 off = -((unsigned HOST_WIDE_INT) 1 << i);
3993 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3994 if (memory_address_addr_space_p (mem_mode, addr, as))
3995 break;
3996 }
3997 data->min_offset = (i == -1? 0 : off);
3998
3999 for (i = width; i >= 0; i--)
4000 {
4001 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4002 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4003 if (memory_address_addr_space_p (mem_mode, addr, as))
4004 break;
4005 /* For some strict-alignment targets, the offset must be naturally
4006 aligned. Try an aligned offset if mem_mode is not QImode. */
4007 off = mem_mode != QImode
4008 ? ((unsigned HOST_WIDE_INT) 1 << i)
4009 - GET_MODE_SIZE (mem_mode)
4010 : 0;
4011 if (off > 0)
4012 {
4013 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4014 if (memory_address_addr_space_p (mem_mode, addr, as))
4015 break;
4016 }
4017 }
4018 if (i == -1)
4019 off = 0;
4020 data->max_offset = off;
4021
4022 if (dump_file && (dump_flags & TDF_DETAILS))
4023 {
4024 fprintf (dump_file, "get_address_cost:\n");
4025 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4026 GET_MODE_NAME (mem_mode),
4027 data->min_offset);
4028 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4029 GET_MODE_NAME (mem_mode),
4030 data->max_offset);
4031 }
4032
4033 rat = 1;
4034 for (i = 2; i <= MAX_RATIO; i++)
4035 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4036 {
4037 rat = i;
4038 break;
4039 }
4040
4041 /* Compute the cost of various addressing modes. */
4042 acost = 0;
4043 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4044 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4045
4046 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4047 || USE_STORE_PRE_DECREMENT (mem_mode))
4048 {
4049 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4050 has_predec[mem_mode]
4051 = memory_address_addr_space_p (mem_mode, addr, as);
4052
4053 if (has_predec[mem_mode])
4054 data->ainc_costs[AINC_PRE_DEC]
4055 = address_cost (addr, mem_mode, as, speed);
4056 }
4057 if (USE_LOAD_POST_DECREMENT (mem_mode)
4058 || USE_STORE_POST_DECREMENT (mem_mode))
4059 {
4060 addr = gen_rtx_POST_DEC (address_mode, reg0);
4061 has_postdec[mem_mode]
4062 = memory_address_addr_space_p (mem_mode, addr, as);
4063
4064 if (has_postdec[mem_mode])
4065 data->ainc_costs[AINC_POST_DEC]
4066 = address_cost (addr, mem_mode, as, speed);
4067 }
4068 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4069 || USE_STORE_PRE_INCREMENT (mem_mode))
4070 {
4071 addr = gen_rtx_PRE_INC (address_mode, reg0);
4072 has_preinc[mem_mode]
4073 = memory_address_addr_space_p (mem_mode, addr, as);
4074
4075 if (has_preinc[mem_mode])
4076 data->ainc_costs[AINC_PRE_INC]
4077 = address_cost (addr, mem_mode, as, speed);
4078 }
4079 if (USE_LOAD_POST_INCREMENT (mem_mode)
4080 || USE_STORE_POST_INCREMENT (mem_mode))
4081 {
4082 addr = gen_rtx_POST_INC (address_mode, reg0);
4083 has_postinc[mem_mode]
4084 = memory_address_addr_space_p (mem_mode, addr, as);
4085
4086 if (has_postinc[mem_mode])
4087 data->ainc_costs[AINC_POST_INC]
4088 = address_cost (addr, mem_mode, as, speed);
4089 }
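      /* Now compute the cost of each of the 16 combinations of the
	 { symbol, var, offset, ratio } address parts by building a
	 representative address RTX, legitimizing it and measuring the cost
	 of the resulting insn sequence plus the address itself.  */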
4090 for (i = 0; i < 16; i++)
4091 {
4092 sym_p = i & 1;
4093 var_p = (i >> 1) & 1;
4094 off_p = (i >> 2) & 1;
4095 rat_p = (i >> 3) & 1;
4096
4097 addr = reg0;
4098 if (rat_p)
4099 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4100 gen_int_mode (rat, address_mode));
4101
4102 if (var_p)
4103 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4104
4105 if (sym_p)
4106 {
4107 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4108 /* ??? We can run into trouble with some backends by presenting
4109 it with symbols which haven't been properly passed through
4110 targetm.encode_section_info. By setting the local bit, we
4111 enhance the probability of things working. */
4112 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4113
4114 if (off_p)
4115 base = gen_rtx_fmt_e (CONST, address_mode,
4116 gen_rtx_fmt_ee
4117 (PLUS, address_mode, base,
4118 gen_int_mode (off, address_mode)));
4119 }
4120 else if (off_p)
4121 base = gen_int_mode (off, address_mode);
4122 else
4123 base = NULL_RTX;
4124
4125 if (base)
4126 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4127
4128 start_sequence ();
4129 /* To avoid splitting addressing modes, pretend that no cse will
4130 follow. */
4131 old_cse_not_expected = cse_not_expected;
4132 cse_not_expected = true;
4133 addr = memory_address_addr_space (mem_mode, addr, as);
4134 cse_not_expected = old_cse_not_expected;
4135 seq = get_insns ();
4136 end_sequence ();
4137
4138 acost = seq_cost (seq, speed);
4139 acost += address_cost (addr, mem_mode, as, speed);
4140
4141 if (!acost)
4142 acost = 1;
4143 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4144 }
4145
4146 /* On some targets, it is quite expensive to load a symbol into a register,
4147 which makes addresses that contain symbols look much more expensive.
4148 However, the symbol will have to be loaded in any case before the
4149 loop (and quite likely we have it in register already), so it does not
4150 make much sense to penalize them too heavily. So make some final
4151 tweaks for the SYMBOL_PRESENT modes:
4152
4153 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4154 var is cheaper, use this mode with small penalty.
4155 If VAR_PRESENT is true, try whether the mode with
4156 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4157 if this is the case, use it. */
4158 add_c = add_cost (speed, address_mode);
4159 for (i = 0; i < 8; i++)
4160 {
4161 var_p = i & 1;
4162 off_p = (i >> 1) & 1;
4163 rat_p = (i >> 2) & 1;
4164
4165 acost = data->costs[0][1][off_p][rat_p] + 1;
4166 if (var_p)
4167 acost += add_c;
4168
4169 if (acost < data->costs[1][var_p][off_p][rat_p])
4170 data->costs[1][var_p][off_p][rat_p] = acost;
4171 }
4172
4173 if (dump_file && (dump_flags & TDF_DETAILS))
4174 {
4175 fprintf (dump_file, "Address costs:\n");
4176
4177 for (i = 0; i < 16; i++)
4178 {
4179 sym_p = i & 1;
4180 var_p = (i >> 1) & 1;
4181 off_p = (i >> 2) & 1;
4182 rat_p = (i >> 3) & 1;
4183
4184 fprintf (dump_file, " ");
4185 if (sym_p)
4186 fprintf (dump_file, "sym + ");
4187 if (var_p)
4188 fprintf (dump_file, "var + ");
4189 if (off_p)
4190 fprintf (dump_file, "cst + ");
4191 if (rat_p)
4192 fprintf (dump_file, "rat * ");
4193
4194 acost = data->costs[sym_p][var_p][off_p][rat_p];
4195 fprintf (dump_file, "index costs %d\n", acost);
4196 }
4197 if (has_predec[mem_mode] || has_postdec[mem_mode]
4198 || has_preinc[mem_mode] || has_postinc[mem_mode])
4199 fprintf (dump_file, " May include autoinc/dec\n");
4200 fprintf (dump_file, "\n");
4201 }
4202
4203 address_cost_data_list[data_index] = data;
4204 }
4205
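  /* Addresses are computed modulo the width of the address mode, so
     sign-extend OFFSET from that width: a large unsigned OFFSET may in fact
     represent a small negative displacement.  */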
4206 bits = GET_MODE_BITSIZE (address_mode);
4207 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4208 offset &= mask;
4209 if ((offset >> (bits - 1) & 1))
4210 offset |= ~mask;
4211 s_offset = offset;
4212
4213 autoinc = false;
4214 autoinc_type = AINC_NONE;
4215 msize = GET_MODE_SIZE (mem_mode);
4216 autoinc_offset = offset;
4217 if (stmt_after_inc)
4218 autoinc_offset += ratio * cstep;
4219 if (symbol_present || var_present || ratio != 1)
4220 autoinc = false;
4221 else
4222 {
4223 if (has_postinc[mem_mode] && autoinc_offset == 0
4224 && msize == cstep)
4225 autoinc_type = AINC_POST_INC;
4226 else if (has_postdec[mem_mode] && autoinc_offset == 0
4227 && msize == -cstep)
4228 autoinc_type = AINC_POST_DEC;
4229 else if (has_preinc[mem_mode] && autoinc_offset == msize
4230 && msize == cstep)
4231 autoinc_type = AINC_PRE_INC;
4232 else if (has_predec[mem_mode] && autoinc_offset == -msize
4233 && msize == -cstep)
4234 autoinc_type = AINC_PRE_DEC;
4235
4236 if (autoinc_type != AINC_NONE)
4237 autoinc = true;
4238 }
4239
4240 cost = 0;
4241 offset_p = (s_offset != 0
4242 && data->min_offset <= s_offset
4243 && s_offset <= data->max_offset);
4244 ratio_p = (ratio != 1
4245 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4246
4247 if (ratio != 1 && !ratio_p)
4248 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4249
4250 if (s_offset && !offset_p && !symbol_present)
4251 cost += add_cost (speed, address_mode);
4252
4253 if (may_autoinc)
4254 *may_autoinc = autoinc;
4255 if (autoinc)
4256 acost = data->ainc_costs[autoinc_type];
4257 else
4258 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4259 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4260 return new_cost (cost + acost, complexity);
4261 }
4262
4263 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4264 EXPR operand holding the shift. COST0 and COST1 are the costs for
4265 calculating the operands of EXPR. Returns true if successful, and returns
4266 the cost in COST. */
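/* For example (illustrative): for EXPR = a + b * 8, MULT is the b * 8
   operand and m = 3; we compare the cost of a shift followed by an add with
   the target's combined shift-and-add cost and take the cheaper one.  */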
4267
4268 static bool
4269 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4270 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4271 {
4272 comp_cost res;
4273 tree op1 = TREE_OPERAND (expr, 1);
4274 tree cst = TREE_OPERAND (mult, 1);
4275 tree multop = TREE_OPERAND (mult, 0);
4276 int m = exact_log2 (int_cst_value (cst));
4277 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4278 int as_cost, sa_cost;
4279 bool mult_in_op1;
4280
4281 if (!(m >= 0 && m < maxm))
4282 return false;
4283
4284 STRIP_NOPS (op1);
4285 mult_in_op1 = operand_equal_p (op1, mult, 0);
4286
4287 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4288
4289 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4290 use that in preference to a shift insn followed by an add insn. */
4291 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4292 ? shiftadd_cost (speed, mode, m)
4293 : (mult_in_op1
4294 ? shiftsub1_cost (speed, mode, m)
4295 : shiftsub0_cost (speed, mode, m)));
4296
4297 res = new_cost (MIN (as_cost, sa_cost), 0);
4298 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4299
4300 STRIP_NOPS (multop);
4301 if (!is_gimple_val (multop))
4302 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4303
4304 *cost = res;
4305 return true;
4306 }
4307
4308 /* Estimates cost of forcing expression EXPR into a variable. */
4309
4310 static comp_cost
4311 force_expr_to_var_cost (tree expr, bool speed)
4312 {
4313 static bool costs_initialized = false;
4314 static unsigned integer_cost [2];
4315 static unsigned symbol_cost [2];
4316 static unsigned address_cost [2];
4317 tree op0, op1;
4318 comp_cost cost0, cost1, cost;
4319 machine_mode mode;
4320
4321 if (!costs_initialized)
4322 {
4323 tree type = build_pointer_type (integer_type_node);
4324 tree var, addr;
4325 rtx x;
4326 int i;
4327
4328 var = create_tmp_var_raw (integer_type_node, "test_var");
4329 TREE_STATIC (var) = 1;
4330 x = produce_memory_decl_rtl (var, NULL);
4331 SET_DECL_RTL (var, x);
4332
4333 addr = build1 (ADDR_EXPR, type, var);
4334
4335
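      /* Measure, once for size (i == 0) and once for speed (i == 1), the
	 cost of materializing a representative integer constant, a symbol
	 address and a symbol + offset address; these serve as estimates for
	 forcing similar expressions into variables below.  */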
4336 for (i = 0; i < 2; i++)
4337 {
4338 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4339 2000), i);
4340
4341 symbol_cost[i] = computation_cost (addr, i) + 1;
4342
4343 address_cost[i]
4344 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4345 if (dump_file && (dump_flags & TDF_DETAILS))
4346 {
4347 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4348 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4349 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4350 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4351 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4352 fprintf (dump_file, "\n");
4353 }
4354 }
4355
4356 costs_initialized = true;
4357 }
4358
4359 STRIP_NOPS (expr);
4360
4361 if (SSA_VAR_P (expr))
4362 return no_cost;
4363
4364 if (is_gimple_min_invariant (expr))
4365 {
4366 if (TREE_CODE (expr) == INTEGER_CST)
4367 return new_cost (integer_cost [speed], 0);
4368
4369 if (TREE_CODE (expr) == ADDR_EXPR)
4370 {
4371 tree obj = TREE_OPERAND (expr, 0);
4372
4373 if (TREE_CODE (obj) == VAR_DECL
4374 || TREE_CODE (obj) == PARM_DECL
4375 || TREE_CODE (obj) == RESULT_DECL)
4376 return new_cost (symbol_cost [speed], 0);
4377 }
4378
4379 return new_cost (address_cost [speed], 0);
4380 }
4381
4382 switch (TREE_CODE (expr))
4383 {
4384 case POINTER_PLUS_EXPR:
4385 case PLUS_EXPR:
4386 case MINUS_EXPR:
4387 case MULT_EXPR:
4388 op0 = TREE_OPERAND (expr, 0);
4389 op1 = TREE_OPERAND (expr, 1);
4390 STRIP_NOPS (op0);
4391 STRIP_NOPS (op1);
4392 break;
4393
4394 CASE_CONVERT:
4395 case NEGATE_EXPR:
4396 op0 = TREE_OPERAND (expr, 0);
4397 STRIP_NOPS (op0);
4398 op1 = NULL_TREE;
4399 break;
4400
4401 default:
4402 /* Just an arbitrary value, FIXME. */
4403 return new_cost (target_spill_cost[speed], 0);
4404 }
4405
4406 if (op0 == NULL_TREE
4407 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4408 cost0 = no_cost;
4409 else
4410 cost0 = force_expr_to_var_cost (op0, speed);
4411
4412 if (op1 == NULL_TREE
4413 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4414 cost1 = no_cost;
4415 else
4416 cost1 = force_expr_to_var_cost (op1, speed);
4417
4418 mode = TYPE_MODE (TREE_TYPE (expr));
4419 switch (TREE_CODE (expr))
4420 {
4421 case POINTER_PLUS_EXPR:
4422 case PLUS_EXPR:
4423 case MINUS_EXPR:
4424 case NEGATE_EXPR:
4425 cost = new_cost (add_cost (speed, mode), 0);
4426 if (TREE_CODE (expr) != NEGATE_EXPR)
4427 {
4428 tree mult = NULL_TREE;
4429 comp_cost sa_cost;
4430 if (TREE_CODE (op1) == MULT_EXPR)
4431 mult = op1;
4432 else if (TREE_CODE (op0) == MULT_EXPR)
4433 mult = op0;
4434
4435 if (mult != NULL_TREE
4436 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4437 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4438 speed, &sa_cost))
4439 return sa_cost;
4440 }
4441 break;
4442
4443 CASE_CONVERT:
4444 {
4445 tree inner_mode, outer_mode;
4446 outer_mode = TREE_TYPE (expr);
4447 inner_mode = TREE_TYPE (op0);
4448 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4449 TYPE_MODE (inner_mode), speed), 0);
4450 }
4451 break;
4452
4453 case MULT_EXPR:
4454 if (cst_and_fits_in_hwi (op0))
4455 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4456 mode, speed), 0);
4457 else if (cst_and_fits_in_hwi (op1))
4458 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4459 mode, speed), 0);
4460 else
4461 return new_cost (target_spill_cost [speed], 0);
4462 break;
4463
4464 default:
4465 gcc_unreachable ();
4466 }
4467
4468 cost = add_costs (cost, cost0);
4469 cost = add_costs (cost, cost1);
4470
4471 /* Bound the cost by target_spill_cost. The parts of complicated
4472 computations often are either loop invariant or at least can
4473 be shared between several iv uses, so letting this grow without
4474 limits would not give reasonable results. */
4475 if (cost.cost > (int) target_spill_cost [speed])
4476 cost.cost = target_spill_cost [speed];
4477
4478 return cost;
4479 }
4480
4481 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4482 invariants the computation depends on. */
4483
4484 static comp_cost
4485 force_var_cost (struct ivopts_data *data,
4486 tree expr, bitmap *depends_on)
4487 {
4488 if (depends_on)
4489 {
4490 fd_ivopts_data = data;
4491 walk_tree (&expr, find_depends, depends_on, NULL);
4492 }
4493
4494 return force_expr_to_var_cost (expr, data->speed);
4495 }
4496
4497 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4498 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4499 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4500 invariants the computation depends on. */
4501
4502 static comp_cost
4503 split_address_cost (struct ivopts_data *data,
4504 tree addr, bool *symbol_present, bool *var_present,
4505 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4506 {
4507 tree core;
4508 HOST_WIDE_INT bitsize;
4509 HOST_WIDE_INT bitpos;
4510 tree toffset;
4511 machine_mode mode;
4512 int unsignedp, reversep, volatilep;
4513
4514 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4515 &unsignedp, &reversep, &volatilep, false);
4516
4517 if (toffset != 0
4518 || bitpos % BITS_PER_UNIT != 0
4519 || reversep
4520 || TREE_CODE (core) != VAR_DECL)
4521 {
4522 *symbol_present = false;
4523 *var_present = true;
4524 fd_ivopts_data = data;
4525 if (depends_on)
4526 walk_tree (&addr, find_depends, depends_on, NULL);
4527
4528 return new_cost (target_spill_cost[data->speed], 0);
4529 }
4530
4531 *offset += bitpos / BITS_PER_UNIT;
4532 if (TREE_STATIC (core)
4533 || DECL_EXTERNAL (core))
4534 {
4535 *symbol_present = true;
4536 *var_present = false;
4537 return no_cost;
4538 }
4539
4540 *symbol_present = false;
4541 *var_present = true;
4542 return no_cost;
4543 }
4544
4545 /* Estimates cost of expressing difference of addresses E1 - E2 as
4546 var + symbol + offset. The value of offset is added to OFFSET,
4547 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4548 part is missing. DEPENDS_ON is a set of the invariants the computation
4549 depends on. */
4550
4551 static comp_cost
4552 ptr_difference_cost (struct ivopts_data *data,
4553 tree e1, tree e2, bool *symbol_present, bool *var_present,
4554 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4555 {
4556 HOST_WIDE_INT diff = 0;
4557 aff_tree aff_e1, aff_e2;
4558 tree type;
4559
4560 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4561
4562 if (ptr_difference_const (e1, e2, &diff))
4563 {
4564 *offset += diff;
4565 *symbol_present = false;
4566 *var_present = false;
4567 return no_cost;
4568 }
4569
4570 if (integer_zerop (e2))
4571 return split_address_cost (data, TREE_OPERAND (e1, 0),
4572 symbol_present, var_present, offset, depends_on);
4573
4574 *symbol_present = false;
4575 *var_present = true;
4576
4577 type = signed_type_for (TREE_TYPE (e1));
4578 tree_to_aff_combination (e1, type, &aff_e1);
4579 tree_to_aff_combination (e2, type, &aff_e2);
4580 aff_combination_scale (&aff_e2, -1);
4581 aff_combination_add (&aff_e1, &aff_e2);
4582
4583 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4584 }
4585
4586 /* Estimates cost of expressing difference E1 - E2 as
4587 var + symbol + offset. The value of offset is added to OFFSET,
4588 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4589 part is missing. DEPENDS_ON is a set of the invariants the computation
4590 depends on. */
4591
4592 static comp_cost
4593 difference_cost (struct ivopts_data *data,
4594 tree e1, tree e2, bool *symbol_present, bool *var_present,
4595 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4596 {
4597 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4598 unsigned HOST_WIDE_INT off1, off2;
4599 aff_tree aff_e1, aff_e2;
4600 tree type;
4601
4602 e1 = strip_offset (e1, &off1);
4603 e2 = strip_offset (e2, &off2);
4604 *offset += off1 - off2;
4605
4606 STRIP_NOPS (e1);
4607 STRIP_NOPS (e2);
4608
4609 if (TREE_CODE (e1) == ADDR_EXPR)
4610 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4611 offset, depends_on);
4612 *symbol_present = false;
4613
4614 if (operand_equal_p (e1, e2, 0))
4615 {
4616 *var_present = false;
4617 return no_cost;
4618 }
4619
4620 *var_present = true;
4621
4622 if (integer_zerop (e2))
4623 return force_var_cost (data, e1, depends_on);
4624
4625 if (integer_zerop (e1))
4626 {
4627 comp_cost cost = force_var_cost (data, e2, depends_on);
4628 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4629 return cost;
4630 }
4631
4632 type = signed_type_for (TREE_TYPE (e1));
4633 tree_to_aff_combination (e1, type, &aff_e1);
4634 tree_to_aff_combination (e2, type, &aff_e2);
4635 aff_combination_scale (&aff_e2, -1);
4636 aff_combination_add (&aff_e1, &aff_e2);
4637
4638 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4639 }
4640
4641 /* Returns true if AFF1 and AFF2 are identical. */
4642
4643 static bool
4644 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4645 {
4646 unsigned i;
4647
4648 if (aff1->n != aff2->n)
4649 return false;
4650
4651 for (i = 0; i < aff1->n; i++)
4652 {
4653 if (aff1->elts[i].coef != aff2->elts[i].coef)
4654 return false;
4655
4656 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4657 return false;
4658 }
4659 return true;
4660 }
4661
4662 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4663
4664 static int
4665 get_expr_id (struct ivopts_data *data, tree expr)
4666 {
4667 struct iv_inv_expr_ent ent;
4668 struct iv_inv_expr_ent **slot;
4669
4670 ent.expr = expr;
4671 ent.hash = iterative_hash_expr (expr, 0);
4672 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4673 if (*slot)
4674 return (*slot)->id;
4675
4676 *slot = XNEW (struct iv_inv_expr_ent);
4677 (*slot)->expr = expr;
4678 (*slot)->hash = ent.hash;
4679 (*slot)->id = data->inv_expr_id++;
4680 return (*slot)->id;
4681 }
4682
4683 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4684 requires a new compiler generated temporary. Returns -1 otherwise.
4685 ADDRESS_P is a flag indicating if the expression is for address
4686 computation. */
4687
4688 static int
4689 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4690 tree cbase, HOST_WIDE_INT ratio,
4691 bool address_p)
4692 {
4693 aff_tree ubase_aff, cbase_aff;
4694 tree expr, ub, cb;
4695
4696 STRIP_NOPS (ubase);
4697 STRIP_NOPS (cbase);
4698 ub = ubase;
4699 cb = cbase;
4700
4701 if ((TREE_CODE (ubase) == INTEGER_CST)
4702 && (TREE_CODE (cbase) == INTEGER_CST))
4703 return -1;
4704
4705 /* Strips the constant part. */
4706 if (TREE_CODE (ubase) == PLUS_EXPR
4707 || TREE_CODE (ubase) == MINUS_EXPR
4708 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4709 {
4710 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4711 ubase = TREE_OPERAND (ubase, 0);
4712 }
4713
4714 /* Strips the constant part. */
4715 if (TREE_CODE (cbase) == PLUS_EXPR
4716 || TREE_CODE (cbase) == MINUS_EXPR
4717 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4718 {
4719 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4720 cbase = TREE_OPERAND (cbase, 0);
4721 }
4722
4723 if (address_p)
4724 {
4725 if (((TREE_CODE (ubase) == SSA_NAME)
4726 || (TREE_CODE (ubase) == ADDR_EXPR
4727 && is_gimple_min_invariant (ubase)))
4728 && (TREE_CODE (cbase) == INTEGER_CST))
4729 return -1;
4730
4731 if (((TREE_CODE (cbase) == SSA_NAME)
4732 || (TREE_CODE (cbase) == ADDR_EXPR
4733 && is_gimple_min_invariant (cbase)))
4734 && (TREE_CODE (ubase) == INTEGER_CST))
4735 return -1;
4736 }
4737
4738 if (ratio == 1)
4739 {
4740 if (operand_equal_p (ubase, cbase, 0))
4741 return -1;
4742
4743 if (TREE_CODE (ubase) == ADDR_EXPR
4744 && TREE_CODE (cbase) == ADDR_EXPR)
4745 {
4746 tree usym, csym;
4747
4748 usym = TREE_OPERAND (ubase, 0);
4749 csym = TREE_OPERAND (cbase, 0);
4750 if (TREE_CODE (usym) == ARRAY_REF)
4751 {
4752 tree ind = TREE_OPERAND (usym, 1);
4753 if (TREE_CODE (ind) == INTEGER_CST
4754 && tree_fits_shwi_p (ind)
4755 && tree_to_shwi (ind) == 0)
4756 usym = TREE_OPERAND (usym, 0);
4757 }
4758 if (TREE_CODE (csym) == ARRAY_REF)
4759 {
4760 tree ind = TREE_OPERAND (csym, 1);
4761 if (TREE_CODE (ind) == INTEGER_CST
4762 && tree_fits_shwi_p (ind)
4763 && tree_to_shwi (ind) == 0)
4764 csym = TREE_OPERAND (csym, 0);
4765 }
4766 if (operand_equal_p (usym, csym, 0))
4767 return -1;
4768 }
4769 /* Now do a more complex comparison.  */
4770 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4771 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4772 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4773 return -1;
4774 }
4775
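  /* Build UB - RATIO * CB as an affine combination and record it in the
     invariant expression table, returning its id.  */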
4776 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4777 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4778
4779 aff_combination_scale (&cbase_aff, -1 * ratio);
4780 aff_combination_add (&ubase_aff, &cbase_aff);
4781 expr = aff_combination_to_tree (&ubase_aff);
4782 return get_expr_id (data, expr);
4783 }
4784
4785
4786
4787 /* Determines the cost of the computation by which USE is expressed
4788 from induction variable CAND. If ADDRESS_P is true, we just need
4789 to create an address from it, otherwise we want to get it into
4790 register. A set of invariants we depend on is stored in
4791 DEPENDS_ON. AT is the statement at that the value is computed.
4792 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4793 addressing is likely. */
4794
4795 static comp_cost
4796 get_computation_cost_at (struct ivopts_data *data,
4797 struct iv_use *use, struct iv_cand *cand,
4798 bool address_p, bitmap *depends_on, gimple *at,
4799 bool *can_autoinc,
4800 int *inv_expr_id)
4801 {
4802 tree ubase = use->iv->base, ustep = use->iv->step;
4803 tree cbase, cstep;
4804 tree utype = TREE_TYPE (ubase), ctype;
4805 unsigned HOST_WIDE_INT cstepi, offset = 0;
4806 HOST_WIDE_INT ratio, aratio;
4807 bool var_present, symbol_present, stmt_is_after_inc;
4808 comp_cost cost;
4809 widest_int rat;
4810 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4811 machine_mode mem_mode = (address_p
4812 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4813 : VOIDmode);
4814
4815 if (depends_on)
4816 *depends_on = NULL;
4817
4818 /* Only consider real candidates. */
4819 if (!cand->iv)
4820 return infinite_cost;
4821
4822 cbase = cand->iv->base;
4823 cstep = cand->iv->step;
4824 ctype = TREE_TYPE (cbase);
4825
4826 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4827 {
4828 /* We do not have a precision to express the values of use. */
4829 return infinite_cost;
4830 }
4831
4832 if (address_p
4833 || (use->iv->base_object
4834 && cand->iv->base_object
4835 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4836 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4837 {
4838 /* Do not try to express address of an object with computation based
4839 on address of a different object. This may cause problems in rtl
4840 level alias analysis (that does not expect this to be happening,
4841 as this is illegal in C), and would be unlikely to be useful
4842 anyway. */
4843 if (use->iv->base_object
4844 && cand->iv->base_object
4845 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4846 return infinite_cost;
4847 }
4848
4849 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4850 {
4851 /* TODO -- add direct handling of this case. */
4852 goto fallback;
4853 }
4854
4855 /* CSTEPI is removed from the offset in case statement is after the
4856 increment. If the step is not constant, we use zero instead.
4857 This is a bit imprecise (there is the extra addition), but
4858 redundancy elimination is likely to transform the code so that
4859 it uses the value of the variable before the increment anyway,
4860 so this is not too unrealistic. */
4861 if (cst_and_fits_in_hwi (cstep))
4862 cstepi = int_cst_value (cstep);
4863 else
4864 cstepi = 0;
4865
4866 if (!constant_multiple_of (ustep, cstep, &rat))
4867 return infinite_cost;
4868
4869 if (wi::fits_shwi_p (rat))
4870 ratio = rat.to_shwi ();
4871 else
4872 return infinite_cost;
4873
4874 STRIP_NOPS (cbase);
4875 ctype = TREE_TYPE (cbase);
4876
4877 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4878
4879 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4880 or ratio == 1, it is better to handle this like
4881
4882 ubase - ratio * cbase + ratio * var
4883
4884 (this also holds in the case ratio == -1, TODO).  */
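  /* For instance (illustrative only): with ubase = b + 16, cbase = 4 and
     ratio = 4, the use is rewritten as (b + 16 - 4 * 4) + 4 * var
     = b + 4 * var, so only the cost of b and of the multiplication by 4
     remains per iteration.  */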
4885
4886 if (cst_and_fits_in_hwi (cbase))
4887 {
4888 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4889 cost = difference_cost (data,
4890 ubase, build_int_cst (utype, 0),
4891 &symbol_present, &var_present, &offset,
4892 depends_on);
4893 cost.cost /= avg_loop_niter (data->current_loop);
4894 }
4895 else if (ratio == 1)
4896 {
4897 tree real_cbase = cbase;
4898
4899 /* Check to see if any adjustment is needed. */
4900 if (cstepi == 0 && stmt_is_after_inc)
4901 {
4902 aff_tree real_cbase_aff;
4903 aff_tree cstep_aff;
4904
4905 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4906 &real_cbase_aff);
4907 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4908
4909 aff_combination_add (&real_cbase_aff, &cstep_aff);
4910 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4911 }
4912
4913 cost = difference_cost (data,
4914 ubase, real_cbase,
4915 &symbol_present, &var_present, &offset,
4916 depends_on);
4917 cost.cost /= avg_loop_niter (data->current_loop);
4918 }
4919 else if (address_p
4920 && !POINTER_TYPE_P (ctype)
4921 && multiplier_allowed_in_address_p
4922 (ratio, mem_mode,
4923 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4924 {
4925 if (cstepi == 0 && stmt_is_after_inc)
4926 {
4927 if (POINTER_TYPE_P (ctype))
4928 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4929 else
4930 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4931 }
4932 cbase
4933 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4934 cost = difference_cost (data,
4935 ubase, cbase,
4936 &symbol_present, &var_present, &offset,
4937 depends_on);
4938 cost.cost /= avg_loop_niter (data->current_loop);
4939 }
4940 else
4941 {
4942 cost = force_var_cost (data, cbase, depends_on);
4943 cost = add_costs (cost,
4944 difference_cost (data,
4945 ubase, build_int_cst (utype, 0),
4946 &symbol_present, &var_present,
4947 &offset, depends_on));
4948 cost.cost /= avg_loop_niter (data->current_loop);
4949 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4950 }
4951
4952 /* The set of invariants depended on by a sub-use has already been computed
4953 for the first use in the group. */
4954 if (use->sub_id)
4955 {
4956 cost.cost = 0;
4957 if (depends_on && *depends_on)
4958 bitmap_clear (*depends_on);
4959 }
4960 else if (inv_expr_id)
4961 {
4962 *inv_expr_id =
4963 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4964 /* Clear depends on. */
4965 if (*inv_expr_id != -1 && depends_on && *depends_on)
4966 bitmap_clear (*depends_on);
4967 }
4968
4969 /* If we are after the increment, the value of the candidate is higher by
4970 one iteration. */
4971 if (stmt_is_after_inc)
4972 offset -= ratio * cstepi;
4973
4974 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4975 (symbol/var1/const parts may be omitted). If we are looking for an
4976 address, find the cost of addressing this. */
4977 if (address_p)
4978 return add_costs (cost,
4979 get_address_cost (symbol_present, var_present,
4980 offset, ratio, cstepi,
4981 mem_mode,
4982 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4983 speed, stmt_is_after_inc,
4984 can_autoinc));
4985
4986 /* Otherwise estimate the costs for computing the expression. */
4987 if (!symbol_present && !var_present && !offset)
4988 {
4989 if (ratio != 1)
4990 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4991 return cost;
4992 }
4993
4994 /* Symbol + offset should be compile-time computable so consider that they
4995 are added once to the variable, if present. */
4996 if (var_present && (symbol_present || offset))
4997 cost.cost += adjust_setup_cost (data,
4998 add_cost (speed, TYPE_MODE (ctype)));
4999
5000 /* Having offset does not affect runtime cost in case it is added to
5001 symbol, but it increases complexity. */
5002 if (offset)
5003 cost.complexity++;
5004
5005 cost.cost += add_cost (speed, TYPE_MODE (ctype));
5006
5007 aratio = ratio > 0 ? ratio : -ratio;
5008 if (aratio != 1)
5009 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5010 return cost;
5011
5012 fallback:
5013 if (can_autoinc)
5014 *can_autoinc = false;
5015
5016 {
5017 /* Just get the expression, expand it and measure the cost. */
5018 tree comp = get_computation_at (data->current_loop, use, cand, at);
5019
5020 if (!comp)
5021 return infinite_cost;
5022
5023 if (address_p)
5024 comp = build_simple_mem_ref (comp);
5025
5026 return new_cost (computation_cost (comp, speed), 0);
5027 }
5028 }
5029
5030 /* Determines the cost of the computation by which USE is expressed
5031 from induction variable CAND. If ADDRESS_P is true, we just need
5032 to create an address from it, otherwise we want to get it into
5033 register. A set of invariants we depend on is stored in
5034 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5035 autoinc addressing is likely. */
5036
5037 static comp_cost
5038 get_computation_cost (struct ivopts_data *data,
5039 struct iv_use *use, struct iv_cand *cand,
5040 bool address_p, bitmap *depends_on,
5041 bool *can_autoinc, int *inv_expr_id)
5042 {
5043 return get_computation_cost_at (data,
5044 use, cand, address_p, depends_on, use->stmt,
5045 can_autoinc, inv_expr_id);
5046 }
5047
5048 /* Determines cost of basing replacement of USE on CAND in a generic
5049 expression. */
5050
5051 static bool
5052 determine_use_iv_cost_generic (struct ivopts_data *data,
5053 struct iv_use *use, struct iv_cand *cand)
5054 {
5055 bitmap depends_on;
5056 comp_cost cost;
5057 int inv_expr_id = -1;
5058
5059 /* The simple case first -- if we need to express the value of the preserved
5060 original biv, the cost is 0. This also prevents us from counting the
5061 cost of increment twice -- once at this use and once in the cost of
5062 the candidate. */
5063 if (cand->pos == IP_ORIGINAL
5064 && cand->incremented_at == use->stmt)
5065 {
5066 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5067 ERROR_MARK, -1);
5068 return true;
5069 }
5070
5071 cost = get_computation_cost (data, use, cand, false, &depends_on,
5072 NULL, &inv_expr_id);
5073
5074 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5075 inv_expr_id);
5076
5077 return !infinite_cost_p (cost);
5078 }
5079
5080 /* Determines cost of basing replacement of USE on CAND in an address. */
5081
5082 static bool
5083 determine_use_iv_cost_address (struct ivopts_data *data,
5084 struct iv_use *use, struct iv_cand *cand)
5085 {
5086 bitmap depends_on;
5087 bool can_autoinc;
5088 int inv_expr_id = -1;
5089 struct iv_use *sub_use;
5090 comp_cost sub_cost;
5091 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5092 &can_autoinc, &inv_expr_id);
5093
5094 if (cand->ainc_use == use)
5095 {
5096 if (can_autoinc)
5097 cost.cost -= cand->cost_step;
5098 /* If we generated the candidate solely for exploiting autoincrement
5099 opportunities, and it turns out it can't be used, set the cost to
5100 infinity to make sure we ignore it. */
5101 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5102 cost = infinite_cost;
5103 }
5104 for (sub_use = use->next;
5105 sub_use && !infinite_cost_p (cost);
5106 sub_use = sub_use->next)
5107 {
5108 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
5109 &can_autoinc, NULL);
5110 cost = add_costs (cost, sub_cost);
5111 }
5112
5113 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5114 inv_expr_id);
5115
5116 return !infinite_cost_p (cost);
5117 }
5118
5119 /* Computes value of candidate CAND at position AT in iteration NITER, and
5120 stores it to VAL. */
5121
5122 static void
5123 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5124 aff_tree *val)
5125 {
5126 aff_tree step, delta, nit;
5127 struct iv *iv = cand->iv;
5128 tree type = TREE_TYPE (iv->base);
5129 tree steptype = type;
5130 if (POINTER_TYPE_P (type))
5131 steptype = sizetype;
5132 steptype = unsigned_type_for (type);
5133
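  /* Compute VAL = BASE + NITER * STEP (plus one extra STEP if AT is after
     the candidate's increment), doing the arithmetic in an unsigned type so
     that the multiplication cannot cause undefined overflow.  */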
5134 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5135 aff_combination_convert (&step, steptype);
5136 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5137 aff_combination_convert (&nit, steptype);
5138 aff_combination_mult (&nit, &step, &delta);
5139 if (stmt_after_increment (loop, cand, at))
5140 aff_combination_add (&delta, &step);
5141
5142 tree_to_aff_combination (iv->base, type, val);
5143 if (!POINTER_TYPE_P (type))
5144 aff_combination_convert (val, steptype);
5145 aff_combination_add (val, &delta);
5146 }
5147
5148 /* Returns period of induction variable iv. */
5149
5150 static tree
5151 iv_period (struct iv *iv)
5152 {
5153 tree step = iv->step, period, type;
5154 tree pow2div;
5155
5156 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5157
5158 type = unsigned_type_for (TREE_TYPE (step));
5159 /* Period of the iv is lcm (step, type_range)/step -1,
5160 i.e., N*type_range/step - 1. Since type range is power
5161 of two, N == step >> num_of_ending_zeros_binary (step),
5162 so the final result is
5163
5164 (type_range >> num_of_ending_zeros_binary (step)) - 1
5165
5166 */
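  /* E.g. (illustrative): for a 32-bit unsigned iv with step 8, the step has
     three trailing zero bits, so the period is (2^32 >> 3) - 1 = 2^29 - 1
     iterations before the iv value repeats.  */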
5167 pow2div = num_ending_zeros (step);
5168
5169 period = build_low_bits_mask (type,
5170 (TYPE_PRECISION (type)
5171 - tree_to_uhwi (pow2div)));
5172
5173 return period;
5174 }
5175
5176 /* Returns the comparison operator used when eliminating the iv USE. */
5177
5178 static enum tree_code
5179 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5180 {
5181 struct loop *loop = data->current_loop;
5182 basic_block ex_bb;
5183 edge exit;
5184
5185 ex_bb = gimple_bb (use->stmt);
5186 exit = EDGE_SUCC (ex_bb, 0);
5187 if (flow_bb_inside_loop_p (loop, exit->dest))
5188 exit = EDGE_SUCC (ex_bb, 1);
5189
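  /* If the exit edge is taken when the condition is true, the rewritten
     condition must become true exactly when the bound is reached, so use
     EQ_EXPR; otherwise the loop keeps iterating while the condition holds
     and NE_EXPR is the right comparison.  */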
5190 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5191 }
5192
5193 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5194 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5195 calculation is performed in non-wrapping type.
5196
5197 TODO: More generally, we could test for the situation that
5198 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5199 This would require knowing the sign of OFFSET. */
5200
5201 static bool
5202 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5203 {
5204 enum tree_code code;
5205 tree e1, e2;
5206 aff_tree aff_e1, aff_e2, aff_offset;
5207
5208 if (!nowrap_type_p (TREE_TYPE (base)))
5209 return false;
5210
5211 base = expand_simple_operations (base);
5212
5213 if (TREE_CODE (base) == SSA_NAME)
5214 {
5215 gimple *stmt = SSA_NAME_DEF_STMT (base);
5216
5217 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5218 return false;
5219
5220 code = gimple_assign_rhs_code (stmt);
5221 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5222 return false;
5223
5224 e1 = gimple_assign_rhs1 (stmt);
5225 e2 = gimple_assign_rhs2 (stmt);
5226 }
5227 else
5228 {
5229 code = TREE_CODE (base);
5230 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5231 return false;
5232 e1 = TREE_OPERAND (base, 0);
5233 e2 = TREE_OPERAND (base, 1);
5234 }
5235
5236 /* Use affine expansion as deeper inspection to prove the equality. */
5237 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5238 &aff_e2, &data->name_expansion_cache);
5239 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5240 &aff_offset, &data->name_expansion_cache);
5241 aff_combination_scale (&aff_offset, -1);
5242 switch (code)
5243 {
5244 case PLUS_EXPR:
5245 aff_combination_add (&aff_e2, &aff_offset);
5246 if (aff_combination_zero_p (&aff_e2))
5247 return true;
5248
5249 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5250 &aff_e1, &data->name_expansion_cache);
5251 aff_combination_add (&aff_e1, &aff_offset);
5252 return aff_combination_zero_p (&aff_e1);
5253
5254 case POINTER_PLUS_EXPR:
5255 aff_combination_add (&aff_e2, &aff_offset);
5256 return aff_combination_zero_p (&aff_e2);
5257
5258 default:
5259 return false;
5260 }
5261 }
5262
5263 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5264 comparison with CAND. NITER describes the number of iterations of
5265 the loops. If successful, the comparison in COMP_P is altered accordingly.
5266
5267 We aim to handle the following situation:
5268
5269 sometype *base, *p;
5270 int a, b, i;
5271
5272 i = a;
5273 p = p_0 = base + a;
5274
5275 do
5276 {
5277 bla (*p);
5278 p++;
5279 i++;
5280 }
5281 while (i < b);
5282
5283 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5284 We aim to optimize this to
5285
5286 p = p_0 = base + a;
5287 do
5288 {
5289 bla (*p);
5290 p++;
5291 }
5292 while (p < p_0 - a + b);
5293
5294 This preserves the correctness, since the pointer arithmetics does not
5295 overflow. More precisely:
5296
5297 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5298 overflow in computing it or the values of p.
5299 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5300 overflow. To prove this, we use the fact that p_0 = base + a. */
5301
5302 static bool
5303 iv_elimination_compare_lt (struct ivopts_data *data,
5304 struct iv_cand *cand, enum tree_code *comp_p,
5305 struct tree_niter_desc *niter)
5306 {
5307 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5308 struct aff_tree nit, tmpa, tmpb;
5309 enum tree_code comp;
5310 HOST_WIDE_INT step;
5311
5312 /* We need to know that the candidate induction variable does not overflow.
5313 While more complex analysis may be used to prove this, for now just
5314 check that the variable appears in the original program and that it
5315 is computed in a type that guarantees no overflows. */
5316 cand_type = TREE_TYPE (cand->iv->base);
5317 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5318 return false;
5319
5320 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5321 the calculation of the BOUND could overflow, making the comparison
5322 invalid. */
5323 if (!data->loop_single_exit_p)
5324 return false;
5325
5326 /* We need to be able to decide whether candidate is increasing or decreasing
5327 in order to choose the right comparison operator. */
5328 if (!cst_and_fits_in_hwi (cand->iv->step))
5329 return false;
5330 step = int_cst_value (cand->iv->step);
5331
5332 /* Check that the number of iterations matches the expected pattern:
5333 a + 1 > b ? 0 : b - a - 1. */
5334 mbz = niter->may_be_zero;
5335 if (TREE_CODE (mbz) == GT_EXPR)
5336 {
5337 /* Handle a + 1 > b. */
5338 tree op0 = TREE_OPERAND (mbz, 0);
5339 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5340 {
5341 a = TREE_OPERAND (op0, 0);
5342 b = TREE_OPERAND (mbz, 1);
5343 }
5344 else
5345 return false;
5346 }
5347 else if (TREE_CODE (mbz) == LT_EXPR)
5348 {
5349 tree op1 = TREE_OPERAND (mbz, 1);
5350
5351 /* Handle b < a + 1. */
5352 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5353 {
5354 a = TREE_OPERAND (op1, 0);
5355 b = TREE_OPERAND (mbz, 0);
5356 }
5357 else
5358 return false;
5359 }
5360 else
5361 return false;
5362
5363 /* Expected number of iterations is B - A - 1. Check that it matches
5364 the actual number, i.e., that B - A - NITER = 1. */
5365 tree_to_aff_combination (niter->niter, nit_type, &nit);
5366 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5367 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5368 aff_combination_scale (&nit, -1);
5369 aff_combination_scale (&tmpa, -1);
5370 aff_combination_add (&tmpb, &tmpa);
5371 aff_combination_add (&tmpb, &nit);
5372 if (tmpb.n != 0 || tmpb.offset != 1)
5373 return false;
5374
5375 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5376 overflow. */
5377 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5378 cand->iv->step,
5379 fold_convert (TREE_TYPE (cand->iv->step), a));
5380 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5381 return false;
5382
5383 /* Determine the new comparison operator. */
5384 comp = step < 0 ? GT_EXPR : LT_EXPR;
5385 if (*comp_p == NE_EXPR)
5386 *comp_p = comp;
5387 else if (*comp_p == EQ_EXPR)
5388 *comp_p = invert_tree_comparison (comp, false);
5389 else
5390 gcc_unreachable ();
5391
5392 return true;
5393 }
5394
5395 /* Check whether it is possible to express the condition in USE by comparison
5396 of candidate CAND. If so, store the value compared with to BOUND, and the
5397 comparison operator to COMP. */
5398
5399 static bool
5400 may_eliminate_iv (struct ivopts_data *data,
5401 struct iv_use *use, struct iv_cand *cand, tree *bound,
5402 enum tree_code *comp)
5403 {
5404 basic_block ex_bb;
5405 edge exit;
5406 tree period;
5407 struct loop *loop = data->current_loop;
5408 aff_tree bnd;
5409 struct tree_niter_desc *desc = NULL;
5410
5411 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5412 return false;
5413
5414 /* For now this works only for exits that dominate the loop latch.
5415 TODO: extend to other conditions inside loop body. */
5416 ex_bb = gimple_bb (use->stmt);
5417 if (use->stmt != last_stmt (ex_bb)
5418 || gimple_code (use->stmt) != GIMPLE_COND
5419 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5420 return false;
5421
5422 exit = EDGE_SUCC (ex_bb, 0);
5423 if (flow_bb_inside_loop_p (loop, exit->dest))
5424 exit = EDGE_SUCC (ex_bb, 1);
5425 if (flow_bb_inside_loop_p (loop, exit->dest))
5426 return false;
5427
5428 desc = niter_for_exit (data, exit);
5429 if (!desc)
5430 return false;
5431
5432 /* Determine whether we can use the variable to test the exit condition.
5433 This is the case iff the period of the induction variable is greater
5434 than the number of iterations for which the exit condition is true. */
5435 period = iv_period (cand->iv);
5436
5437 /* If the number of iterations is constant, compare against it directly. */
5438 if (TREE_CODE (desc->niter) == INTEGER_CST)
5439 {
5440 /* See cand_value_at. */
5441 if (stmt_after_increment (loop, cand, use->stmt))
5442 {
5443 if (!tree_int_cst_lt (desc->niter, period))
5444 return false;
5445 }
5446 else
5447 {
5448 if (tree_int_cst_lt (period, desc->niter))
5449 return false;
5450 }
5451 }
5452
5453 /* If not, and if this is the only possible exit of the loop, see whether
5454 we can get a conservative estimate on the number of iterations of the
5455 entire loop and compare against that instead. */
5456 else
5457 {
5458 widest_int period_value, max_niter;
5459
5460 max_niter = desc->max;
5461 if (stmt_after_increment (loop, cand, use->stmt))
5462 max_niter += 1;
5463 period_value = wi::to_widest (period);
5464 if (wi::gtu_p (max_niter, period_value))
5465 {
5466 /* See if we can take advantage of inferred loop bound information. */
5467 if (data->loop_single_exit_p)
5468 {
5469 if (!max_loop_iterations (loop, &max_niter))
5470 return false;
5471 /* The loop bound is already adjusted by adding 1. */
5472 if (wi::gtu_p (max_niter, period_value))
5473 return false;
5474 }
5475 else
5476 return false;
5477 }
5478 }
5479
5480 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5481
5482 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5483 aff_combination_to_tree (&bnd));
5484 *comp = iv_elimination_compare (data, use);
5485
5486 /* It is unlikely that computing the number of iterations using division
5487 would be more profitable than keeping the original induction variable. */
5488 if (expression_expensive_p (*bound))
5489 return false;
5490
5491 /* Sometimes, it is possible to handle the situation that the number of
5492 iterations may be zero unless additional assumptions hold, by using <
5493 instead of != in the exit condition.
5494
5495 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5496 base the exit condition on it. However, that is often too
5497 expensive. */
5498 if (!integer_zerop (desc->may_be_zero))
5499 return iv_elimination_compare_lt (data, cand, comp, desc);
5500
5501 return true;
5502 }
5503
5504 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5505 be copied, if it is used in the loop body and DATA->body_includes_call. */
5506
5507 static int
5508 parm_decl_cost (struct ivopts_data *data, tree bound)
5509 {
5510 tree sbound = bound;
5511 STRIP_NOPS (sbound);
5512
5513 if (TREE_CODE (sbound) == SSA_NAME
5514 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5515 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5516 && data->body_includes_call)
5517 return COSTS_N_INSNS (1);
5518
5519 return 0;
5520 }
5521
5522 /* Determines cost of basing replacement of USE on CAND in a condition. */
5523
5524 static bool
5525 determine_use_iv_cost_condition (struct ivopts_data *data,
5526 struct iv_use *use, struct iv_cand *cand)
5527 {
5528 tree bound = NULL_TREE;
5529 struct iv *cmp_iv;
5530 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5531 comp_cost elim_cost, express_cost, cost, bound_cost;
5532 bool ok;
5533 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5534 tree *control_var, *bound_cst;
5535 enum tree_code comp = ERROR_MARK;
5536
5537 /* Only consider real candidates. */
5538 if (!cand->iv)
5539 {
5540 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5541 ERROR_MARK, -1);
5542 return false;
5543 }
5544
5545 /* Try iv elimination. */
5546 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5547 {
5548 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5549 if (elim_cost.cost == 0)
5550 elim_cost.cost = parm_decl_cost (data, bound);
5551 else if (TREE_CODE (bound) == INTEGER_CST)
5552 elim_cost.cost = 0;
5553 /* If we replace a loop condition 'i < n' with 'p < base + n',
5554 depends_on_elim will have 'base' and 'n' set, which implies
5555 that both 'base' and 'n' will be live during the loop. More likely,
5556 'base + n' will be loop invariant, resulting in only one live value
5557 during the loop. So in that case we clear depends_on_elim and set
5558 elim_inv_expr_id instead. */
5559 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5560 {
5561 elim_inv_expr_id = get_expr_id (data, bound);
5562 bitmap_clear (depends_on_elim);
5563 }
5564 /* The bound is a loop invariant, so it will be only computed
5565 once. */
5566 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5567 }
5568 else
5569 elim_cost = infinite_cost;
5570
5571 /* Try expressing the original giv. If it is compared with an invariant,
5572 note that we cannot get rid of it. */
5573 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5574 NULL, &cmp_iv);
5575 gcc_assert (ok);
5576
5577 /* When the condition is a comparison of the candidate IV against
5578 zero, prefer this IV.
5579
5580 TODO: The constant that we're subtracting from the cost should
5581 be target-dependent. This information should be added to the
5582 target costs for each backend. */
5583 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5584 && integer_zerop (*bound_cst)
5585 && (operand_equal_p (*control_var, cand->var_after, 0)
5586 || operand_equal_p (*control_var, cand->var_before, 0)))
5587 elim_cost.cost -= 1;
5588
5589 express_cost = get_computation_cost (data, use, cand, false,
5590 &depends_on_express, NULL,
5591 &express_inv_expr_id);
5592 fd_ivopts_data = data;
5593 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5594
5595 /* Count the cost of the original bound as well. */
5596 bound_cost = force_var_cost (data, *bound_cst, NULL);
5597 if (bound_cost.cost == 0)
5598 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5599 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5600 bound_cost.cost = 0;
5601 express_cost.cost += bound_cost.cost;
5602
5603 /* Choose the better approach, preferring the eliminated IV. */
5604 if (compare_costs (elim_cost, express_cost) <= 0)
5605 {
5606 cost = elim_cost;
5607 depends_on = depends_on_elim;
5608 depends_on_elim = NULL;
5609 inv_expr_id = elim_inv_expr_id;
5610 }
5611 else
5612 {
5613 cost = express_cost;
5614 depends_on = depends_on_express;
5615 depends_on_express = NULL;
5616 bound = NULL_TREE;
5617 comp = ERROR_MARK;
5618 inv_expr_id = express_inv_expr_id;
5619 }
5620
5621 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5622
5623 if (depends_on_elim)
5624 BITMAP_FREE (depends_on_elim);
5625 if (depends_on_express)
5626 BITMAP_FREE (depends_on_express);
5627
5628 return !infinite_cost_p (cost);
5629 }
5630
5631 /* Determines cost of basing replacement of USE on CAND. Returns false
5632 if USE cannot be based on CAND. */
5633
5634 static bool
5635 determine_use_iv_cost (struct ivopts_data *data,
5636 struct iv_use *use, struct iv_cand *cand)
5637 {
5638 switch (use->type)
5639 {
5640 case USE_NONLINEAR_EXPR:
5641 return determine_use_iv_cost_generic (data, use, cand);
5642
5643 case USE_ADDRESS:
5644 return determine_use_iv_cost_address (data, use, cand);
5645
5646 case USE_COMPARE:
5647 return determine_use_iv_cost_condition (data, use, cand);
5648
5649 default:
5650 gcc_unreachable ();
5651 }
5652 }
5653
5654 /* Return true if get_computation_cost indicates that autoincrement is
5655 a possibility for the pair of USE and CAND, false otherwise. */
5656
5657 static bool
5658 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5659 struct iv_cand *cand)
5660 {
5661 bitmap depends_on;
5662 bool can_autoinc;
5663 comp_cost cost;
5664
5665 if (use->type != USE_ADDRESS)
5666 return false;
5667
5668 cost = get_computation_cost (data, use, cand, true, &depends_on,
5669 &can_autoinc, NULL);
5670
5671 BITMAP_FREE (depends_on);
5672
5673 return !infinite_cost_p (cost) && can_autoinc;
5674 }
5675
5676 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5677 use that allows autoincrement, and set their AINC_USE if possible. */
5678
5679 static void
5680 set_autoinc_for_original_candidates (struct ivopts_data *data)
5681 {
5682 unsigned i, j;
5683
5684 for (i = 0; i < n_iv_cands (data); i++)
5685 {
5686 struct iv_cand *cand = iv_cand (data, i);
5687 struct iv_use *closest_before = NULL;
5688 struct iv_use *closest_after = NULL;
5689 if (cand->pos != IP_ORIGINAL)
5690 continue;
5691
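      /* Find the uses nearest to the candidate's increment within the same
	 basic block, one before and one after it; those are the uses
	 considered for combining the increment with an autoincrement
	 addressing mode.  */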
5692 for (j = 0; j < n_iv_uses (data); j++)
5693 {
5694 struct iv_use *use = iv_use (data, j);
5695 unsigned uid = gimple_uid (use->stmt);
5696
5697 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5698 continue;
5699
5700 if (uid < gimple_uid (cand->incremented_at)
5701 && (closest_before == NULL
5702 || uid > gimple_uid (closest_before->stmt)))
5703 closest_before = use;
5704
5705 if (uid > gimple_uid (cand->incremented_at)
5706 && (closest_after == NULL
5707 || uid < gimple_uid (closest_after->stmt)))
5708 closest_after = use;
5709 }
5710
5711 if (closest_before != NULL
5712 && autoinc_possible_for_pair (data, closest_before, cand))
5713 cand->ainc_use = closest_before;
5714 else if (closest_after != NULL
5715 && autoinc_possible_for_pair (data, closest_after, cand))
5716 cand->ainc_use = closest_after;
5717 }
5718 }
5719
5720 /* Finds the candidates for the induction variables. */
5721
5722 static void
5723 find_iv_candidates (struct ivopts_data *data)
5724 {
5725 /* Add commonly used ivs. */
5726 add_standard_iv_candidates (data);
5727
5728 /* Add old induction variables. */
5729 add_iv_candidate_for_bivs (data);
5730
5731 /* Add induction variables derived from uses. */
5732 add_iv_candidate_for_uses (data);
5733
5734 set_autoinc_for_original_candidates (data);
5735
5736 /* Record the important candidates. */
5737 record_important_candidates (data);
5738 }
5739
5740 /* Determines the cost of basing each use on each iv candidate. */
5741
5742 static void
5743 determine_use_iv_costs (struct ivopts_data *data)
5744 {
5745 unsigned i, j;
5746 struct iv_use *use;
5747 struct iv_cand *cand;
5748 bitmap to_clear = BITMAP_ALLOC (NULL);
5749
5750 alloc_use_cost_map (data);
5751
5752 for (i = 0; i < n_iv_uses (data); i++)
5753 {
5754 use = iv_use (data, i);
5755
5756 if (data->consider_all_candidates)
5757 {
5758 for (j = 0; j < n_iv_cands (data); j++)
5759 {
5760 cand = iv_cand (data, j);
5761 determine_use_iv_cost (data, use, cand);
5762 }
5763 }
5764 else
5765 {
5766 bitmap_iterator bi;
5767
5768 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5769 {
5770 cand = iv_cand (data, j);
5771 if (!determine_use_iv_cost (data, use, cand))
5772 bitmap_set_bit (to_clear, j);
5773 }
5774
5775 /* Remove the candidates for which the cost is infinite from
5776 the list of related candidates. */
5777 bitmap_and_compl_into (use->related_cands, to_clear);
5778 bitmap_clear (to_clear);
5779 }
5780 }
5781
5782 BITMAP_FREE (to_clear);
5783
5784 if (dump_file && (dump_flags & TDF_DETAILS))
5785 {
5786 fprintf (dump_file, "Use-candidate costs:\n");
5787
5788 for (i = 0; i < n_iv_uses (data); i++)
5789 {
5790 use = iv_use (data, i);
5791
5792 fprintf (dump_file, "Use %d:\n", i);
5793 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5794 for (j = 0; j < use->n_map_members; j++)
5795 {
5796 if (!use->cost_map[j].cand
5797 || infinite_cost_p (use->cost_map[j].cost))
5798 continue;
5799
5800 fprintf (dump_file, " %d\t%d\t%d\t",
5801 use->cost_map[j].cand->id,
5802 use->cost_map[j].cost.cost,
5803 use->cost_map[j].cost.complexity);
5804 if (use->cost_map[j].depends_on)
5805 bitmap_print (dump_file,
5806 use->cost_map[j].depends_on, "","");
5807 if (use->cost_map[j].inv_expr_id != -1)
5808 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5809 fprintf (dump_file, "\n");
5810 }
5811
5812 fprintf (dump_file, "\n");
5813 }
5814 fprintf (dump_file, "\n");
5815 }
5816 }
5817
5818 /* Determines cost of the candidate CAND. */
5819
5820 static void
5821 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5822 {
5823 comp_cost cost_base;
5824 unsigned cost, cost_step;
5825 tree base;
5826
5827 if (!cand->iv)
5828 {
5829 cand->cost = 0;
5830 return;
5831 }
5832
5833 /* There are two costs associated with the candidate -- its increment
5834 and its initialization. The second is almost negligible for any loop
5835 that rolls enough, so we give it only a small weight. */
5836
5837 base = cand->iv->base;
5838 cost_base = force_var_cost (data, base, NULL);
5839 /* It will be exceptional that the iv register happens to be initialized with
5840 the proper value at no cost. In general, there will at least be a regcopy
5841 or a const set. */
5842 if (cost_base.cost == 0)
5843 cost_base.cost = COSTS_N_INSNS (1);
5844 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5845
5846 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5847
5848 /* Prefer the original ivs unless we may gain something by replacing them.
5849 The reason is to make debugging simpler, so this is not relevant for
5850 artificial ivs created by other optimization passes. */
5851 if (cand->pos != IP_ORIGINAL
5852 || !SSA_NAME_VAR (cand->var_before)
5853 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5854 cost++;
5855
5856 /* Prefer not to insert statements into the latch unless there are some
5857 already (so that we do not create unnecessary jumps). */
5858 if (cand->pos == IP_END
5859 && empty_block_p (ip_end_pos (data->current_loop)))
5860 cost++;
5861
5862 cand->cost = cost;
5863 cand->cost_step = cost_step;
5864 }
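
/* For a rough feel of the numbers (purely illustrative -- the real values
   come from the target hooks): with an increment costing one instruction
   and a setup consisting of a single constant load, a loop that is
   expected to roll many times pays essentially just the per-iteration
   increment for the candidate, plus the unit penalties added above for
   non-original ivs and for placing the increment into an otherwise empty
   latch.  */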
5865
5866 /* Determines the computation costs of the candidates. */
5867
5868 static void
5869 determine_iv_costs (struct ivopts_data *data)
5870 {
5871 unsigned i;
5872
5873 if (dump_file && (dump_flags & TDF_DETAILS))
5874 {
5875 fprintf (dump_file, "Candidate costs:\n");
5876 fprintf (dump_file, " cand\tcost\n");
5877 }
5878
5879 for (i = 0; i < n_iv_cands (data); i++)
5880 {
5881 struct iv_cand *cand = iv_cand (data, i);
5882
5883 determine_iv_cost (data, cand);
5884
5885 if (dump_file && (dump_flags & TDF_DETAILS))
5886 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5887 }
5888
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5890 fprintf (dump_file, "\n");
5891 }
5892
5893 /* Calculates cost for having SIZE induction variables. */
5894
5895 static unsigned
5896 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5897 {
5898 /* We add size to the cost, so that we prefer eliminating ivs
5899 if possible. */
5900 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5901 data->body_includes_call);
5902 }
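
/* A rough sketch of the intent: for small sizes the penalty grows only by
   the SIZE term added above, which biases the search towards fewer ivs;
   once SIZE together with the other registers already in use
   (DATA->regs_used) exceeds what the target provides,
   estimate_reg_pressure_cost starts charging spill costs as well, so
   large candidate sets quickly become unattractive.  */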
5903
5904 /* For each size of the induction variable set determine the penalty. */
5905
5906 static void
5907 determine_set_costs (struct ivopts_data *data)
5908 {
5909 unsigned j, n;
5910 gphi *phi;
5911 gphi_iterator psi;
5912 tree op;
5913 struct loop *loop = data->current_loop;
5914 bitmap_iterator bi;
5915
5916 if (dump_file && (dump_flags & TDF_DETAILS))
5917 {
5918 fprintf (dump_file, "Global costs:\n");
5919 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5920 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5921 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5922 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5923 }
5924
5925 n = 0;
5926 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5927 {
5928 phi = psi.phi ();
5929 op = PHI_RESULT (phi);
5930
5931 if (virtual_operand_p (op))
5932 continue;
5933
5934 if (get_iv (data, op))
5935 continue;
5936
5937 n++;
5938 }
5939
5940 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5941 {
5942 struct version_info *info = ver_info (data, j);
5943
5944 if (info->inv_id && info->has_nonlin_use)
5945 n++;
5946 }
5947
5948 data->regs_used = n;
5949 if (dump_file && (dump_flags & TDF_DETAILS))
5950 fprintf (dump_file, " regs_used %d\n", n);
5951
5952 if (dump_file && (dump_flags & TDF_DETAILS))
5953 {
5954 fprintf (dump_file, " cost for size:\n");
5955 fprintf (dump_file, " ivs\tcost\n");
5956 for (j = 0; j <= 2 * target_avail_regs; j++)
5957 fprintf (dump_file, " %d\t%d\n", j,
5958 ivopts_global_cost_for_size (data, j));
5959 fprintf (dump_file, "\n");
5960 }
5961 }
5962
5963 /* Returns true if A is a cheaper cost pair than B. */
5964
5965 static bool
5966 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5967 {
5968 int cmp;
5969
5970 if (!a)
5971 return false;
5972
5973 if (!b)
5974 return true;
5975
5976 cmp = compare_costs (a->cost, b->cost);
5977 if (cmp < 0)
5978 return true;
5979
5980 if (cmp > 0)
5981 return false;
5982
5983 /* In case the costs are the same, prefer the cheaper candidate. */
5984 if (a->cand->cost < b->cand->cost)
5985 return true;
5986
5987 return false;
5988 }
5989
5990
5991 /* Returns the cost pair for the candidate by which USE is expressed in IVS. */
5992
5993 static struct cost_pair *
5994 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5995 {
5996 return ivs->cand_for_use[use->id];
5997 }
5998
5999 /* Computes the cost field of IVS structure. */
6000
6001 static void
6002 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6003 {
6004 comp_cost cost = ivs->cand_use_cost;
6005
6006 cost.cost += ivs->cand_cost;
6007
6008 cost.cost += ivopts_global_cost_for_size (data,
6009 ivs->n_regs + ivs->num_used_inv_expr);
6010
6011 ivs->cost = cost;
6012 }
6013
6014 /* Remove invariants in set INVS from set IVS. */
6015
6016 static void
6017 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6018 {
6019 bitmap_iterator bi;
6020 unsigned iid;
6021
6022 if (!invs)
6023 return;
6024
6025 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6026 {
6027 ivs->n_invariant_uses[iid]--;
6028 if (ivs->n_invariant_uses[iid] == 0)
6029 ivs->n_regs--;
6030 }
6031 }
6032
6033 /* Set USE not to be expressed by any candidate in IVS. */
6034
6035 static void
6036 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6037 struct iv_use *use)
6038 {
6039 unsigned uid = use->id, cid;
6040 struct cost_pair *cp;
6041
6042 cp = ivs->cand_for_use[uid];
6043 if (!cp)
6044 return;
6045 cid = cp->cand->id;
6046
6047 ivs->bad_uses++;
6048 ivs->cand_for_use[uid] = NULL;
6049 ivs->n_cand_uses[cid]--;
6050
6051 if (ivs->n_cand_uses[cid] == 0)
6052 {
6053 bitmap_clear_bit (ivs->cands, cid);
6054 /* Do not count the pseudocandidates. */
6055 if (cp->cand->iv)
6056 ivs->n_regs--;
6057 ivs->n_cands--;
6058 ivs->cand_cost -= cp->cand->cost;
6059
6060 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6061 }
6062
6063 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6064
6065 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6066
6067 if (cp->inv_expr_id != -1)
6068 {
6069 ivs->used_inv_expr[cp->inv_expr_id]--;
6070 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6071 ivs->num_used_inv_expr--;
6072 }
6073 iv_ca_recount_cost (data, ivs);
6074 }
6075
6076 /* Add invariants in set INVS to set IVS. */
6077
6078 static void
6079 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6080 {
6081 bitmap_iterator bi;
6082 unsigned iid;
6083
6084 if (!invs)
6085 return;
6086
6087 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6088 {
6089 ivs->n_invariant_uses[iid]++;
6090 if (ivs->n_invariant_uses[iid] == 1)
6091 ivs->n_regs++;
6092 }
6093 }
6094
6095 /* Set cost pair for USE in set IVS to CP. */
6096
6097 static void
6098 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6099 struct iv_use *use, struct cost_pair *cp)
6100 {
6101 unsigned uid = use->id, cid;
6102
6103 if (ivs->cand_for_use[uid] == cp)
6104 return;
6105
6106 if (ivs->cand_for_use[uid])
6107 iv_ca_set_no_cp (data, ivs, use);
6108
6109 if (cp)
6110 {
6111 cid = cp->cand->id;
6112
6113 ivs->bad_uses--;
6114 ivs->cand_for_use[uid] = cp;
6115 ivs->n_cand_uses[cid]++;
6116 if (ivs->n_cand_uses[cid] == 1)
6117 {
6118 bitmap_set_bit (ivs->cands, cid);
6119 /* Do not count the pseudocandidates. */
6120 if (cp->cand->iv)
6121 ivs->n_regs++;
6122 ivs->n_cands++;
6123 ivs->cand_cost += cp->cand->cost;
6124
6125 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6126 }
6127
6128 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6129 iv_ca_set_add_invariants (ivs, cp->depends_on);
6130
6131 if (cp->inv_expr_id != -1)
6132 {
6133 ivs->used_inv_expr[cp->inv_expr_id]++;
6134 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6135 ivs->num_used_inv_expr++;
6136 }
6137 iv_ca_recount_cost (data, ivs);
6138 }
6139 }
6140
6141 /* Extend set IVS by expressing USE by some of the candidates in it
6142 if possible. Consider all important candidates if candidates in
6143 set IVS don't give any result. */
6144
6145 static void
6146 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6147 struct iv_use *use)
6148 {
6149 struct cost_pair *best_cp = NULL, *cp;
6150 bitmap_iterator bi;
6151 unsigned i;
6152 struct iv_cand *cand;
6153
6154 gcc_assert (ivs->upto >= use->id);
6155 ivs->upto++;
6156 ivs->bad_uses++;
6157
6158 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6159 {
6160 cand = iv_cand (data, i);
6161 cp = get_use_iv_cost (data, use, cand);
6162 if (cheaper_cost_pair (cp, best_cp))
6163 best_cp = cp;
6164 }
6165
6166 if (best_cp == NULL)
6167 {
6168 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6169 {
6170 cand = iv_cand (data, i);
6171 cp = get_use_iv_cost (data, use, cand);
6172 if (cheaper_cost_pair (cp, best_cp))
6173 best_cp = cp;
6174 }
6175 }
6176
6177 iv_ca_set_cp (data, ivs, use, best_cp);
6178 }
6179
6180 /* Get cost for assignment IVS. */
6181
6182 static comp_cost
6183 iv_ca_cost (struct iv_ca *ivs)
6184 {
6185 /* This was a conditional expression but it triggered a bug in
6186 Sun C 5.5. */
6187 if (ivs->bad_uses)
6188 return infinite_cost;
6189 else
6190 return ivs->cost;
6191 }
6192
6193 /* Returns true if all dependences of CP are among invariants in IVS. */
6194
6195 static bool
6196 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6197 {
6198 unsigned i;
6199 bitmap_iterator bi;
6200
6201 if (!cp->depends_on)
6202 return true;
6203
6204 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6205 {
6206 if (ivs->n_invariant_uses[i] == 0)
6207 return false;
6208 }
6209
6210 return true;
6211 }
6212
6213 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6214 it before NEXT_CHANGE. */
6215
6216 static struct iv_ca_delta *
6217 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6218 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6219 {
6220 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6221
6222 change->use = use;
6223 change->old_cp = old_cp;
6224 change->new_cp = new_cp;
6225 change->next_change = next_change;
6226
6227 return change;
6228 }
6229
6230 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6231 are rewritten. */
6232
6233 static struct iv_ca_delta *
6234 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6235 {
6236 struct iv_ca_delta *last;
6237
6238 if (!l2)
6239 return l1;
6240
6241 if (!l1)
6242 return l2;
6243
6244 for (last = l1; last->next_change; last = last->next_change)
6245 continue;
6246 last->next_change = l2;
6247
6248 return l1;
6249 }
6250
6251 /* Reverse the list of changes DELTA, forming the inverse to it. */
6252
6253 static struct iv_ca_delta *
6254 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6255 {
6256 struct iv_ca_delta *act, *next, *prev = NULL;
6257
6258 for (act = delta; act; act = next)
6259 {
6260 next = act->next_change;
6261 act->next_change = prev;
6262 prev = act;
6263
6264 std::swap (act->old_cp, act->new_cp);
6265 }
6266
6267 return prev;
6268 }
6269
6270 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6271 reverted instead. */
6272
6273 static void
6274 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6275 struct iv_ca_delta *delta, bool forward)
6276 {
6277 struct cost_pair *from, *to;
6278 struct iv_ca_delta *act;
6279
6280 if (!forward)
6281 delta = iv_ca_delta_reverse (delta);
6282
6283 for (act = delta; act; act = act->next_change)
6284 {
6285 from = act->old_cp;
6286 to = act->new_cp;
6287 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6288 iv_ca_set_cp (data, ivs, act->use, to);
6289 }
6290
6291 if (!forward)
6292 iv_ca_delta_reverse (delta);
6293 }
6294
6295 /* Returns true if CAND is used in IVS. */
6296
6297 static bool
6298 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6299 {
6300 return ivs->n_cand_uses[cand->id] > 0;
6301 }
6302
6303 /* Returns number of induction variable candidates in the set IVS. */
6304
6305 static unsigned
6306 iv_ca_n_cands (struct iv_ca *ivs)
6307 {
6308 return ivs->n_cands;
6309 }
6310
6311 /* Free the list of changes DELTA. */
6312
6313 static void
6314 iv_ca_delta_free (struct iv_ca_delta **delta)
6315 {
6316 struct iv_ca_delta *act, *next;
6317
6318 for (act = *delta; act; act = next)
6319 {
6320 next = act->next_change;
6321 free (act);
6322 }
6323
6324 *delta = NULL;
6325 }
6326
6327 /* Allocates a new iv candidate assignment. */
6328
6329 static struct iv_ca *
6330 iv_ca_new (struct ivopts_data *data)
6331 {
6332 struct iv_ca *nw = XNEW (struct iv_ca);
6333
6334 nw->upto = 0;
6335 nw->bad_uses = 0;
6336 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6337 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6338 nw->cands = BITMAP_ALLOC (NULL);
6339 nw->n_cands = 0;
6340 nw->n_regs = 0;
6341 nw->cand_use_cost = no_cost;
6342 nw->cand_cost = 0;
6343 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6344 nw->cost = no_cost;
6345 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6346 nw->num_used_inv_expr = 0;
6347
6348 return nw;
6349 }
6350
6351 /* Free memory occupied by the set IVS. */
6352
6353 static void
6354 iv_ca_free (struct iv_ca **ivs)
6355 {
6356 free ((*ivs)->cand_for_use);
6357 free ((*ivs)->n_cand_uses);
6358 BITMAP_FREE ((*ivs)->cands);
6359 free ((*ivs)->n_invariant_uses);
6360 free ((*ivs)->used_inv_expr);
6361 free (*ivs);
6362 *ivs = NULL;
6363 }
6364
6365 /* Dumps IVS to FILE. */
6366
6367 static void
6368 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6369 {
6370 const char *pref = " invariants ";
6371 unsigned i;
6372 comp_cost cost = iv_ca_cost (ivs);
6373
6374 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6375 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6376 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6377 bitmap_print (file, ivs->cands, " candidates: ","\n");
6378
6379 for (i = 0; i < ivs->upto; i++)
6380 {
6381 struct iv_use *use = iv_use (data, i);
6382 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6383 if (cp)
6384 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6385 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6386 else
6387 fprintf (file, " use:%d --> ??\n", use->id);
6388 }
6389
6390 for (i = 1; i <= data->max_inv_id; i++)
6391 if (ivs->n_invariant_uses[i])
6392 {
6393 fprintf (file, "%s%d", pref, i);
6394 pref = ", ";
6395 }
6396 fprintf (file, "\n\n");
6397 }
6398
6399 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6400 new set, and store differences in DELTA. Number of induction variables
6401 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true,
6402 the function will try to find a solution with a minimal number of iv candidates. */
6403
6404 static comp_cost
6405 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6406 struct iv_cand *cand, struct iv_ca_delta **delta,
6407 unsigned *n_ivs, bool min_ncand)
6408 {
6409 unsigned i;
6410 comp_cost cost;
6411 struct iv_use *use;
6412 struct cost_pair *old_cp, *new_cp;
6413
6414 *delta = NULL;
6415 for (i = 0; i < ivs->upto; i++)
6416 {
6417 use = iv_use (data, i);
6418 old_cp = iv_ca_cand_for_use (ivs, use);
6419
6420 if (old_cp
6421 && old_cp->cand == cand)
6422 continue;
6423
6424 new_cp = get_use_iv_cost (data, use, cand);
6425 if (!new_cp)
6426 continue;
6427
6428 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6429 continue;
6430
6431 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6432 continue;
6433
6434 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6435 }
6436
6437 iv_ca_delta_commit (data, ivs, *delta, true);
6438 cost = iv_ca_cost (ivs);
6439 if (n_ivs)
6440 *n_ivs = iv_ca_n_cands (ivs);
6441 iv_ca_delta_commit (data, ivs, *delta, false);
6442
6443 return cost;
6444 }
6445
6446 /* Try narrowing set IVS by removing CAND. Return the cost of
6447 the new set and store the differences in DELTA. START is
6448 the candidate with which we start narrowing. */
6449
6450 static comp_cost
6451 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6452 struct iv_cand *cand, struct iv_cand *start,
6453 struct iv_ca_delta **delta)
6454 {
6455 unsigned i, ci;
6456 struct iv_use *use;
6457 struct cost_pair *old_cp, *new_cp, *cp;
6458 bitmap_iterator bi;
6459 struct iv_cand *cnd;
6460 comp_cost cost, best_cost, acost;
6461
6462 *delta = NULL;
6463 for (i = 0; i < n_iv_uses (data); i++)
6464 {
6465 use = iv_use (data, i);
6466
6467 old_cp = iv_ca_cand_for_use (ivs, use);
6468 if (old_cp->cand != cand)
6469 continue;
6470
6471 best_cost = iv_ca_cost (ivs);
6472 /* Start narrowing with START. */
6473 new_cp = get_use_iv_cost (data, use, start);
6474
6475 if (data->consider_all_candidates)
6476 {
6477 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6478 {
6479 if (ci == cand->id || (start && ci == start->id))
6480 continue;
6481
6482 cnd = iv_cand (data, ci);
6483
6484 cp = get_use_iv_cost (data, use, cnd);
6485 if (!cp)
6486 continue;
6487
6488 iv_ca_set_cp (data, ivs, use, cp);
6489 acost = iv_ca_cost (ivs);
6490
6491 if (compare_costs (acost, best_cost) < 0)
6492 {
6493 best_cost = acost;
6494 new_cp = cp;
6495 }
6496 }
6497 }
6498 else
6499 {
6500 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6501 {
6502 if (ci == cand->id || (start && ci == start->id))
6503 continue;
6504
6505 cnd = iv_cand (data, ci);
6506
6507 cp = get_use_iv_cost (data, use, cnd);
6508 if (!cp)
6509 continue;
6510
6511 iv_ca_set_cp (data, ivs, use, cp);
6512 acost = iv_ca_cost (ivs);
6513
6514 if (compare_costs (acost, best_cost) < 0)
6515 {
6516 best_cost = acost;
6517 new_cp = cp;
6518 }
6519 }
6520 }
6521 /* Restore to old cp for use. */
6522 iv_ca_set_cp (data, ivs, use, old_cp);
6523
6524 if (!new_cp)
6525 {
6526 iv_ca_delta_free (delta);
6527 return infinite_cost;
6528 }
6529
6530 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6531 }
6532
6533 iv_ca_delta_commit (data, ivs, *delta, true);
6534 cost = iv_ca_cost (ivs);
6535 iv_ca_delta_commit (data, ivs, *delta, false);
6536
6537 return cost;
6538 }
6539
6540 /* Try optimizing the set of candidates IVS by removing candidates different
6541 from EXCEPT_CAND from it. Return the cost of the new set, and store
6542 differences in DELTA. */
6543
6544 static comp_cost
6545 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6546 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6547 {
6548 bitmap_iterator bi;
6549 struct iv_ca_delta *act_delta, *best_delta;
6550 unsigned i;
6551 comp_cost best_cost, acost;
6552 struct iv_cand *cand;
6553
6554 best_delta = NULL;
6555 best_cost = iv_ca_cost (ivs);
6556
6557 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6558 {
6559 cand = iv_cand (data, i);
6560
6561 if (cand == except_cand)
6562 continue;
6563
6564 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6565
6566 if (compare_costs (acost, best_cost) < 0)
6567 {
6568 best_cost = acost;
6569 iv_ca_delta_free (&best_delta);
6570 best_delta = act_delta;
6571 }
6572 else
6573 iv_ca_delta_free (&act_delta);
6574 }
6575
6576 if (!best_delta)
6577 {
6578 *delta = NULL;
6579 return best_cost;
6580 }
6581
6582 /* Recurse to possibly remove other unnecessary ivs. */
6583 iv_ca_delta_commit (data, ivs, best_delta, true);
6584 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6585 iv_ca_delta_commit (data, ivs, best_delta, false);
6586 *delta = iv_ca_delta_join (best_delta, *delta);
6587 return best_cost;
6588 }
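
/* Note the recursion above: narrowing away one candidate reassigns its
   uses to other candidates, which can in turn leave some other candidate
   covering too few uses to pay for itself, so we keep pruning until no
   single removal improves the cost.  */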
6589
6590 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6591 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6592 the corresponding cost pair; otherwise just return BEST_CP. */
6593
6594 static struct cost_pair*
6595 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6596 unsigned int cand_idx, struct iv_cand *old_cand,
6597 struct cost_pair *best_cp)
6598 {
6599 struct iv_cand *cand;
6600 struct cost_pair *cp;
6601
6602 gcc_assert (old_cand != NULL && best_cp != NULL);
6603 if (cand_idx == old_cand->id)
6604 return best_cp;
6605
6606 cand = iv_cand (data, cand_idx);
6607 cp = get_use_iv_cost (data, use, cand);
6608 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6609 return cp;
6610
6611 return best_cp;
6612 }
6613
6614 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6615 that are used by more than one iv use. For each of those candidates,
6616 this function tries to represent the iv uses under that candidate using
6617 other ones with lower local cost, then tries to prune the new set.
6618 If the new set has a lower cost, it returns the new cost after recording
6619 the candidate replacement in list DELTA. */
6620
6621 static comp_cost
6622 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6623 struct iv_ca_delta **delta)
6624 {
6625 bitmap_iterator bi, bj;
6626 unsigned int i, j, k;
6627 struct iv_use *use;
6628 struct iv_cand *cand;
6629 comp_cost orig_cost, acost;
6630 struct iv_ca_delta *act_delta, *tmp_delta;
6631 struct cost_pair *old_cp, *best_cp = NULL;
6632
6633 *delta = NULL;
6634 orig_cost = iv_ca_cost (ivs);
6635
6636 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6637 {
6638 if (ivs->n_cand_uses[i] == 1
6639 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6640 continue;
6641
6642 cand = iv_cand (data, i);
6643
6644 act_delta = NULL;
6645 /* Represent uses under current candidate using other ones with
6646 lower local cost. */
6647 for (j = 0; j < ivs->upto; j++)
6648 {
6649 use = iv_use (data, j);
6650 old_cp = iv_ca_cand_for_use (ivs, use);
6651
6652 if (old_cp->cand != cand)
6653 continue;
6654
6655 best_cp = old_cp;
6656 if (data->consider_all_candidates)
6657 for (k = 0; k < n_iv_cands (data); k++)
6658 best_cp = cheaper_cost_with_cand (data, use, k,
6659 old_cp->cand, best_cp);
6660 else
6661 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6662 best_cp = cheaper_cost_with_cand (data, use, k,
6663 old_cp->cand, best_cp);
6664
6665 if (best_cp == old_cp)
6666 continue;
6667
6668 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6669 }
6670 /* No need for further pruning. */
6671 if (!act_delta)
6672 continue;
6673
6674 /* Prune the new candidate set. */
6675 iv_ca_delta_commit (data, ivs, act_delta, true);
6676 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6677 iv_ca_delta_commit (data, ivs, act_delta, false);
6678 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6679
6680 if (compare_costs (acost, orig_cost) < 0)
6681 {
6682 *delta = act_delta;
6683 return acost;
6684 }
6685 else
6686 iv_ca_delta_free (&act_delta);
6687 }
6688
6689 return orig_cost;
6690 }
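
/* A sketched example of the situation targeted above (C1, C2, C3, U1 and
   U2 are made-up names): suppose candidate C1 ended up covering uses U1
   and U2 because each greedy step in isolation found C1 acceptable.  If U1
   is locally cheaper on C2 and U2 on C3, rewriting both uses and then
   pruning can drop C1 entirely and reach a set whose overall cost,
   including register pressure, is lower than anything a single extension
   or removal step could find.  */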
6691
6692 /* Tries to extend the set IVS in the best possible way in order
6693 to express the USE. If ORIGINALP is true, prefer candidates from
6694 the original set of IVs, otherwise favor important candidates not
6695 based on any memory object. */
6696
6697 static bool
6698 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6699 struct iv_use *use, bool originalp)
6700 {
6701 comp_cost best_cost, act_cost;
6702 unsigned i;
6703 bitmap_iterator bi;
6704 struct iv_cand *cand;
6705 struct iv_ca_delta *best_delta = NULL, *act_delta;
6706 struct cost_pair *cp;
6707
6708 iv_ca_add_use (data, ivs, use);
6709 best_cost = iv_ca_cost (ivs);
6710 cp = iv_ca_cand_for_use (ivs, use);
6711 if (cp)
6712 {
6713 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6714 iv_ca_set_no_cp (data, ivs, use);
6715 }
6716
6717 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6718 first try important candidates not based on any memory object. Only if
6719 this fails, try the specific ones. Rationale -- in loops with many
6720 variables the best choice often is to use just one generic biv. If we
6721 added here many ivs specific to the uses, the optimization algorithm later
6722 would be likely to get stuck in a local minimum, thus causing us to create
6723 too many ivs. The approach from few ivs to more seems more likely to be
6724 successful -- starting from few ivs, replacing an expensive use by a
6725 specific iv should always be a win. */
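  /* For example, in a loop like

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

     a single counting biv can usually express all three address uses as
     well as the exit test, whereas starting from three use-specific
     pointer ivs would make it hard for the later greedy search to
     converge back to the single-biv solution.  (Illustrative sketch only;
     the actual choice depends on the target costs.)  */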
6726 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6727 {
6728 cand = iv_cand (data, i);
6729
6730 if (originalp && cand->pos != IP_ORIGINAL)
6731 continue;
6732
6733 if (!originalp && cand->iv->base_object != NULL_TREE)
6734 continue;
6735
6736 if (iv_ca_cand_used_p (ivs, cand))
6737 continue;
6738
6739 cp = get_use_iv_cost (data, use, cand);
6740 if (!cp)
6741 continue;
6742
6743 iv_ca_set_cp (data, ivs, use, cp);
6744 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6745 true);
6746 iv_ca_set_no_cp (data, ivs, use);
6747 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6748
6749 if (compare_costs (act_cost, best_cost) < 0)
6750 {
6751 best_cost = act_cost;
6752
6753 iv_ca_delta_free (&best_delta);
6754 best_delta = act_delta;
6755 }
6756 else
6757 iv_ca_delta_free (&act_delta);
6758 }
6759
6760 if (infinite_cost_p (best_cost))
6761 {
6762 for (i = 0; i < use->n_map_members; i++)
6763 {
6764 cp = use->cost_map + i;
6765 cand = cp->cand;
6766 if (!cand)
6767 continue;
6768
6769 /* Already tried this. */
6770 if (cand->important)
6771 {
6772 if (originalp && cand->pos == IP_ORIGINAL)
6773 continue;
6774 if (!originalp && cand->iv->base_object == NULL_TREE)
6775 continue;
6776 }
6777
6778 if (iv_ca_cand_used_p (ivs, cand))
6779 continue;
6780
6781 act_delta = NULL;
6782 iv_ca_set_cp (data, ivs, use, cp);
6783 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6784 iv_ca_set_no_cp (data, ivs, use);
6785 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6786 cp, act_delta);
6787
6788 if (compare_costs (act_cost, best_cost) < 0)
6789 {
6790 best_cost = act_cost;
6791
6792 if (best_delta)
6793 iv_ca_delta_free (&best_delta);
6794 best_delta = act_delta;
6795 }
6796 else
6797 iv_ca_delta_free (&act_delta);
6798 }
6799 }
6800
6801 iv_ca_delta_commit (data, ivs, best_delta, true);
6802 iv_ca_delta_free (&best_delta);
6803
6804 return !infinite_cost_p (best_cost);
6805 }
6806
6807 /* Finds an initial assignment of candidates to uses. */
6808
6809 static struct iv_ca *
6810 get_initial_solution (struct ivopts_data *data, bool originalp)
6811 {
6812 struct iv_ca *ivs = iv_ca_new (data);
6813 unsigned i;
6814
6815 for (i = 0; i < n_iv_uses (data); i++)
6816 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6817 {
6818 iv_ca_free (&ivs);
6819 return NULL;
6820 }
6821
6822 return ivs;
6823 }
6824
6825 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6826 points to a bool variable; if it is true, this function tries to break
6827 the locally optimal fixed point by replacing candidates in IVS. */
6828
6829 static bool
6830 try_improve_iv_set (struct ivopts_data *data,
6831 struct iv_ca *ivs, bool *try_replace_p)
6832 {
6833 unsigned i, n_ivs;
6834 comp_cost acost, best_cost = iv_ca_cost (ivs);
6835 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6836 struct iv_cand *cand;
6837
6838 /* Try extending the set of induction variables by one. */
6839 for (i = 0; i < n_iv_cands (data); i++)
6840 {
6841 cand = iv_cand (data, i);
6842
6843 if (iv_ca_cand_used_p (ivs, cand))
6844 continue;
6845
6846 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6847 if (!act_delta)
6848 continue;
6849
6850 /* If we successfully added the candidate and the set is small enough,
6851 try optimizing it by removing other candidates. */
6852 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6853 {
6854 iv_ca_delta_commit (data, ivs, act_delta, true);
6855 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6856 iv_ca_delta_commit (data, ivs, act_delta, false);
6857 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6858 }
6859
6860 if (compare_costs (acost, best_cost) < 0)
6861 {
6862 best_cost = acost;
6863 iv_ca_delta_free (&best_delta);
6864 best_delta = act_delta;
6865 }
6866 else
6867 iv_ca_delta_free (&act_delta);
6868 }
6869
6870 if (!best_delta)
6871 {
6872 /* Try removing the candidates from the set instead. */
6873 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6874
6875 if (!best_delta && *try_replace_p)
6876 {
6877 *try_replace_p = false;
6878 /* So far the candidate selection algorithm tends to choose fewer IVs
6879 so that it can handle loops with many variables for which the best
6880 choice is often to use only one general biv. One weakness is that
6881 it can't handle the opposite case, in which different candidates
6882 should be chosen with respect to each use. To solve the problem,
6883 we replace candidates in the manner described by the comments of
6884 iv_ca_replace, thus giving the general algorithm a chance to break
6885 the locally optimal fixed point in these cases. */
6886 best_cost = iv_ca_replace (data, ivs, &best_delta);
6887 }
6888
6889 if (!best_delta)
6890 return false;
6891 }
6892
6893 iv_ca_delta_commit (data, ivs, best_delta, true);
6894 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6895 iv_ca_delta_free (&best_delta);
6896 return true;
6897 }
6898
6899 /* Attempts to find the optimal set of induction variables. We use a simple
6900 greedy heuristic -- we try to replace at most one candidate in the selected
6901 solution and remove the unused ivs while this improves the cost. */
6902
6903 static struct iv_ca *
6904 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6905 {
6906 struct iv_ca *set;
6907 bool try_replace_p = true;
6908
6909 /* Get the initial solution. */
6910 set = get_initial_solution (data, originalp);
6911 if (!set)
6912 {
6913 if (dump_file && (dump_flags & TDF_DETAILS))
6914 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6915 return NULL;
6916 }
6917
6918 if (dump_file && (dump_flags & TDF_DETAILS))
6919 {
6920 fprintf (dump_file, "Initial set of candidates:\n");
6921 iv_ca_dump (data, dump_file, set);
6922 }
6923
6924 while (try_improve_iv_set (data, set, &try_replace_p))
6925 {
6926 if (dump_file && (dump_flags & TDF_DETAILS))
6927 {
6928 fprintf (dump_file, "Improved to:\n");
6929 iv_ca_dump (data, dump_file, set);
6930 }
6931 }
6932
6933 return set;
6934 }
6935
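/* Attempts to find the optimal set of induction variables, trying both the
   strategy that prefers the original ivs and the one that prefers
   candidates not based on any memory object, and returning the cheaper of
   the two resulting sets.  Also records the selected candidate for each
   use.  */
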
6936 static struct iv_ca *
6937 find_optimal_iv_set (struct ivopts_data *data)
6938 {
6939 unsigned i;
6940 struct iv_ca *set, *origset;
6941 struct iv_use *use;
6942 comp_cost cost, origcost;
6943
6944 /* Determine the cost based on a strategy that starts with the original IVs,
6945 then try again using a strategy that prefers candidates not based
6946 on any memory object. */
6947 origset = find_optimal_iv_set_1 (data, true);
6948 set = find_optimal_iv_set_1 (data, false);
6949
6950 if (!origset && !set)
6951 return NULL;
6952
6953 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6954 cost = set ? iv_ca_cost (set) : infinite_cost;
6955
6956 if (dump_file && (dump_flags & TDF_DETAILS))
6957 {
6958 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6959 origcost.cost, origcost.complexity);
6960 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6961 cost.cost, cost.complexity);
6962 }
6963
6964 /* Choose the one with the best cost. */
6965 if (compare_costs (origcost, cost) <= 0)
6966 {
6967 if (set)
6968 iv_ca_free (&set);
6969 set = origset;
6970 }
6971 else if (origset)
6972 iv_ca_free (&origset);
6973
6974 for (i = 0; i < n_iv_uses (data); i++)
6975 {
6976 use = iv_use (data, i);
6977 use->selected = iv_ca_cand_for_use (set, use)->cand;
6978 }
6979
6980 return set;
6981 }
6982
6983 /* Creates a new induction variable corresponding to CAND. */
6984
6985 static void
6986 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6987 {
6988 gimple_stmt_iterator incr_pos;
6989 tree base;
6990 bool after = false;
6991
6992 if (!cand->iv)
6993 return;
6994
6995 switch (cand->pos)
6996 {
6997 case IP_NORMAL:
6998 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6999 break;
7000
7001 case IP_END:
7002 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7003 after = true;
7004 break;
7005
7006 case IP_AFTER_USE:
7007 after = true;
7008 /* fall through */
7009 case IP_BEFORE_USE:
7010 incr_pos = gsi_for_stmt (cand->incremented_at);
7011 break;
7012
7013 case IP_ORIGINAL:
7014 /* Mark that the iv is preserved. */
7015 name_info (data, cand->var_before)->preserve_biv = true;
7016 name_info (data, cand->var_after)->preserve_biv = true;
7017
7018 /* Rewrite the increment so that it uses var_before directly. */
7019 find_interesting_uses_op (data, cand->var_after)->selected = cand;
7020 return;
7021 }
7022
7023 gimple_add_tmp_var (cand->var_before);
7024
7025 base = unshare_expr (cand->iv->base);
7026
7027 create_iv (base, unshare_expr (cand->iv->step),
7028 cand->var_before, data->current_loop,
7029 &incr_pos, after, &cand->var_before, &cand->var_after);
7030 }
7031
7032 /* Creates new induction variables described in SET. */
7033
7034 static void
7035 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7036 {
7037 unsigned i;
7038 struct iv_cand *cand;
7039 bitmap_iterator bi;
7040
7041 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7042 {
7043 cand = iv_cand (data, i);
7044 create_new_iv (data, cand);
7045 }
7046
7047 if (dump_file && (dump_flags & TDF_DETAILS))
7048 {
7049 fprintf (dump_file, "Selected IV set for loop %d",
7050 data->current_loop->num);
7051 if (data->loop_loc != UNKNOWN_LOCATION)
7052 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7053 LOCATION_LINE (data->loop_loc));
7054 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7055 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7056 {
7057 cand = iv_cand (data, i);
7058 dump_cand (dump_file, cand);
7059 }
7060 fprintf (dump_file, "\n");
7061 }
7062 }
7063
7064 /* Rewrites USE (definition of iv used in a nonlinear expression)
7065 using candidate CAND. */
7066
7067 static void
7068 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7069 struct iv_use *use, struct iv_cand *cand)
7070 {
7071 tree comp;
7072 tree op, tgt;
7073 gassign *ass;
7074 gimple_stmt_iterator bsi;
7075
7076 /* An important special case -- if we are asked to express the value of
7077 the original iv by itself, just exit; there is no need to
7078 introduce a new computation (that might also need casting the
7079 variable to unsigned and back). */
7080 if (cand->pos == IP_ORIGINAL
7081 && cand->incremented_at == use->stmt)
7082 {
7083 enum tree_code stmt_code;
7084
7085 gcc_assert (is_gimple_assign (use->stmt));
7086 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7087
7088 /* Check whether we may leave the computation unchanged.
7089 This is the case only if it does not rely on other
7090 computations in the loop -- otherwise, the computation
7091 we rely upon may be removed in remove_unused_ivs,
7092 thus leading to an ICE. */
7093 stmt_code = gimple_assign_rhs_code (use->stmt);
7094 if (stmt_code == PLUS_EXPR
7095 || stmt_code == MINUS_EXPR
7096 || stmt_code == POINTER_PLUS_EXPR)
7097 {
7098 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7099 op = gimple_assign_rhs2 (use->stmt);
7100 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7101 op = gimple_assign_rhs1 (use->stmt);
7102 else
7103 op = NULL_TREE;
7104 }
7105 else
7106 op = NULL_TREE;
7107
7108 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7109 return;
7110 }
7111
7112 comp = get_computation (data->current_loop, use, cand);
7113 gcc_assert (comp != NULL_TREE);
7114
7115 switch (gimple_code (use->stmt))
7116 {
7117 case GIMPLE_PHI:
7118 tgt = PHI_RESULT (use->stmt);
7119
7120 /* If we should keep the biv, do not replace it. */
7121 if (name_info (data, tgt)->preserve_biv)
7122 return;
7123
7124 bsi = gsi_after_labels (gimple_bb (use->stmt));
7125 break;
7126
7127 case GIMPLE_ASSIGN:
7128 tgt = gimple_assign_lhs (use->stmt);
7129 bsi = gsi_for_stmt (use->stmt);
7130 break;
7131
7132 default:
7133 gcc_unreachable ();
7134 }
7135
7136 if (!valid_gimple_rhs_p (comp)
7137 || (gimple_code (use->stmt) != GIMPLE_PHI
7138 /* We can't allow re-allocating the stmt as it might be pointed
7139 to still. */
7140 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7141 >= gimple_num_ops (gsi_stmt (bsi)))))
7142 {
7143 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7144 true, GSI_SAME_STMT);
7145 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7146 {
7147 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7148 /* As this isn't a plain copy we have to reset alignment
7149 information. */
7150 if (SSA_NAME_PTR_INFO (comp))
7151 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7152 }
7153 }
7154
7155 if (gimple_code (use->stmt) == GIMPLE_PHI)
7156 {
7157 ass = gimple_build_assign (tgt, comp);
7158 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7159
7160 bsi = gsi_for_stmt (use->stmt);
7161 remove_phi_node (&bsi, false);
7162 }
7163 else
7164 {
7165 gimple_assign_set_rhs_from_tree (&bsi, comp);
7166 use->stmt = gsi_stmt (bsi);
7167 }
7168 }
7169
7170 /* Performs a peephole optimization to reorder the iv update statement with
7171 a mem ref to enable instruction combining in later phases. The mem ref uses
7172 the iv value before the update, so the reordering transformation requires
7173 adjustment of the offset. CAND is the selected IV_CAND.
7174
7175 Example:
7176
7177 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7178 iv2 = iv1 + 1;
7179
7180 if (t < val) (1)
7181 goto L;
7182 goto Head;
7183
7184
7185 directly propagating t over to (1) will introduce an overlapping live range
7186 and thus increase register pressure. This peephole transforms it into:
7187
7188
7189 iv2 = iv1 + 1;
7190 t = MEM_REF (base, iv2, 8, 8);
7191 if (t < val)
7192 goto L;
7193 goto Head;
7194 */
7195
7196 static void
7197 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7198 {
7199 tree var_after;
7200 gimple *iv_update, *stmt;
7201 basic_block bb;
7202 gimple_stmt_iterator gsi, gsi_iv;
7203
7204 if (cand->pos != IP_NORMAL)
7205 return;
7206
7207 var_after = cand->var_after;
7208 iv_update = SSA_NAME_DEF_STMT (var_after);
7209
7210 bb = gimple_bb (iv_update);
7211 gsi = gsi_last_nondebug_bb (bb);
7212 stmt = gsi_stmt (gsi);
7213
7214 /* Only handle conditional statements for now. */
7215 if (gimple_code (stmt) != GIMPLE_COND)
7216 return;
7217
7218 gsi_prev_nondebug (&gsi);
7219 stmt = gsi_stmt (gsi);
7220 if (stmt != iv_update)
7221 return;
7222
7223 gsi_prev_nondebug (&gsi);
7224 if (gsi_end_p (gsi))
7225 return;
7226
7227 stmt = gsi_stmt (gsi);
7228 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7229 return;
7230
7231 if (stmt != use->stmt)
7232 return;
7233
7234 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7235 return;
7236
7237 if (dump_file && (dump_flags & TDF_DETAILS))
7238 {
7239 fprintf (dump_file, "Reordering \n");
7240 print_gimple_stmt (dump_file, iv_update, 0, 0);
7241 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7242 fprintf (dump_file, "\n");
7243 }
7244
7245 gsi = gsi_for_stmt (use->stmt);
7246 gsi_iv = gsi_for_stmt (iv_update);
7247 gsi_move_before (&gsi_iv, &gsi);
7248
7249 cand->pos = IP_BEFORE_USE;
7250 cand->incremented_at = use->stmt;
7251 }
7252
7253 /* Rewrites USE (address that is an iv) using candidate CAND. */
7254
7255 static void
7256 rewrite_use_address_1 (struct ivopts_data *data,
7257 struct iv_use *use, struct iv_cand *cand)
7258 {
7259 aff_tree aff;
7260 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7261 tree base_hint = NULL_TREE;
7262 tree ref, iv;
7263 bool ok;
7264
7265 adjust_iv_update_pos (cand, use);
7266 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7267 gcc_assert (ok);
7268 unshare_aff_combination (&aff);
7269
7270 /* To avoid undefined overflow problems, all IV candidates use unsigned
7271 integer types. The drawback is that this makes it impossible for
7272 create_mem_ref to distinguish an IV that is based on a memory object
7273 from one that represents simply an offset.
7274
7275 To work around this problem, we pass a hint to create_mem_ref that
7276 indicates which variable (if any) in aff is an IV based on a memory
7277 object. Note that we only consider the candidate. If this is not
7278 based on an object, the base of the reference is in some subexpression
7279 of the use -- but these will use pointer types, so they are recognized
7280 by the create_mem_ref heuristics anyway. */
7281 if (cand->iv->base_object)
7282 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7283
7284 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7285 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7286 reference_alias_ptr_type (*use->op_p),
7287 iv, base_hint, data->speed);
7288 copy_ref_info (ref, *use->op_p);
7289 *use->op_p = ref;
7290 }
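
/* For illustration (a sketch only -- the exact shape depends on the
   addressing modes create_mem_ref decides to use), an address use such as
   a[i_1], based on a candidate that tracks the access directly, may be
   rewritten into a TARGET_MEM_REF of roughly the form

     MEM[symbol: a, index: ivtmp_5, step: 4]

   so that the original biv i_1 is no longer needed for the access.  */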
7291
7292 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7293 first use of a group, rewrites sub uses in the group too. */
7294
7295 static void
7296 rewrite_use_address (struct ivopts_data *data,
7297 struct iv_use *use, struct iv_cand *cand)
7298 {
7299 struct iv_use *next;
7300
7301 gcc_assert (use->sub_id == 0);
7302 rewrite_use_address_1 (data, use, cand);
7303 update_stmt (use->stmt);
7304
7305 for (next = use->next; next != NULL; next = next->next)
7306 {
7307 rewrite_use_address_1 (data, next, cand);
7308 update_stmt (next->stmt);
7309 }
7310
7311 return;
7312 }
7313
7314 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7315 candidate CAND. */
7316
7317 static void
7318 rewrite_use_compare (struct ivopts_data *data,
7319 struct iv_use *use, struct iv_cand *cand)
7320 {
7321 tree comp, *var_p, op, bound;
7322 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7323 enum tree_code compare;
7324 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7325 bool ok;
7326
7327 bound = cp->value;
7328 if (bound)
7329 {
7330 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7331 tree var_type = TREE_TYPE (var);
7332 gimple_seq stmts;
7333
7334 if (dump_file && (dump_flags & TDF_DETAILS))
7335 {
7336 fprintf (dump_file, "Replacing exit test: ");
7337 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7338 }
7339 compare = cp->comp;
7340 bound = unshare_expr (fold_convert (var_type, bound));
7341 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7342 if (stmts)
7343 gsi_insert_seq_on_edge_immediate (
7344 loop_preheader_edge (data->current_loop),
7345 stmts);
7346
7347 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7348 gimple_cond_set_lhs (cond_stmt, var);
7349 gimple_cond_set_code (cond_stmt, compare);
7350 gimple_cond_set_rhs (cond_stmt, op);
7351 return;
7352 }
7353
7354 /* The induction variable elimination failed; just express the original
7355 giv. */
7356 comp = get_computation (data->current_loop, use, cand);
7357 gcc_assert (comp != NULL_TREE);
7358
7359 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7360 gcc_assert (ok);
7361
7362 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7363 true, GSI_SAME_STMT);
7364 }
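
/* As an example of the successful-elimination path above (all names are
   illustrative), an exit test

     if (i_1 < n_2)

   for which a bound could be computed is replaced by a comparison of the
   selected candidate against that bound, e.g.

     if (ivtmp_5 != _7)

   with the statements computing _7 inserted on the loop preheader edge.  */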
7365
7366 /* Rewrites USE using candidate CAND. */
7367
7368 static void
7369 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7370 {
7371 switch (use->type)
7372 {
7373 case USE_NONLINEAR_EXPR:
7374 rewrite_use_nonlinear_expr (data, use, cand);
7375 break;
7376
7377 case USE_ADDRESS:
7378 rewrite_use_address (data, use, cand);
7379 break;
7380
7381 case USE_COMPARE:
7382 rewrite_use_compare (data, use, cand);
7383 break;
7384
7385 default:
7386 gcc_unreachable ();
7387 }
7388
7389 update_stmt (use->stmt);
7390 }
7391
7392 /* Rewrite the uses using the selected induction variables. */
7393
7394 static void
7395 rewrite_uses (struct ivopts_data *data)
7396 {
7397 unsigned i;
7398 struct iv_cand *cand;
7399 struct iv_use *use;
7400
7401 for (i = 0; i < n_iv_uses (data); i++)
7402 {
7403 use = iv_use (data, i);
7404 cand = use->selected;
7405 gcc_assert (cand);
7406
7407 rewrite_use (data, use, cand);
7408 }
7409 }
7410
7411 /* Removes the ivs that are not used after rewriting. */
7412
7413 static void
7414 remove_unused_ivs (struct ivopts_data *data)
7415 {
7416 unsigned j;
7417 bitmap_iterator bi;
7418 bitmap toremove = BITMAP_ALLOC (NULL);
7419
7420 /* Figure out an order in which to release SSA DEFs so that we don't
7421 release something that we'd have to propagate into a debug stmt
7422 afterwards. */
7423 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7424 {
7425 struct version_info *info;
7426
7427 info = ver_info (data, j);
7428 if (info->iv
7429 && !integer_zerop (info->iv->step)
7430 && !info->inv_id
7431 && !info->iv->have_use_for
7432 && !info->preserve_biv)
7433 {
7434 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7435
7436 tree def = info->iv->ssa_name;
7437
7438 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7439 {
7440 imm_use_iterator imm_iter;
7441 use_operand_p use_p;
7442 gimple *stmt;
7443 int count = 0;
7444
7445 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7446 {
7447 if (!gimple_debug_bind_p (stmt))
7448 continue;
7449
7450 /* We just want to determine whether to do nothing
7451 (count == 0), to substitute the computed
7452 expression into a single use of the SSA DEF by
7453 itself (count == 1), or to use a debug temp
7454 because the SSA DEF is used multiple times or as
7455 part of a larger expression (count > 1). */
7456 count++;
7457 if (gimple_debug_bind_get_value (stmt) != def)
7458 count++;
7459
7460 if (count > 1)
7461 BREAK_FROM_IMM_USE_STMT (imm_iter);
7462 }
7463
7464 if (!count)
7465 continue;
7466
7467 struct iv_use dummy_use;
7468 struct iv_cand *best_cand = NULL, *cand;
7469 unsigned i, best_pref = 0, cand_pref;
7470
7471 memset (&dummy_use, 0, sizeof (dummy_use));
7472 dummy_use.iv = info->iv;
7473 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7474 {
7475 cand = iv_use (data, i)->selected;
7476 if (cand == best_cand)
7477 continue;
7478 cand_pref = operand_equal_p (cand->iv->step,
7479 info->iv->step, 0)
7480 ? 4 : 0;
7481 cand_pref
7482 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7483 == TYPE_MODE (TREE_TYPE (info->iv->base))
7484 ? 2 : 0;
7485 cand_pref
7486 += TREE_CODE (cand->iv->base) == INTEGER_CST
7487 ? 1 : 0;
7488 if (best_cand == NULL || best_pref < cand_pref)
7489 {
7490 best_cand = cand;
7491 best_pref = cand_pref;
7492 }
7493 }
7494
7495 if (!best_cand)
7496 continue;
7497
7498 tree comp = get_computation_at (data->current_loop,
7499 &dummy_use, best_cand,
7500 SSA_NAME_DEF_STMT (def));
7501 if (!comp)
7502 continue;
7503
7504 if (count > 1)
7505 {
7506 tree vexpr = make_node (DEBUG_EXPR_DECL);
7507 DECL_ARTIFICIAL (vexpr) = 1;
7508 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7509 if (SSA_NAME_VAR (def))
7510 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7511 else
7512 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7513 gdebug *def_temp
7514 = gimple_build_debug_bind (vexpr, comp, NULL);
7515 gimple_stmt_iterator gsi;
7516
7517 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7518 gsi = gsi_after_labels (gimple_bb
7519 (SSA_NAME_DEF_STMT (def)));
7520 else
7521 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7522
7523 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7524 comp = vexpr;
7525 }
7526
7527 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7528 {
7529 if (!gimple_debug_bind_p (stmt))
7530 continue;
7531
7532 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7533 SET_USE (use_p, comp);
7534
7535 update_stmt (stmt);
7536 }
7537 }
7538 }
7539 }
7540
7541 release_defs_bitset (toremove);
7542
7543 BITMAP_FREE (toremove);
7544 }
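
/* A sketch of the debug handling above (names and the expression are
   illustrative): if a removed biv i_1 is still referenced by debug
   statements, its value is re-expressed in terms of a selected candidate;
   with several debug uses this goes through a debug temporary, roughly

     # DEBUG D#1 => ivtmp_5 * 4 + start_3
     # DEBUG i => D#1

   so the debug information survives the removal of the iv.  */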
7545
7546 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7547 for hash_map::traverse. */
7548
7549 bool
7550 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7551 {
7552 free (value);
7553 return true;
7554 }
7555
7556 /* Frees data allocated by the optimization of a single loop. */
7557
7558 static void
7559 free_loop_data (struct ivopts_data *data)
7560 {
7561 unsigned i, j;
7562 bitmap_iterator bi;
7563 tree obj;
7564
7565 if (data->niters)
7566 {
7567 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7568 delete data->niters;
7569 data->niters = NULL;
7570 }
7571
7572 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7573 {
7574 struct version_info *info;
7575
7576 info = ver_info (data, i);
7577 info->iv = NULL;
7578 info->has_nonlin_use = false;
7579 info->preserve_biv = false;
7580 info->inv_id = 0;
7581 }
7582 bitmap_clear (data->relevant);
7583 bitmap_clear (data->important_candidates);
7584
7585 for (i = 0; i < n_iv_uses (data); i++)
7586 {
7587 struct iv_use *use = iv_use (data, i);
7588 struct iv_use *pre = use, *sub = use->next;
7589
7590 while (sub)
7591 {
7592 gcc_assert (sub->related_cands == NULL);
7593 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7594
7595 pre = sub;
7596 sub = sub->next;
7597 free (pre);
7598 }
7599
7600 BITMAP_FREE (use->related_cands);
7601 for (j = 0; j < use->n_map_members; j++)
7602 if (use->cost_map[j].depends_on)
7603 BITMAP_FREE (use->cost_map[j].depends_on);
7604 free (use->cost_map);
7605 free (use);
7606 }
7607 data->iv_uses.truncate (0);
7608
7609 for (i = 0; i < n_iv_cands (data); i++)
7610 {
7611 struct iv_cand *cand = iv_cand (data, i);
7612
7613 if (cand->depends_on)
7614 BITMAP_FREE (cand->depends_on);
7615 free (cand);
7616 }
7617 data->iv_candidates.truncate (0);
7618
7619 if (data->version_info_size < num_ssa_names)
7620 {
7621 data->version_info_size = 2 * num_ssa_names;
7622 free (data->version_info);
7623 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7624 }
7625
7626 data->max_inv_id = 0;
7627
7628 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7629 SET_DECL_RTL (obj, NULL_RTX);
7630
7631 decl_rtl_to_reset.truncate (0);
7632
7633 data->inv_expr_tab->empty ();
7634 data->inv_expr_id = 0;
7635
7636 data->iv_common_cand_tab->empty ();
7637 data->iv_common_cands.truncate (0);
7638 }
7639
7640 /* Finalizes the data structures used by the iv optimization pass,
7641 freeing the memory allocated for them. */
7642
7643 static void
7644 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7645 {
7646 free_loop_data (data);
7647 free (data->version_info);
7648 BITMAP_FREE (data->relevant);
7649 BITMAP_FREE (data->important_candidates);
7650
7651 decl_rtl_to_reset.release ();
7652 data->iv_uses.release ();
7653 data->iv_candidates.release ();
7654 delete data->inv_expr_tab;
7655 data->inv_expr_tab = NULL;
7656 free_affine_expand_cache (&data->name_expansion_cache);
7657 delete data->iv_common_cand_tab;
7658 data->iv_common_cand_tab = NULL;
7659 data->iv_common_cands.release ();
7660 obstack_free (&data->iv_obstack, NULL);
7661 }
7662
7663 /* Returns true if the loop body BODY includes any function calls. */
7664
7665 static bool
7666 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7667 {
7668 gimple_stmt_iterator gsi;
7669 unsigned i;
7670
7671 for (i = 0; i < num_nodes; i++)
7672 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7673 {
7674 gimple *stmt = gsi_stmt (gsi);
7675 if (is_gimple_call (stmt)
7676 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7677 return true;
7678 }
7679 return false;
7680 }
7681
7682 /* Optimizes the LOOP. Returns true if anything changed. */
7683
7684 static bool
7685 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7686 {
7687 bool changed = false;
7688 struct iv_ca *iv_ca;
7689 edge exit = single_dom_exit (loop);
7690 basic_block *body;
7691
7692 gcc_assert (!data->niters);
7693 data->current_loop = loop;
7694 data->loop_loc = find_loop_location (loop);
7695 data->speed = optimize_loop_for_speed_p (loop);
7696
7697 if (dump_file && (dump_flags & TDF_DETAILS))
7698 {
7699 fprintf (dump_file, "Processing loop %d", loop->num);
7700 if (data->loop_loc != UNKNOWN_LOCATION)
7701 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7702 LOCATION_LINE (data->loop_loc));
7703 fprintf (dump_file, "\n");
7704
7705 if (exit)
7706 {
7707 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7708 exit->src->index, exit->dest->index);
7709 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7710 fprintf (dump_file, "\n");
7711 }
7712
7713 fprintf (dump_file, "\n");
7714 }
7715
7716 body = get_loop_body (loop);
7717 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7718 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7719 free (body);
7720
7721 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7722
7723 /* For each ssa name determines whether it behaves as an induction variable
7724 in some loop. */
7725 if (!find_induction_variables (data))
7726 goto finish;
7727
7728 /* Finds interesting uses (item 1). */
7729 find_interesting_uses (data);
7730 group_address_uses (data);
7731 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7732 goto finish;
7733
7734 /* Finds candidates for the induction variables (item 2). */
7735 find_iv_candidates (data);
7736
7737 /* Calculates the costs (item 3, part 1). */
7738 determine_iv_costs (data);
7739 determine_use_iv_costs (data);
7740 determine_set_costs (data);
7741
7742 /* Find the optimal set of induction variables (item 3, part 2). */
7743 iv_ca = find_optimal_iv_set (data);
7744 if (!iv_ca)
7745 goto finish;
7746 changed = true;
7747
7748 /* Create the new induction variables (item 4, part 1). */
7749 create_new_ivs (data, iv_ca);
7750 iv_ca_free (&iv_ca);
7751
7752 /* Rewrite the uses (item 4, part 2). */
7753 rewrite_uses (data);
7754
7755 /* Remove the ivs that are unused after rewriting. */
7756 remove_unused_ivs (data);
7757
7758 /* We have changed the structure of induction variables; it might happen
7759 that definitions in the scev database refer to some of them that were
7760 eliminated. */
7761 scev_reset ();
7762
7763 finish:
7764 free_loop_data (data);
7765
7766 return changed;
7767 }
7768
7769 /* Main entry point. Optimizes induction variables in loops. */
7770
7771 void
7772 tree_ssa_iv_optimize (void)
7773 {
7774 struct loop *loop;
7775 struct ivopts_data data;
7776
7777 tree_ssa_iv_optimize_init (&data);
7778
7779 /* Optimize the loops starting with the innermost ones. */
7780 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7781 {
7782 if (dump_file && (dump_flags & TDF_DETAILS))
7783 flow_loop_dump (loop, dump_file, NULL, 1);
7784
7785 tree_ssa_iv_optimize_loop (&data, loop);
7786 }
7787
7788 tree_ssa_iv_optimize_finalize (&data);
7789 }