1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only in the constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
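
/* As an illustration (a generic example, not taken from this file), for a
   loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   step 1 records the address a[i] and the comparison i < n as interesting
   uses (accesses such as a[i] and a[i + 1] would share one address group,
   since their bases differ only in a constant offset), step 2 adds
   candidates such as the original i and a pointer iterating over a[], and
   steps 3 and 4 may rewrite the loop along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   eliminating i entirely when it has no other uses.  */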
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "tm_p.h"
79 #include "ssa.h"
80 #include "expmed.h"
81 #include "insn-config.h"
82 #include "emit-rtl.h"
83 #include "recog.h"
84 #include "cgraph.h"
85 #include "gimple-pretty-print.h"
86 #include "alias.h"
87 #include "fold-const.h"
88 #include "stor-layout.h"
89 #include "tree-eh.h"
90 #include "gimplify.h"
91 #include "gimple-iterator.h"
92 #include "gimplify-me.h"
93 #include "tree-cfg.h"
94 #include "tree-ssa-loop-ivopts.h"
95 #include "tree-ssa-loop-manip.h"
96 #include "tree-ssa-loop-niter.h"
97 #include "tree-ssa-loop.h"
98 #include "explow.h"
99 #include "expr.h"
100 #include "tree-dfa.h"
101 #include "tree-ssa.h"
102 #include "cfgloop.h"
103 #include "tree-scalar-evolution.h"
104 #include "params.h"
105 #include "tree-affine.h"
106 #include "tree-ssa-propagate.h"
107 #include "tree-ssa-address.h"
108 #include "builtins.h"
109 #include "tree-vectorizer.h"
110
111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
112 cost of different addressing modes. This should be moved to a TBD
113 interface between the GIMPLE and RTL worlds. */
114
115 /* The infinite cost. */
116 #define INFTY 10000000
117
118 #define AVG_LOOP_NITER(LOOP) 5
119
120 /* Returns the expected number of loop iterations for LOOP.
121 The average trip count is computed from profile data if it
122 exists. */
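
/* For example, with profile feedback the estimate is returned directly;
   without it, a known likely maximum of, say, 3 statement executions gives
   3, while an unknown or larger bound falls back to AVG_LOOP_NITER (5).  */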
123
124 static inline HOST_WIDE_INT
125 avg_loop_niter (struct loop *loop)
126 {
127 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
128 if (niter == -1)
129 {
130 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > AVG_LOOP_NITER (loop))
132 return AVG_LOOP_NITER (loop);
133 }
134
135 return niter;
136 }
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The use recorded for this iv in a nonlinear expression, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
171 };
172
173 /* Cost of a computation. */
174 struct comp_cost
175 {
176 comp_cost (): cost (0), complexity (0), scratch (0)
177 {}
178
179 comp_cost (int cost, unsigned complexity, int scratch = 0)
180 : cost (cost), complexity (complexity), scratch (scratch)
181 {}
182
183 /* Returns true if COST is infinite. */
184 bool infinite_cost_p ();
185
186 /* Adds costs COST1 and COST2. */
187 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
188
189 /* Adds COST to the comp_cost. */
190 comp_cost operator+= (comp_cost cost);
191
192 /* Adds constant C to this comp_cost. */
193 comp_cost operator+= (HOST_WIDE_INT c);
194
195 /* Subtracts constant C from this comp_cost. */
196 comp_cost operator-= (HOST_WIDE_INT c);
197
198 /* Divides the comp_cost by constant C. */
199 comp_cost operator/= (HOST_WIDE_INT c);
200
201 /* Multiplies the comp_cost by constant C. */
202 comp_cost operator*= (HOST_WIDE_INT c);
203
204 /* Subtracts costs COST1 and COST2. */
205 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
206
207 /* Subtracts COST from this comp_cost. */
208 comp_cost operator-= (comp_cost cost);
209
210 /* Returns true if COST1 is smaller than COST2. */
211 friend bool operator< (comp_cost cost1, comp_cost cost2);
212
213 /* Returns true if COST1 and COST2 are equal. */
214 friend bool operator== (comp_cost cost1, comp_cost cost2);
215
216 /* Returns true if COST1 is smaller than or equal to COST2. */
217 friend bool operator<= (comp_cost cost1, comp_cost cost2);
218
219 int cost; /* The runtime cost. */
220 unsigned complexity; /* The estimate of the complexity of the code for
221 the computation (in no concrete units --
222 complexity field should be larger for more
223 complex expressions and addressing modes). */
224 int scratch; /* Scratch used during cost computation. */
225 };
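
/* Note that costs compare lexicographically: the runtime cost is compared
   first and complexity only breaks ties, so e.g. {4, 2} < {5, 0} and
   {4, 1} < {4, 2}.  */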
226
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
229
230 bool
231 comp_cost::infinite_cost_p ()
232 {
233 return cost == INFTY;
234 }
235
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
238 {
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
241
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
244
245 return cost1;
246 }
247
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
250 {
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
253
254 gcc_assert (!cost2.infinite_cost_p ());
255
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
258
259 return cost1;
260 }
261
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
264 {
265 *this = *this + cost;
266 return *this;
267 }
268
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
271 {
272 if (infinite_cost_p ())
273 return *this;
274
275 this->cost += c;
276
277 return *this;
278 }
279
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
282 {
283 if (infinite_cost_p ())
284 return *this;
285
286 this->cost -= c;
287
288 return *this;
289 }
290
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
293 {
294 if (infinite_cost_p ())
295 return *this;
296
297 this->cost /= c;
298
299 return *this;
300 }
301
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
304 {
305 if (infinite_cost_p ())
306 return *this;
307
308 this->cost *= c;
309
310 return *this;
311 }
312
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
315 {
316 *this = *this - cost;
317 return *this;
318 }
319
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
322 {
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
325
326 return cost1.cost < cost2.cost;
327 }
328
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
331 {
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
334 }
335
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
338 {
339 return cost1 < cost2 || cost1 == cost2;
340 }
341
342 struct iv_inv_expr_ent;
343
344 /* The candidate - cost pair. */
345 struct cost_pair
346 {
347 struct iv_cand *cand; /* The candidate. */
348 comp_cost cost; /* The cost. */
349 bitmap depends_on; /* The list of invariants that have to be
350 preserved. */
351 tree value; /* For final value elimination, the expression for
352 the final value of the iv. For iv elimination,
353 the new bound to compare with. */
354 enum tree_code comp; /* For iv elimination, the comparison. */
355 iv_inv_expr_ent *inv_expr; /* Loop invariant expression. */
356 };
357
358 /* Use. */
359 struct iv_use
360 {
361 unsigned id; /* The id of the use. */
362 unsigned group_id; /* The group id the use belongs to. */
363 enum use_type type; /* Type of the use. */
364 struct iv *iv; /* The induction variable it is based on. */
365 gimple *stmt; /* Statement in that it occurs. */
366 tree *op_p; /* The place where it occurs. */
367
368 tree addr_base; /* Base address with const offset stripped. */
369 unsigned HOST_WIDE_INT addr_offset;
370 /* Const offset stripped from base address. */
371 };
372
373 /* Group of uses. */
374 struct iv_group
375 {
376 /* The id of the group. */
377 unsigned id;
378 /* Uses of the group are of the same type. */
379 enum use_type type;
380 /* The set of "related" IV candidates, plus the important ones. */
381 bitmap related_cands;
382 /* Number of IV candidates in the cost_map. */
383 unsigned n_map_members;
384 /* The costs w.r.t. the iv candidates. */
385 struct cost_pair *cost_map;
386 /* The selected candidate for the group. */
387 struct iv_cand *selected;
388 /* Uses in the group. */
389 vec<struct iv_use *> vuses;
390 };
391
392 /* The position where the iv is computed. */
393 enum iv_position
394 {
395 IP_NORMAL, /* At the end, just before the exit condition. */
396 IP_END, /* At the end of the latch block. */
397 IP_BEFORE_USE, /* Immediately before a specific use. */
398 IP_AFTER_USE, /* Immediately after a specific use. */
399 IP_ORIGINAL /* The original biv. */
400 };
401
402 /* The induction variable candidate. */
403 struct iv_cand
404 {
405 unsigned id; /* The number of the candidate. */
406 bool important; /* Whether this is an "important" candidate, i.e. such
407 that it should be considered by all uses. */
408 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
409 gimple *incremented_at;/* For original biv, the statement where it is
410 incremented. */
411 tree var_before; /* The variable used for it before increment. */
412 tree var_after; /* The variable used for it after increment. */
413 struct iv *iv; /* The value of the candidate. NULL for
414 "pseudocandidate" used to indicate the possibility
415 to replace the final value of an iv by direct
416 computation of the value. */
417 unsigned cost; /* Cost of the candidate. */
418 unsigned cost_step; /* Cost of the candidate's increment operation. */
419 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
420 where it is incremented. */
421 bitmap depends_on; /* The list of invariants that are used in step of the
422 biv. */
423 struct iv *orig_iv; /* The original iv if this cand is added from biv with
424 smaller type. */
425 };
426
427 /* Hashtable entry for common candidate derived from iv uses. */
428 struct iv_common_cand
429 {
430 tree base;
431 tree step;
432 /* IV uses from which this common candidate is derived. */
433 auto_vec<struct iv_use *> uses;
434 hashval_t hash;
435 };
436
437 /* Hashtable helpers. */
438
439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
440 {
441 static inline hashval_t hash (const iv_common_cand *);
442 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
443 };
444
445 /* Hash function for possible common candidates. */
446
447 inline hashval_t
448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
449 {
450 return ccand->hash;
451 }
452
453 /* Hash table equality function for common candidates. */
454
455 inline bool
456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
457 const iv_common_cand *ccand2)
458 {
459 return (ccand1->hash == ccand2->hash
460 && operand_equal_p (ccand1->base, ccand2->base, 0)
461 && operand_equal_p (ccand1->step, ccand2->step, 0)
462 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
463 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
464 }
465
466 /* Loop invariant expression hashtable entry. */
467
468 struct iv_inv_expr_ent
469 {
470 /* Tree expression of the entry. */
471 tree expr;
472 /* Unique identifier. */
473 int id;
474 /* Hash value. */
475 hashval_t hash;
476 };
477
478 /* Sort iv_inv_expr_ent pair A and B by id field. */
479
480 static int
481 sort_iv_inv_expr_ent (const void *a, const void *b)
482 {
483 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
484 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
485
486 unsigned id1 = (*e1)->id;
487 unsigned id2 = (*e2)->id;
488
489 if (id1 < id2)
490 return -1;
491 else if (id1 > id2)
492 return 1;
493 else
494 return 0;
495 }
496
497 /* Hashtable helpers. */
498
499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
500 {
501 static inline hashval_t hash (const iv_inv_expr_ent *);
502 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
503 };
504
505 /* Hash function for loop invariant expressions. */
506
507 inline hashval_t
508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
509 {
510 return expr->hash;
511 }
512
513 /* Hash table equality function for expressions. */
514
515 inline bool
516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
517 const iv_inv_expr_ent *expr2)
518 {
519 return expr1->hash == expr2->hash
520 && operand_equal_p (expr1->expr, expr2->expr, 0);
521 }
522
523 struct ivopts_data
524 {
525 /* The currently optimized loop. */
526 struct loop *current_loop;
527 source_location loop_loc;
528
529 /* Numbers of iterations for all exits of the current loop. */
530 hash_map<edge, tree_niter_desc *> *niters;
531
532 /* Number of registers used in it. */
533 unsigned regs_used;
534
535 /* The size of version_info array allocated. */
536 unsigned version_info_size;
537
538 /* The array of information for the ssa names. */
539 struct version_info *version_info;
540
541 /* The hashtable of loop invariant expressions created
542 by ivopt. */
543 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
544
545 /* Loop invariant expression id. */
546 int max_inv_expr_id;
547
548 /* The bitmap of indices in version_info whose value was changed. */
549 bitmap relevant;
550
551 /* The uses of induction variables. */
552 vec<iv_group *> vgroups;
553
554 /* The candidates. */
555 vec<iv_cand *> vcands;
556
557 /* A bitmap of important candidates. */
558 bitmap important_candidates;
559
560 /* Cache used by tree_to_aff_combination_expand. */
561 hash_map<tree, name_expansion *> *name_expansion_cache;
562
563 /* The hashtable of common candidates derived from iv uses. */
564 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
565
566 /* The common candidates. */
567 vec<iv_common_cand *> iv_common_cands;
568
569 /* The maximum invariant id. */
570 unsigned max_inv_id;
571
572 /* Number of no_overflow BIVs which are not used in memory address. */
573 unsigned bivs_not_used_in_addr;
574
575 /* Obstack for iv structure. */
576 struct obstack iv_obstack;
577
578 /* Whether to consider all candidates when replacing a use, as opposed to
579 just the related and important ones. */
580 bool consider_all_candidates;
581
582 /* Are we optimizing for speed? */
583 bool speed;
584
585 /* Whether the loop body includes any function calls. */
586 bool body_includes_call;
587
588 /* Whether the loop body can only be exited via single exit. */
589 bool loop_single_exit_p;
590 };
591
592 /* An assignment of iv candidates to uses. */
593
594 struct iv_ca
595 {
596 /* The number of uses covered by the assignment. */
597 unsigned upto;
598
599 /* Number of uses that cannot be expressed by the candidates in the set. */
600 unsigned bad_groups;
601
602 /* Candidate assigned to a use, together with the related costs. */
603 struct cost_pair **cand_for_group;
604
605 /* Number of times each candidate is used. */
606 unsigned *n_cand_uses;
607
608 /* The candidates used. */
609 bitmap cands;
610
611 /* The number of candidates in the set. */
612 unsigned n_cands;
613
614 /* Total number of registers needed. */
615 unsigned n_regs;
616
617 /* Total cost of expressing uses. */
618 comp_cost cand_use_cost;
619
620 /* Total cost of candidates. */
621 unsigned cand_cost;
622
623 /* Number of times each invariant is used. */
624 unsigned *n_invariant_uses;
625
626 /* Hash set with used invariant expression. */
627 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
628
629 /* Total cost of the assignment. */
630 comp_cost cost;
631 };
632
633 /* Difference of two iv candidate assignments. */
634
635 struct iv_ca_delta
636 {
637 /* Changed group. */
638 struct iv_group *group;
639
640 /* An old assignment (for rollback purposes). */
641 struct cost_pair *old_cp;
642
643 /* A new assignment. */
644 struct cost_pair *new_cp;
645
646 /* Next change in the list. */
647 struct iv_ca_delta *next;
648 };
649
650 /* Bound on number of candidates below which all candidates are considered. */
651
652 #define CONSIDER_ALL_CANDIDATES_BOUND \
653 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
654
655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
656 optimizing such a loop would help, and it would take ages). */
657
658 #define MAX_CONSIDERED_GROUPS \
659 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
660
661 /* If there are at most this number of ivs in the set, always try removing
662 unnecessary ivs from the set. */
663
664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
665 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
666
667 /* The list of trees for which the decl_rtl field must be reset is stored
668 here. */
669
670 static vec<tree> decl_rtl_to_reset;
671
672 static comp_cost force_expr_to_var_cost (tree, bool);
673
674 /* The single loop exit if it dominates the latch, NULL otherwise. */
675
676 edge
677 single_dom_exit (struct loop *loop)
678 {
679 edge exit = single_exit (loop);
680
681 if (!exit)
682 return NULL;
683
684 if (!just_once_each_iteration_p (loop, exit->src))
685 return NULL;
686
687 return exit;
688 }
689
690 /* Dumps information about the induction variable IV to FILE. Don't dump
691 variable's name if DUMP_NAME is FALSE. The information is dumped with
692 preceding spaces indicated by INDENT_LEVEL. */
693
694 void
695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
696 {
697 const char *p;
698 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
699
700 if (indent_level > 4)
701 indent_level = 4;
702 p = spaces + 8 - (indent_level << 1);
703
704 fprintf (file, "%sIV struct:\n", p);
705 if (iv->ssa_name && dump_name)
706 {
707 fprintf (file, "%s SSA_NAME:\t", p);
708 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
709 fprintf (file, "\n");
710 }
711
712 fprintf (file, "%s Type:\t", p);
713 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
714 fprintf (file, "\n");
715
716 fprintf (file, "%s Base:\t", p);
717 print_generic_expr (file, iv->base, TDF_SLIM);
718 fprintf (file, "\n");
719
720 fprintf (file, "%s Step:\t", p);
721 print_generic_expr (file, iv->step, TDF_SLIM);
722 fprintf (file, "\n");
723
724 if (iv->base_object)
725 {
726 fprintf (file, "%s Object:\t", p);
727 print_generic_expr (file, iv->base_object, TDF_SLIM);
728 fprintf (file, "\n");
729 }
730
731 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
732
733 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
734 p, iv->no_overflow ? "No-overflow" : "Overflow");
735 }
736
737 /* Dumps information about the USE to FILE. */
738
739 void
740 dump_use (FILE *file, struct iv_use *use)
741 {
742 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
743 fprintf (file, " At stmt:\t");
744 print_gimple_stmt (file, use->stmt, 0, 0);
745 fprintf (file, " At pos:\t");
746 if (use->op_p)
747 print_generic_expr (file, *use->op_p, TDF_SLIM);
748 fprintf (file, "\n");
749 dump_iv (file, use->iv, false, 2);
750 }
751
752 /* Dumps information about the groups of uses to FILE. */
753
754 void
755 dump_groups (FILE *file, struct ivopts_data *data)
756 {
757 unsigned i, j;
758 struct iv_group *group;
759
760 for (i = 0; i < data->vgroups.length (); i++)
761 {
762 group = data->vgroups[i];
763 fprintf (file, "Group %d:\n", group->id);
764 if (group->type == USE_NONLINEAR_EXPR)
765 fprintf (file, " Type:\tGENERIC\n");
766 else if (group->type == USE_ADDRESS)
767 fprintf (file, " Type:\tADDRESS\n");
768 else
769 {
770 gcc_assert (group->type == USE_COMPARE);
771 fprintf (file, " Type:\tCOMPARE\n");
772 }
773 for (j = 0; j < group->vuses.length (); j++)
774 dump_use (file, group->vuses[j]);
775 }
776 }
777
778 /* Dumps information about induction variable candidate CAND to FILE. */
779
780 void
781 dump_cand (FILE *file, struct iv_cand *cand)
782 {
783 struct iv *iv = cand->iv;
784
785 fprintf (file, "Candidate %d:\n", cand->id);
786 if (cand->depends_on)
787 {
788 fprintf (file, " Depend on: ");
789 dump_bitmap (file, cand->depends_on);
790 }
791
792 if (cand->var_before)
793 {
794 fprintf (file, " Var befor: ");
795 print_generic_expr (file, cand->var_before, TDF_SLIM);
796 fprintf (file, "\n");
797 }
798 if (cand->var_after)
799 {
800 fprintf (file, " Var after: ");
801 print_generic_expr (file, cand->var_after, TDF_SLIM);
802 fprintf (file, "\n");
803 }
804
805 switch (cand->pos)
806 {
807 case IP_NORMAL:
808 fprintf (file, " Incr POS: before exit test\n");
809 break;
810
811 case IP_BEFORE_USE:
812 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
813 break;
814
815 case IP_AFTER_USE:
816 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
817 break;
818
819 case IP_END:
820 fprintf (file, " Incr POS: at end\n");
821 break;
822
823 case IP_ORIGINAL:
824 fprintf (file, " Incr POS: orig biv\n");
825 break;
826 }
827
828 dump_iv (file, iv, false, 1);
829 }
830
831 /* Returns the info for ssa version VER. */
832
833 static inline struct version_info *
834 ver_info (struct ivopts_data *data, unsigned ver)
835 {
836 return data->version_info + ver;
837 }
838
839 /* Returns the info for ssa name NAME. */
840
841 static inline struct version_info *
842 name_info (struct ivopts_data *data, tree name)
843 {
844 return ver_info (data, SSA_NAME_VERSION (name));
845 }
846
847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
848 emitted in LOOP. */
849
850 static bool
851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
852 {
853 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
854
855 gcc_assert (bb);
856
857 if (sbb == loop->latch)
858 return true;
859
860 if (sbb != bb)
861 return false;
862
863 return stmt == last_stmt (bb);
864 }
865
866 /* Returns true if STMT is after the place where the original induction
867 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
868 if the positions are identical. */
869
870 static bool
871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
872 {
873 basic_block cand_bb = gimple_bb (cand->incremented_at);
874 basic_block stmt_bb = gimple_bb (stmt);
875
876 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
877 return false;
878
879 if (stmt_bb != cand_bb)
880 return true;
881
882 if (true_if_equal
883 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
884 return true;
885 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
886 }
887
888 /* Returns true if STMT is after the place where the induction variable
889 CAND is incremented in LOOP. */
890
891 static bool
892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
893 {
894 switch (cand->pos)
895 {
896 case IP_END:
897 return false;
898
899 case IP_NORMAL:
900 return stmt_after_ip_normal_pos (loop, stmt);
901
902 case IP_ORIGINAL:
903 case IP_AFTER_USE:
904 return stmt_after_inc_pos (cand, stmt, false);
905
906 case IP_BEFORE_USE:
907 return stmt_after_inc_pos (cand, stmt, true);
908
909 default:
910 gcc_unreachable ();
911 }
912 }
913
914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
915
916 static bool
917 abnormal_ssa_name_p (tree exp)
918 {
919 if (!exp)
920 return false;
921
922 if (TREE_CODE (exp) != SSA_NAME)
923 return false;
924
925 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
926 }
927
928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
929 abnormal phi node. Callback for for_each_index. */
930
931 static bool
932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
933 void *data ATTRIBUTE_UNUSED)
934 {
935 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
936 {
937 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
938 return false;
939 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
940 return false;
941 }
942
943 return !abnormal_ssa_name_p (*index);
944 }
945
946 /* Returns true if EXPR contains a ssa name that occurs in an
947 abnormal phi node. */
948
949 bool
950 contains_abnormal_ssa_name_p (tree expr)
951 {
952 enum tree_code code;
953 enum tree_code_class codeclass;
954
955 if (!expr)
956 return false;
957
958 code = TREE_CODE (expr);
959 codeclass = TREE_CODE_CLASS (code);
960
961 if (code == SSA_NAME)
962 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
963
964 if (code == INTEGER_CST
965 || is_gimple_min_invariant (expr))
966 return false;
967
968 if (code == ADDR_EXPR)
969 return !for_each_index (&TREE_OPERAND (expr, 0),
970 idx_contains_abnormal_ssa_name_p,
971 NULL);
972
973 if (code == COND_EXPR)
974 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
975 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
976 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
977
978 switch (codeclass)
979 {
980 case tcc_binary:
981 case tcc_comparison:
982 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
983 return true;
984
985 /* Fallthru. */
986 case tcc_unary:
987 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
988 return true;
989
990 break;
991
992 default:
993 gcc_unreachable ();
994 }
995
996 return false;
997 }
998
999 /* Returns the structure describing number of iterations determined from
1000 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1001
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1004 {
1005 struct tree_niter_desc *desc;
1006 tree_niter_desc **slot;
1007
1008 if (!data->niters)
1009 {
1010 data->niters = new hash_map<edge, tree_niter_desc *>;
1011 slot = NULL;
1012 }
1013 else
1014 slot = data->niters->get (exit);
1015
1016 if (!slot)
1017 {
1018 /* Try to determine number of iterations. We cannot safely work with ssa
1019 names that appear in phi nodes on abnormal edges, so that we do not
1020 create overlapping life ranges for them (PR 27283). */
1021 desc = XNEW (struct tree_niter_desc);
1022 if (!number_of_iterations_exit (data->current_loop,
1023 exit, desc, true)
1024 || contains_abnormal_ssa_name_p (desc->niter))
1025 {
1026 XDELETE (desc);
1027 desc = NULL;
1028 }
1029 data->niters->put (exit, desc);
1030 }
1031 else
1032 desc = *slot;
1033
1034 return desc;
1035 }
1036
1037 /* Returns the structure describing number of iterations determined from
1038 single dominating exit of DATA->current_loop, or NULL if something
1039 goes wrong. */
1040
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1043 {
1044 edge exit = single_dom_exit (data->current_loop);
1045
1046 if (!exit)
1047 return NULL;
1048
1049 return niter_for_exit (data, exit);
1050 }
1051
1052 /* Initializes data structures used by the iv optimization pass, stored
1053 in DATA. */
1054
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1057 {
1058 data->version_info_size = 2 * num_ssa_names;
1059 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060 data->relevant = BITMAP_ALLOC (NULL);
1061 data->important_candidates = BITMAP_ALLOC (NULL);
1062 data->max_inv_id = 0;
1063 data->niters = NULL;
1064 data->vgroups.create (20);
1065 data->vcands.create (20);
1066 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067 data->max_inv_expr_id = 0;
1068 data->name_expansion_cache = NULL;
1069 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070 data->iv_common_cands.create (20);
1071 decl_rtl_to_reset.create (20);
1072 gcc_obstack_init (&data->iv_obstack);
1073 }
1074
1075 /* Returns a memory object to which EXPR points. In case we are able to
1076 determine that it does not point to any such object, NULL is returned. */
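
/* For instance, for an EXPR of the form &a[i] + 16 the returned object is
   &a; for a pointer SSA name of unknown origin the name itself is returned
   (converted to ptr_type_node), and for an integer constant the result is
   NULL.  */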
1077
1078 static tree
1079 determine_base_object (tree expr)
1080 {
1081 enum tree_code code = TREE_CODE (expr);
1082 tree base, obj;
1083
1084 /* If this is a pointer cast to any type, we need to determine
1085 the base object for the pointer; so handle conversions before
1086 throwing away non-pointer expressions. */
1087 if (CONVERT_EXPR_P (expr))
1088 return determine_base_object (TREE_OPERAND (expr, 0));
1089
1090 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091 return NULL_TREE;
1092
1093 switch (code)
1094 {
1095 case INTEGER_CST:
1096 return NULL_TREE;
1097
1098 case ADDR_EXPR:
1099 obj = TREE_OPERAND (expr, 0);
1100 base = get_base_address (obj);
1101
1102 if (!base)
1103 return expr;
1104
1105 if (TREE_CODE (base) == MEM_REF)
1106 return determine_base_object (TREE_OPERAND (base, 0));
1107
1108 return fold_convert (ptr_type_node,
1109 build_fold_addr_expr (base));
1110
1111 case POINTER_PLUS_EXPR:
1112 return determine_base_object (TREE_OPERAND (expr, 0));
1113
1114 case PLUS_EXPR:
1115 case MINUS_EXPR:
1116 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1117 gcc_unreachable ();
1118
1119 default:
1120 return fold_convert (ptr_type_node, expr);
1121 }
1122 }
1123
1124 /* Return true if address expression with non-DECL_P operand appears
1125 in EXPR. */
1126
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1129 {
1130 bool res = false;
1131
1132 STRIP_NOPS (expr);
1133 switch (TREE_CODE (expr))
1134 {
1135 case POINTER_PLUS_EXPR:
1136 case PLUS_EXPR:
1137 case MINUS_EXPR:
1138 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140 break;
1141
1142 case ADDR_EXPR:
1143 return (!DECL_P (TREE_OPERAND (expr, 0)));
1144
1145 default:
1146 return false;
1147 }
1148
1149 return res;
1150 }
1151
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1154
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157 bool no_overflow = false)
1158 {
1159 tree expr = base;
1160 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161 sizeof (struct iv));
1162 gcc_assert (step != NULL_TREE);
1163
1164 /* Lower address expression in base except ones with DECL_P as operand.
1165 By doing this:
1166 1) More accurate cost can be computed for address expressions;
1167 2) Duplicate candidates won't be created for bases in different
1168 forms, like &a[0] and &a. */
1169 STRIP_NOPS (expr);
1170 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171 || contain_complex_addr_expr (expr))
1172 {
1173 aff_tree comb;
1174 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1176 }
1177
1178 iv->base = base;
1179 iv->base_object = determine_base_object (base);
1180 iv->step = step;
1181 iv->biv_p = false;
1182 iv->nonlin_use = NULL;
1183 iv->ssa_name = NULL_TREE;
1184 iv->no_overflow = no_overflow;
1185 iv->have_address_use = false;
1186
1187 return iv;
1188 }
1189
1190 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1191 doesn't overflow. */
1192
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195 bool no_overflow)
1196 {
1197 struct version_info *info = name_info (data, iv);
1198
1199 gcc_assert (!info->iv);
1200
1201 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202 info->iv = alloc_iv (data, base, step, no_overflow);
1203 info->iv->ssa_name = iv;
1204 }
1205
1206 /* Finds induction variable declaration for VAR. */
1207
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1210 {
1211 basic_block bb;
1212 tree type = TREE_TYPE (var);
1213
1214 if (!POINTER_TYPE_P (type)
1215 && !INTEGRAL_TYPE_P (type))
1216 return NULL;
1217
1218 if (!name_info (data, var)->iv)
1219 {
1220 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1221
1222 if (!bb
1223 || !flow_bb_inside_loop_p (data->current_loop, bb))
1224 set_iv (data, var, var, build_int_cst (type, 0), true);
1225 }
1226
1227 return name_info (data, var)->iv;
1228 }
1229
1230 /* Return the first non-invariant ssa var found in EXPR. */
1231
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1234 {
1235 int i, n;
1236 tree tmp;
1237 enum tree_code code;
1238
1239 if (!expr || is_gimple_min_invariant (expr))
1240 return NULL;
1241
1242 code = TREE_CODE (expr);
1243 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1244 {
1245 n = TREE_OPERAND_LENGTH (expr);
1246 for (i = 0; i < n; i++)
1247 {
1248 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1249
1250 if (tmp)
1251 return tmp;
1252 }
1253 }
1254 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1255 }
1256
1257 /* Finds basic ivs. */
1258
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1261 {
1262 gphi *phi;
1263 affine_iv iv;
1264 tree step, type, base, stop;
1265 bool found = false;
1266 struct loop *loop = data->current_loop;
1267 gphi_iterator psi;
1268
1269 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1270 {
1271 phi = psi.phi ();
1272
1273 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274 continue;
1275
1276 if (virtual_operand_p (PHI_RESULT (phi)))
1277 continue;
1278
1279 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280 continue;
1281
1282 if (integer_zerop (iv.step))
1283 continue;
1284
1285 step = iv.step;
1286 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287 /* Stop expanding the iv base at the first ssa var referred to by the iv
1288 step. Ideally we would stop at any ssa var; since that is expensive
1289 and such cases are unusual, we just do it for the first one.
1290 
1291 See PR64705 for the rationale. */
1292 stop = extract_single_var_from_expr (step);
1293 base = expand_simple_operations (base, stop);
1294 if (contains_abnormal_ssa_name_p (base)
1295 || contains_abnormal_ssa_name_p (step))
1296 continue;
1297
1298 type = TREE_TYPE (PHI_RESULT (phi));
1299 base = fold_convert (type, base);
1300 if (step)
1301 {
1302 if (POINTER_TYPE_P (type))
1303 step = convert_to_ptrofftype (step);
1304 else
1305 step = fold_convert (type, step);
1306 }
1307
1308 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309 found = true;
1310 }
1311
1312 return found;
1313 }
1314
1315 /* Marks basic ivs. */
1316
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1319 {
1320 gphi *phi;
1321 gimple *def;
1322 tree var;
1323 struct iv *iv, *incr_iv;
1324 struct loop *loop = data->current_loop;
1325 basic_block incr_bb;
1326 gphi_iterator psi;
1327
1328 data->bivs_not_used_in_addr = 0;
1329 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1330 {
1331 phi = psi.phi ();
1332
1333 iv = get_iv (data, PHI_RESULT (phi));
1334 if (!iv)
1335 continue;
1336
1337 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338 def = SSA_NAME_DEF_STMT (var);
1339 /* Don't mark iv peeled from other one as biv. */
1340 if (def
1341 && gimple_code (def) == GIMPLE_PHI
1342 && gimple_bb (def) == loop->header)
1343 continue;
1344
1345 incr_iv = get_iv (data, var);
1346 if (!incr_iv)
1347 continue;
1348
1349 /* If the increment is in the subloop, ignore it. */
1350 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351 if (incr_bb->loop_father != data->current_loop
1352 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353 continue;
1354
1355 iv->biv_p = true;
1356 incr_iv->biv_p = true;
1357 if (iv->no_overflow)
1358 data->bivs_not_used_in_addr++;
1359 if (incr_iv->no_overflow)
1360 data->bivs_not_used_in_addr++;
1361 }
1362 }
1363
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365 parameters to IV. */
1366
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1369 {
1370 tree lhs, stop;
1371 struct loop *loop = data->current_loop;
1372
1373 iv->base = NULL_TREE;
1374 iv->step = NULL_TREE;
1375
1376 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377 return false;
1378
1379 lhs = gimple_assign_lhs (stmt);
1380 if (TREE_CODE (lhs) != SSA_NAME)
1381 return false;
1382
1383 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384 return false;
1385
1386 /* Stop expanding the iv base at the first ssa var referred to by the iv
1387 step. Ideally we would stop at any ssa var; since that is expensive
1388 and such cases are unusual, we just do it for the first one.
1389 
1390 See PR64705 for the rationale. */
1391 stop = extract_single_var_from_expr (iv->step);
1392 iv->base = expand_simple_operations (iv->base, stop);
1393 if (contains_abnormal_ssa_name_p (iv->base)
1394 || contains_abnormal_ssa_name_p (iv->step))
1395 return false;
1396
1397 /* If STMT could throw, then do not consider STMT as defining a GIV.
1398 While this will suppress optimizations, we can not safely delete this
1399 GIV and associated statements, even if it appears it is not used. */
1400 if (stmt_could_throw_p (stmt))
1401 return false;
1402
1403 return true;
1404 }
1405
1406 /* Finds general ivs in statement STMT. */
1407
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1410 {
1411 affine_iv iv;
1412
1413 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414 return;
1415
1416 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1417 }
1418
1419 /* Finds general ivs in basic block BB. */
1420
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1423 {
1424 gimple_stmt_iterator bsi;
1425
1426 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427 find_givs_in_stmt (data, gsi_stmt (bsi));
1428 }
1429
1430 /* Finds general ivs. */
1431
1432 static void
1433 find_givs (struct ivopts_data *data)
1434 {
1435 struct loop *loop = data->current_loop;
1436 basic_block *body = get_loop_body_in_dom_order (loop);
1437 unsigned i;
1438
1439 for (i = 0; i < loop->num_nodes; i++)
1440 find_givs_in_bb (data, body[i]);
1441 free (body);
1442 }
1443
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445 variable and if so, its initial value and step. */
1446
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1449 {
1450 unsigned i;
1451 bitmap_iterator bi;
1452
1453 if (!find_bivs (data))
1454 return false;
1455
1456 find_givs (data);
1457 mark_bivs (data);
1458
1459 if (dump_file && (dump_flags & TDF_DETAILS))
1460 {
1461 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1462
1463 if (niter)
1464 {
1465 fprintf (dump_file, " number of iterations ");
1466 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467 if (!integer_zerop (niter->may_be_zero))
1468 {
1469 fprintf (dump_file, "; zero if ");
1470 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1471 }
1472 fprintf (dump_file, "\n");
1473 }
1474
1475 fprintf (dump_file, "\n<Induction Vars>:\n");
1476 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1477 {
1478 struct version_info *info = ver_info (data, i);
1479 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1481 }
1482 }
1483
1484 return true;
1485 }
1486
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488 For an address type use, ADDR_BASE is the stripped IV base and ADDR_OFFSET
1489 is the const offset stripped from the IV base; for uses of other types,
1490 both are zero by default. */
1491
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494 gimple *stmt, enum use_type type, tree addr_base,
1495 unsigned HOST_WIDE_INT addr_offset)
1496 {
1497 struct iv_use *use = XCNEW (struct iv_use);
1498
1499 use->id = group->vuses.length ();
1500 use->group_id = group->id;
1501 use->type = type;
1502 use->iv = iv;
1503 use->stmt = stmt;
1504 use->op_p = use_p;
1505 use->addr_base = addr_base;
1506 use->addr_offset = addr_offset;
1507
1508 group->vuses.safe_push (use);
1509 return use;
1510 }
1511
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513 NONLINEAR_USE is true if the invariant is used in a way we do not
1514 handle specially. */
1515
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1518 {
1519 basic_block bb;
1520 struct version_info *info;
1521
1522 if (TREE_CODE (op) != SSA_NAME
1523 || virtual_operand_p (op))
1524 return;
1525
1526 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527 if (bb
1528 && flow_bb_inside_loop_p (data->current_loop, bb))
1529 return;
1530
1531 info = name_info (data, op);
1532 info->name = op;
1533 info->has_nonlin_use |= nonlinear_use;
1534 if (!info->inv_id)
1535 info->inv_id = ++data->max_inv_id;
1536 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1537 }
1538
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1541
1542 /* Record a group of TYPE. */
1543
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1546 {
1547 struct iv_group *group = XCNEW (struct iv_group);
1548
1549 group->id = data->vgroups.length ();
1550 group->type = type;
1551 group->related_cands = BITMAP_ALLOC (NULL);
1552 group->vuses.create (1);
1553
1554 data->vgroups.safe_push (group);
1555 return group;
1556 }
1557
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559 A new group will be created if there is no existing group for the use. */
1560
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563 struct iv *iv, gimple *stmt, enum use_type type)
1564 {
1565 tree addr_base = NULL;
1566 struct iv_group *group = NULL;
1567 unsigned HOST_WIDE_INT addr_offset = 0;
1568
1569 /* Try to put an address type use into an existing group with the same stripped base and step; any other use gets a new group below. */
1570 if (type == USE_ADDRESS && iv->base_object)
1571 {
1572 unsigned int i;
1573
1574 addr_base = strip_offset (iv->base, &addr_offset);
1575 for (i = 0; i < data->vgroups.length (); i++)
1576 {
1577 struct iv_use *use;
1578
1579 group = data->vgroups[i];
1580 use = group->vuses[0];
1581 if (use->type != USE_ADDRESS || !use->iv->base_object)
1582 continue;
1583
1584 /* Check if it has the same stripped base and step. */
1585 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586 && operand_equal_p (iv->step, use->iv->step, 0)
1587 && operand_equal_p (addr_base, use->addr_base, 0))
1588 break;
1589 }
1590 if (i == data->vgroups.length ())
1591 group = NULL;
1592 }
1593
1594 if (!group)
1595 group = record_group (data, type);
1596
1597 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1598 }
1599
1600 /* Checks whether the use OP is interesting and if so, records it. */
1601
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1604 {
1605 struct iv *iv;
1606 gimple *stmt;
1607 struct iv_use *use;
1608
1609 if (TREE_CODE (op) != SSA_NAME)
1610 return NULL;
1611
1612 iv = get_iv (data, op);
1613 if (!iv)
1614 return NULL;
1615
1616 if (iv->nonlin_use)
1617 {
1618 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619 return iv->nonlin_use;
1620 }
1621
1622 if (integer_zerop (iv->step))
1623 {
1624 record_invariant (data, op, true);
1625 return NULL;
1626 }
1627
1628 stmt = SSA_NAME_DEF_STMT (op);
1629 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1630
1631 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632 iv->nonlin_use = use;
1633 return use;
1634 }
1635
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637 of an induction variable and an invariant. If this is the case,
1638 CONTROL_VAR is set to location of the iv, BOUND to the location of
1639 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640 induction variable descriptions, and true is returned. If this is not
1641 the case, CONTROL_VAR and BOUND are set to the arguments of the
1642 condition and false is returned. */
1643
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646 tree **control_var, tree **bound,
1647 struct iv **iv_var, struct iv **iv_bound)
1648 {
1649 /* The objects returned when COND has constant operands. */
1650 static struct iv const_iv;
1651 static tree zero;
1652 tree *op0 = &zero, *op1 = &zero;
1653 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654 bool ret = false;
1655
1656 if (gimple_code (stmt) == GIMPLE_COND)
1657 {
1658 gcond *cond_stmt = as_a <gcond *> (stmt);
1659 op0 = gimple_cond_lhs_ptr (cond_stmt);
1660 op1 = gimple_cond_rhs_ptr (cond_stmt);
1661 }
1662 else
1663 {
1664 op0 = gimple_assign_rhs1_ptr (stmt);
1665 op1 = gimple_assign_rhs2_ptr (stmt);
1666 }
1667
1668 zero = integer_zero_node;
1669 const_iv.step = integer_zero_node;
1670
1671 if (TREE_CODE (*op0) == SSA_NAME)
1672 iv0 = get_iv (data, *op0);
1673 if (TREE_CODE (*op1) == SSA_NAME)
1674 iv1 = get_iv (data, *op1);
1675
1676 /* Exactly one of the compared values must be an iv, and the other one must
1677 be an invariant. */
1678 if (!iv0 || !iv1)
1679 goto end;
1680
1681 if (integer_zerop (iv0->step))
1682 {
1683 /* Control variable may be on the other side. */
1684 std::swap (op0, op1);
1685 std::swap (iv0, iv1);
1686 }
1687 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1688
1689 end:
1690 if (control_var)
1691 *control_var = op0;
1692 if (iv_var)
1693 *iv_var = iv0;
1694 if (bound)
1695 *bound = op1;
1696 if (iv_bound)
1697 *iv_bound = iv1;
1698
1699 return ret;
1700 }
1701
1702 /* Checks whether the condition in STMT is interesting and if so,
1703 records it. */
1704
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1707 {
1708 tree *var_p, *bound_p;
1709 struct iv *var_iv;
1710
1711 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1712 {
1713 find_interesting_uses_op (data, *var_p);
1714 find_interesting_uses_op (data, *bound_p);
1715 return;
1716 }
1717
1718 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1719 }
1720
1721 /* Returns the outermost loop in which EXPR is obviously invariant
1722 relative to the loop LOOP, i.e. in which all its operands are defined
1723 outside of the returned loop. Returns NULL if EXPR is not
1724 even obviously invariant in LOOP. */
1725
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1728 {
1729 basic_block def_bb;
1730 unsigned i, len;
1731
1732 if (is_gimple_min_invariant (expr))
1733 return current_loops->tree_root;
1734
1735 if (TREE_CODE (expr) == SSA_NAME)
1736 {
1737 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738 if (def_bb)
1739 {
1740 if (flow_bb_inside_loop_p (loop, def_bb))
1741 return NULL;
1742 return superloop_at_depth (loop,
1743 loop_depth (def_bb->loop_father) + 1);
1744 }
1745
1746 return current_loops->tree_root;
1747 }
1748
1749 if (!EXPR_P (expr))
1750 return NULL;
1751
1752 unsigned maxdepth = 0;
1753 len = TREE_OPERAND_LENGTH (expr);
1754 for (i = 0; i < len; i++)
1755 {
1756 struct loop *ivloop;
1757 if (!TREE_OPERAND (expr, i))
1758 continue;
1759
1760 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761 if (!ivloop)
1762 return NULL;
1763 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1764 }
1765
1766 return superloop_at_depth (loop, maxdepth);
1767 }
1768
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770 i.e. if all its operands are defined outside of the LOOP. LOOP
1771 should not be the function body. */
1772
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1775 {
1776 basic_block def_bb;
1777 unsigned i, len;
1778
1779 gcc_assert (loop_depth (loop) > 0);
1780
1781 if (is_gimple_min_invariant (expr))
1782 return true;
1783
1784 if (TREE_CODE (expr) == SSA_NAME)
1785 {
1786 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787 if (def_bb
1788 && flow_bb_inside_loop_p (loop, def_bb))
1789 return false;
1790
1791 return true;
1792 }
1793
1794 if (!EXPR_P (expr))
1795 return false;
1796
1797 len = TREE_OPERAND_LENGTH (expr);
1798 for (i = 0; i < len; i++)
1799 if (TREE_OPERAND (expr, i)
1800 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801 return false;
1802
1803 return true;
1804 }
1805
1806 /* Given expression EXPR which computes inductive values with respect
1807 to the loop recorded in DATA, this function returns the biv from which EXPR
1808 is derived by tracing definition chains of ssa variables in EXPR. */
1809
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1812 {
1813 struct iv *iv;
1814 unsigned i, n;
1815 tree e2, e1;
1816 enum tree_code code;
1817 gimple *stmt;
1818
1819 if (expr == NULL_TREE)
1820 return NULL;
1821
1822 if (is_gimple_min_invariant (expr))
1823 return NULL;
1824
1825 code = TREE_CODE (expr);
1826 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1827 {
1828 n = TREE_OPERAND_LENGTH (expr);
1829 for (i = 0; i < n; i++)
1830 {
1831 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832 if (iv)
1833 return iv;
1834 }
1835 }
1836
1837 /* Stop if it's not ssa name. */
1838 if (code != SSA_NAME)
1839 return NULL;
1840
1841 iv = get_iv (data, expr);
1842 if (!iv || integer_zerop (iv->step))
1843 return NULL;
1844 else if (iv->biv_p)
1845 return iv;
1846
1847 stmt = SSA_NAME_DEF_STMT (expr);
1848 if (gphi *phi = dyn_cast <gphi *> (stmt))
1849 {
1850 ssa_op_iter iter;
1851 use_operand_p use_p;
1852
1853 if (virtual_operand_p (gimple_phi_result (phi)))
1854 return NULL;
1855
1856 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1857 {
1858 tree use = USE_FROM_PTR (use_p);
1859 iv = find_deriving_biv_for_expr (data, use);
1860 if (iv)
1861 return iv;
1862 }
1863 return NULL;
1864 }
1865 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866 return NULL;
1867
1868 e1 = gimple_assign_rhs1 (stmt);
1869 code = gimple_assign_rhs_code (stmt);
1870 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871 return find_deriving_biv_for_expr (data, e1);
1872
1873 switch (code)
1874 {
1875 case MULT_EXPR:
1876 case PLUS_EXPR:
1877 case MINUS_EXPR:
1878 case POINTER_PLUS_EXPR:
1879 /* Increments, decrements and multiplications by a constant
1880 are simple. */
1881 e2 = gimple_assign_rhs2 (stmt);
1882 iv = find_deriving_biv_for_expr (data, e2);
1883 if (iv)
1884 return iv;
1885
1886 /* Fallthru. */
1887 CASE_CONVERT:
1888 /* Casts are simple. */
1889 return find_deriving_biv_for_expr (data, e1);
1890
1891 default:
1892 break;
1893 }
1894
1895 return NULL;
1896 }
1897
1898 /* Record that BIV, together with its predecessor and successor bivs, is
1899 used in address type uses. */
1900
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1903 {
1904 unsigned i;
1905 tree type, base_1, base_2;
1906 bitmap_iterator bi;
1907
1908 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909 || biv->have_address_use || !biv->no_overflow)
1910 return;
1911
1912 type = TREE_TYPE (biv->base);
1913 if (!INTEGRAL_TYPE_P (type))
1914 return;
1915
1916 biv->have_address_use = true;
1917 data->bivs_not_used_in_addr--;
1918 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1920 {
1921 struct iv *iv = ver_info (data, i)->iv;
1922
1923 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924 || iv->have_address_use || !iv->no_overflow)
1925 continue;
1926
1927 if (type != TREE_TYPE (iv->base)
1928 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929 continue;
1930
1931 if (!operand_equal_p (biv->step, iv->step, 0))
1932 continue;
1933
1934 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935 if (operand_equal_p (base_1, iv->base, 0)
1936 || operand_equal_p (base_2, biv->base, 0))
1937 {
1938 iv->have_address_use = true;
1939 data->bivs_not_used_in_addr--;
1940 }
1941 }
1942 }
1943
1944 /* Accumulates the steps of indices into DATA and replaces their values with the
1945 initial ones. Returns false when the value of the index cannot be determined.
1946 Callback for for_each_index. */
1947
1948 struct ifs_ivopts_data
1949 {
1950 struct ivopts_data *ivopts_data;
1951 gimple *stmt;
1952 tree step;
1953 };
1954
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1957 {
1958 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959 struct iv *iv;
1960 bool use_overflow_semantics = false;
1961 tree step, iv_base, iv_step, lbound, off;
1962 struct loop *loop = dta->ivopts_data->current_loop;
1963
1964 /* If base is a component ref, require that the offset of the reference
1965 be invariant. */
1966 if (TREE_CODE (base) == COMPONENT_REF)
1967 {
1968 off = component_ref_field_offset (base);
1969 return expr_invariant_in_loop_p (loop, off);
1970 }
1971
1972 /* If base is array, first check whether we will be able to move the
1973 reference out of the loop (in order to take its address in strength
1974 reduction). In order for this to work we need both lower bound
1975 and step to be loop invariants. */
1976 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1977 {
1978 /* Moreover, for a range, the size needs to be invariant as well. */
1979 if (TREE_CODE (base) == ARRAY_RANGE_REF
1980 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981 return false;
1982
1983 step = array_ref_element_size (base);
1984 lbound = array_ref_low_bound (base);
1985
1986 if (!expr_invariant_in_loop_p (loop, step)
1987 || !expr_invariant_in_loop_p (loop, lbound))
1988 return false;
1989 }
1990
1991 if (TREE_CODE (*idx) != SSA_NAME)
1992 return true;
1993
1994 iv = get_iv (dta->ivopts_data, *idx);
1995 if (!iv)
1996 return false;
1997
1998 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1999 *&x[0], which is not folded and does not trigger the
2000 ARRAY_REF path below. */
2001 *idx = iv->base;
2002
2003 if (integer_zerop (iv->step))
2004 return true;
2005
2006 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2007 {
2008 step = array_ref_element_size (base);
2009
2010 /* We only handle addresses whose step is an integer constant. */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 return false;
2013 }
2014 else
2015 /* The step for pointer arithmetic is already 1 byte. */
2016 step = size_one_node;
2017
2018 iv_base = iv->base;
2019 iv_step = iv->step;
2020 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021 use_overflow_semantics = true;
2022
2023 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024 sizetype, &iv_base, &iv_step, dta->stmt,
2025 use_overflow_semantics))
2026 {
2027 /* The index might wrap. */
2028 return false;
2029 }
2030
2031 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2033
2034 if (dta->ivopts_data->bivs_not_used_in_addr)
2035 {
2036 if (!iv->biv_p)
2037 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2038
2039 record_biv_for_address_use (dta->ivopts_data, iv);
2040 }
2041 return true;
2042 }
2043
2044 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2045 object is passed to it in DATA. */
2046
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049 void *vdata)
2050 {
2051 struct ivopts_data *data = (struct ivopts_data *) vdata;
2052 find_interesting_uses_op (data, *idx);
2053 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054 {
2055 find_interesting_uses_op (data, array_ref_element_size (base));
2056 find_interesting_uses_op (data, array_ref_low_bound (base));
2057 }
2058 return true;
2059 }
2060
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062 store cst to MUL and return true. Otherwise return false.
2063 The returned value is always sign-extended, regardless of the
2064 signedness of TOP and BOT. */
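/* A couple of illustrative cases: with TOP = i * 4 + i * 8 and BOT = i
the function stores 12 in MUL; with TOP = 12 and BOT = 4 it stores 3;
with TOP = 13 and BOT = 4 it returns false because the division leaves
a remainder. */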
2065
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2068 {
2069 tree mby;
2070 enum tree_code code;
2071 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072 widest_int res, p0, p1;
2073
2074 STRIP_NOPS (top);
2075 STRIP_NOPS (bot);
2076
2077 if (operand_equal_p (top, bot, 0))
2078 {
2079 *mul = 1;
2080 return true;
2081 }
2082
2083 code = TREE_CODE (top);
2084 switch (code)
2085 {
2086 case MULT_EXPR:
2087 mby = TREE_OPERAND (top, 1);
2088 if (TREE_CODE (mby) != INTEGER_CST)
2089 return false;
2090
2091 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092 return false;
2093
2094 *mul = wi::sext (res * wi::to_widest (mby), precision);
2095 return true;
2096
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101 return false;
2102
2103 if (code == MINUS_EXPR)
2104 p1 = -p1;
2105 *mul = wi::sext (p0 + p1, precision);
2106 return true;
2107
2108 case INTEGER_CST:
2109 if (TREE_CODE (bot) != INTEGER_CST)
2110 return false;
2111
2112 p0 = widest_int::from (top, SIGNED);
2113 p1 = widest_int::from (bot, SIGNED);
2114 if (p1 == 0)
2115 return false;
2116 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117 return res == 0;
2118
2119 default:
2120 return false;
2121 }
2122 }
2123
2124 /* Return true if memory reference REF with step STEP may be unaligned. */
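/* For example, a 4-byte access whose address advances by STEP = 2 bytes
per iteration may be unaligned: the step only guarantees 16-bit
alignment, less than the 32-bit alignment the access requires, so the
final check on tree_ctz (step) below reports that the reference may be
unaligned. */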
2125
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2128 {
2129 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130 thus they are not misaligned. */
2131 if (TREE_CODE (ref) == TARGET_MEM_REF)
2132 return false;
2133
2134 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2137
2138 unsigned HOST_WIDE_INT bitpos;
2139 unsigned int ref_align;
2140 get_object_alignment_1 (ref, &ref_align, &bitpos);
2141 if (ref_align < align
2142 || (bitpos % align) != 0
2143 || (bitpos % BITS_PER_UNIT) != 0)
2144 return true;
2145
2146 unsigned int trailing_zeros = tree_ctz (step);
2147 if (trailing_zeros < HOST_BITS_PER_INT
2148 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149 return true;
2150
2151 return false;
2152 }
2153
2154 /* Return true if EXPR may be non-addressable. */
2155
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2158 {
2159 switch (TREE_CODE (expr))
2160 {
2161 case TARGET_MEM_REF:
2162 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163 target, thus they are always addressable. */
2164 return false;
2165
2166 case MEM_REF:
2167 /* Likewise for MEM_REFs, modulo the storage order. */
2168 return REF_REVERSE_STORAGE_ORDER (expr);
2169
2170 case BIT_FIELD_REF:
2171 if (REF_REVERSE_STORAGE_ORDER (expr))
2172 return true;
2173 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2174
2175 case COMPONENT_REF:
2176 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177 return true;
2178 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2180
2181 case ARRAY_REF:
2182 case ARRAY_RANGE_REF:
2183 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184 return true;
2185 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2186
2187 case VIEW_CONVERT_EXPR:
2188 /* This kind of view-conversion may wrap non-addressable objects
2189 and make them look addressable. After some processing the
2190 non-addressability may be uncovered again, causing ADDR_EXPRs
2191 of inappropriate objects to be built. */
2192 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194 return true;
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2196
2197 CASE_CONVERT:
2198 return true;
2199
2200 default:
2201 break;
2202 }
2203
2204 return false;
2205 }
2206
2207 /* Finds addresses in *OP_P inside STMT. */
2208
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211 tree *op_p)
2212 {
2213 tree base = *op_p, step = size_zero_node;
2214 struct iv *civ;
2215 struct ifs_ivopts_data ifs_ivopts_data;
2216
2217 /* Do not play with volatile memory references. A bit too conservative,
2218 perhaps, but safe. */
2219 if (gimple_has_volatile_ops (stmt))
2220 goto fail;
2221
2222 /* Ignore bitfields for now. Not really something terribly complicated
2223 to handle. TODO. */
2224 if (TREE_CODE (base) == BIT_FIELD_REF)
2225 goto fail;
2226
2227 base = unshare_expr (base);
2228
2229 if (TREE_CODE (base) == TARGET_MEM_REF)
2230 {
2231 tree type = build_pointer_type (TREE_TYPE (base));
2232 tree astep;
2233
2234 if (TMR_BASE (base)
2235 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2236 {
2237 civ = get_iv (data, TMR_BASE (base));
2238 if (!civ)
2239 goto fail;
2240
2241 TMR_BASE (base) = civ->base;
2242 step = civ->step;
2243 }
2244 if (TMR_INDEX2 (base)
2245 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2246 {
2247 civ = get_iv (data, TMR_INDEX2 (base));
2248 if (!civ)
2249 goto fail;
2250
2251 TMR_INDEX2 (base) = civ->base;
2252 step = civ->step;
2253 }
2254 if (TMR_INDEX (base)
2255 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2256 {
2257 civ = get_iv (data, TMR_INDEX (base));
2258 if (!civ)
2259 goto fail;
2260
2261 TMR_INDEX (base) = civ->base;
2262 astep = civ->step;
2263
2264 if (astep)
2265 {
2266 if (TMR_STEP (base))
2267 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2268
2269 step = fold_build2 (PLUS_EXPR, type, step, astep);
2270 }
2271 }
2272
2273 if (integer_zerop (step))
2274 goto fail;
2275 base = tree_mem_ref_addr (type, base);
2276 }
2277 else
2278 {
2279 ifs_ivopts_data.ivopts_data = data;
2280 ifs_ivopts_data.stmt = stmt;
2281 ifs_ivopts_data.step = size_zero_node;
2282 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283 || integer_zerop (ifs_ivopts_data.step))
2284 goto fail;
2285 step = ifs_ivopts_data.step;
2286
2287 /* Check that the base expression is addressable. This needs
2288 to be done after substituting bases of IVs into it. */
2289 if (may_be_nonaddressable_p (base))
2290 goto fail;
2291
2292 /* Moreover, on strict alignment platforms, check that it is
2293 sufficiently aligned. */
2294 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295 goto fail;
2296
2297 base = build_fold_addr_expr (base);
2298
2299 /* Substituting bases of IVs into the base expression might
2300 have caused folding opportunities. */
2301 if (TREE_CODE (base) == ADDR_EXPR)
2302 {
2303 tree *ref = &TREE_OPERAND (base, 0);
2304 while (handled_component_p (*ref))
2305 ref = &TREE_OPERAND (*ref, 0);
2306 if (TREE_CODE (*ref) == MEM_REF)
2307 {
2308 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309 TREE_OPERAND (*ref, 0),
2310 TREE_OPERAND (*ref, 1));
2311 if (tem)
2312 *ref = tem;
2313 }
2314 }
2315 }
2316
2317 civ = alloc_iv (data, base, step);
2318 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319 return;
2320
2321 fail:
2322 for_each_index (op_p, idx_record_use, data);
2323 }
2324
2325 /* Finds and records invariants used in STMT. */
2326
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2329 {
2330 ssa_op_iter iter;
2331 use_operand_p use_p;
2332 tree op;
2333
2334 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2335 {
2336 op = USE_FROM_PTR (use_p);
2337 record_invariant (data, op, false);
2338 }
2339 }
2340
2341 /* Finds interesting uses of induction variables in the statement STMT. */
2342
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2345 {
2346 struct iv *iv;
2347 tree op, *lhs, *rhs;
2348 ssa_op_iter iter;
2349 use_operand_p use_p;
2350 enum tree_code code;
2351
2352 find_invariants_stmt (data, stmt);
2353
2354 if (gimple_code (stmt) == GIMPLE_COND)
2355 {
2356 find_interesting_uses_cond (data, stmt);
2357 return;
2358 }
2359
2360 if (is_gimple_assign (stmt))
2361 {
2362 lhs = gimple_assign_lhs_ptr (stmt);
2363 rhs = gimple_assign_rhs1_ptr (stmt);
2364
2365 if (TREE_CODE (*lhs) == SSA_NAME)
2366 {
2367 /* If the statement defines an induction variable, the uses are not
2368 interesting by themselves. */
2369
2370 iv = get_iv (data, *lhs);
2371
2372 if (iv && !integer_zerop (iv->step))
2373 return;
2374 }
2375
2376 code = gimple_assign_rhs_code (stmt);
2377 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378 && (REFERENCE_CLASS_P (*rhs)
2379 || is_gimple_val (*rhs)))
2380 {
2381 if (REFERENCE_CLASS_P (*rhs))
2382 find_interesting_uses_address (data, stmt, rhs);
2383 else
2384 find_interesting_uses_op (data, *rhs);
2385
2386 if (REFERENCE_CLASS_P (*lhs))
2387 find_interesting_uses_address (data, stmt, lhs);
2388 return;
2389 }
2390 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2391 {
2392 find_interesting_uses_cond (data, stmt);
2393 return;
2394 }
2395
2396 /* TODO -- we should also handle address uses of type
2397
2398 memory = call (whatever);
2399
2400 and
2401
2402 call (memory). */
2403 }
2404
2405 if (gimple_code (stmt) == GIMPLE_PHI
2406 && gimple_bb (stmt) == data->current_loop->header)
2407 {
2408 iv = get_iv (data, PHI_RESULT (stmt));
2409
2410 if (iv && !integer_zerop (iv->step))
2411 return;
2412 }
2413
2414 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2415 {
2416 op = USE_FROM_PTR (use_p);
2417
2418 if (TREE_CODE (op) != SSA_NAME)
2419 continue;
2420
2421 iv = get_iv (data, op);
2422 if (!iv)
2423 continue;
2424
2425 find_interesting_uses_op (data, op);
2426 }
2427 }
2428
2429 /* Finds interesting uses of induction variables outside of loops
2430 on loop exit edge EXIT. */
2431
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2434 {
2435 gphi *phi;
2436 gphi_iterator psi;
2437 tree def;
2438
2439 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2440 {
2441 phi = psi.phi ();
2442 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443 if (!virtual_operand_p (def))
2444 find_interesting_uses_op (data, def);
2445 }
2446 }
2447
2448 /* Compute the maximum offset of the [base + offset] addressing mode
2449 for the memory reference represented by USE. */
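/* For example, on a hypothetical target that accepts 16-bit signed
displacements for the access's mode, the probing loop below first
succeeds for i = 15, so the cached maximum offset becomes
(1 << 15) - 1 = 32767. The result is remembered per (address space,
memory mode) pair in max_offset_list. */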
2450
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2453 {
2454 int width;
2455 rtx reg, addr;
2456 HOST_WIDE_INT i, off;
2457 unsigned list_index, num;
2458 addr_space_t as;
2459 machine_mode mem_mode, addr_mode;
2460 static vec<HOST_WIDE_INT> max_offset_list;
2461
2462 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2464
2465 num = max_offset_list.length ();
2466 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467 if (list_index >= num)
2468 {
2469 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470 for (; num < max_offset_list.length (); num++)
2471 max_offset_list[num] = -1;
2472 }
2473
2474 off = max_offset_list[list_index];
2475 if (off != -1)
2476 return off;
2477
2478 addr_mode = targetm.addr_space.address_mode (as);
2479 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2481
2482 width = GET_MODE_BITSIZE (addr_mode) - 1;
2483 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484 width = HOST_BITS_PER_WIDE_INT - 1;
2485
2486 for (i = width; i > 0; i--)
2487 {
2488 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490 if (memory_address_addr_space_p (mem_mode, addr, as))
2491 break;
2492
2493 /* For some strict-alignment targets, the offset must be naturally
2494 aligned. Try an aligned offset if mem_mode is not QImode. */
2495 off = ((unsigned HOST_WIDE_INT) 1 << i);
2496 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2497 {
2498 off -= GET_MODE_SIZE (mem_mode);
2499 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500 if (memory_address_addr_space_p (mem_mode, addr, as))
2501 break;
2502 }
2503 }
2504 if (i == 0)
2505 off = 0;
2506
2507 max_offset_list[list_index] = off;
2508 return off;
2509 }
2510
2511 /* Comparison function to sort group in ascending order of addr_offset. */
2512
2513 static int
2514 group_compare_offset (const void *a, const void *b)
2515 {
2516 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2518
2519 if ((*u1)->addr_offset != (*u2)->addr_offset)
2520 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521 else
2522 return 0;
2523 }
2524
2525 /* Check if small groups should be split. Return true if no group
2526 contains more than two uses with distinct addr_offsets. Return
2527 false otherwise. We want to split such groups because:
2528
2529 1) Small groups don't have much benefit and may interfere with
2530 general candidate selection.
2531 2) A problem consisting only of small groups is usually small, and
2532 the general algorithm can handle it well.
2533
2534 TODO -- The above claim may not hold when we want to merge memory
2535 accesses with consecutive addresses. */
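/* For illustration: if every address group looks like {a[i], a[i + 1]},
i.e. has at most two distinct offsets, this returns true and the caller
splits uses at distinct offsets into separate groups; a single group such
as {a[i], a[i + 4], a[i + 8]} with three distinct offsets makes it return
false. */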
2536
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2539 {
2540 unsigned int i, j, distinct = 1;
2541 struct iv_use *pre;
2542 struct iv_group *group;
2543
2544 for (i = 0; i < data->vgroups.length (); i++)
2545 {
2546 group = data->vgroups[i];
2547 if (group->vuses.length () == 1)
2548 continue;
2549
2550 gcc_assert (group->type == USE_ADDRESS);
2551 if (group->vuses.length () == 2)
2552 {
2553 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554 std::swap (group->vuses[0], group->vuses[1]);
2555 }
2556 else
2557 group->vuses.qsort (group_compare_offset);
2558
2559 if (distinct > 2)
2560 continue;
2561
2562 distinct = 1;
2563 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2564 {
2565 if (group->vuses[j]->addr_offset != pre->addr_offset)
2566 {
2567 pre = group->vuses[j];
2568 distinct++;
2569 }
2570
2571 if (distinct > 2)
2572 break;
2573 }
2574 }
2575
2576 return (distinct <= 2);
2577 }
2578
2579 /* For each group of address type uses, this function further groups
2580 these uses according to the maximum offset supported by the target's
2581 [base + offset] addressing mode. */
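/* For instance, assuming the target's maximum supported offset is 4095:
uses at offsets 0, 8 and 4096 relative to the first use end up in two
groups, {0, 8} and {4096}; the new group is appended to the group vector
and handled in a later iteration of the loop below. */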
2582
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2585 {
2586 unsigned int i, j;
2587 HOST_WIDE_INT max_offset = -1;
2588
2589 /* Reset max offset to split all small groups. */
2590 if (split_small_address_groups_p (data))
2591 max_offset = 0;
2592
2593 for (i = 0; i < data->vgroups.length (); i++)
2594 {
2595 struct iv_group *group = data->vgroups[i];
2596 struct iv_use *use = group->vuses[0];
2597
2598 use->id = 0;
2599 use->group_id = group->id;
2600 if (group->vuses.length () == 1)
2601 continue;
2602
2603 if (max_offset != 0)
2604 max_offset = compute_max_addr_offset (use);
2605
2606 for (j = 1; j < group->vuses.length (); j++)
2607 {
2608 struct iv_use *next = group->vuses[j];
2609
2610 /* Only uses whose offset relative to the first use fits in the
2611 offset part can be grouped together. */
2612 if (next->addr_offset - use->addr_offset
2613 > (unsigned HOST_WIDE_INT) max_offset)
2614 break;
2615
2616 next->id = j;
2617 next->group_id = group->id;
2618 }
2619 /* Split group. */
2620 if (j < group->vuses.length ())
2621 {
2622 struct iv_group *new_group = record_group (data, group->type);
2623 new_group->vuses.safe_splice (group->vuses);
2624 new_group->vuses.block_remove (0, j);
2625 group->vuses.truncate (j);
2626 }
2627 }
2628 }
2629
2630 /* Finds uses of the induction variables that are interesting. */
2631
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2634 {
2635 basic_block bb;
2636 gimple_stmt_iterator bsi;
2637 basic_block *body = get_loop_body (data->current_loop);
2638 unsigned i;
2639 edge e;
2640
2641 for (i = 0; i < data->current_loop->num_nodes; i++)
2642 {
2643 edge_iterator ei;
2644 bb = body[i];
2645
2646 FOR_EACH_EDGE (e, ei, bb->succs)
2647 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2649 find_interesting_uses_outside (data, e);
2650
2651 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2653 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654 if (!is_gimple_debug (gsi_stmt (bsi)))
2655 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2656 }
2657
2658 split_address_groups (data);
2659
2660 if (dump_file && (dump_flags & TDF_DETAILS))
2661 {
2662 bitmap_iterator bi;
2663
2664 fprintf (dump_file, "\n<Invariant Vars>:\n");
2665 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2666 {
2667 struct version_info *info = ver_info (data, i);
2668 if (info->inv_id)
2669 {
2670 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671 print_generic_expr (dump_file, info->name, TDF_SLIM);
2672 fprintf (dump_file, "%s\n",
2673 info->has_nonlin_use ? "" : "\t(eliminable)");
2674 }
2675 }
2676
2677 fprintf (dump_file, "\n<IV Groups>:\n");
2678 dump_groups (dump_file, data);
2679 fprintf (dump_file, "\n");
2680 }
2681
2682 free (body);
2683 }
2684
2685 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2686 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2687 we are at the top-level of the processed address. */
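/* For example, stripping the constant offset from &a[i + 3], where the
elements of a are 4 bytes wide, yields &a[i] with *OFFSET set to 12: the
ADDR_EXPR case recurses with INSIDE_ADDR set and the ARRAY_REF case peels
the constant 3 off the index and scales it by the element size. */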
2688
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691 HOST_WIDE_INT *offset)
2692 {
2693 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694 enum tree_code code;
2695 tree type, orig_type = TREE_TYPE (expr);
2696 HOST_WIDE_INT off0, off1, st;
2697 tree orig_expr = expr;
2698
2699 STRIP_NOPS (expr);
2700
2701 type = TREE_TYPE (expr);
2702 code = TREE_CODE (expr);
2703 *offset = 0;
2704
2705 switch (code)
2706 {
2707 case INTEGER_CST:
2708 if (!cst_and_fits_in_hwi (expr)
2709 || integer_zerop (expr))
2710 return orig_expr;
2711
2712 *offset = int_cst_value (expr);
2713 return build_int_cst (orig_type, 0);
2714
2715 case POINTER_PLUS_EXPR:
2716 case PLUS_EXPR:
2717 case MINUS_EXPR:
2718 op0 = TREE_OPERAND (expr, 0);
2719 op1 = TREE_OPERAND (expr, 1);
2720
2721 op0 = strip_offset_1 (op0, false, false, &off0);
2722 op1 = strip_offset_1 (op1, false, false, &off1);
2723
2724 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725 if (op0 == TREE_OPERAND (expr, 0)
2726 && op1 == TREE_OPERAND (expr, 1))
2727 return orig_expr;
2728
2729 if (integer_zerop (op1))
2730 expr = op0;
2731 else if (integer_zerop (op0))
2732 {
2733 if (code == MINUS_EXPR)
2734 expr = fold_build1 (NEGATE_EXPR, type, op1);
2735 else
2736 expr = op1;
2737 }
2738 else
2739 expr = fold_build2 (code, type, op0, op1);
2740
2741 return fold_convert (orig_type, expr);
2742
2743 case MULT_EXPR:
2744 op1 = TREE_OPERAND (expr, 1);
2745 if (!cst_and_fits_in_hwi (op1))
2746 return orig_expr;
2747
2748 op0 = TREE_OPERAND (expr, 0);
2749 op0 = strip_offset_1 (op0, false, false, &off0);
2750 if (op0 == TREE_OPERAND (expr, 0))
2751 return orig_expr;
2752
2753 *offset = off0 * int_cst_value (op1);
2754 if (integer_zerop (op0))
2755 expr = op0;
2756 else
2757 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2758
2759 return fold_convert (orig_type, expr);
2760
2761 case ARRAY_REF:
2762 case ARRAY_RANGE_REF:
2763 if (!inside_addr)
2764 return orig_expr;
2765
2766 step = array_ref_element_size (expr);
2767 if (!cst_and_fits_in_hwi (step))
2768 break;
2769
2770 st = int_cst_value (step);
2771 op1 = TREE_OPERAND (expr, 1);
2772 op1 = strip_offset_1 (op1, false, false, &off1);
2773 *offset = off1 * st;
2774
2775 if (top_compref
2776 && integer_zerop (op1))
2777 {
2778 /* Strip the component reference completely. */
2779 op0 = TREE_OPERAND (expr, 0);
2780 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781 *offset += off0;
2782 return op0;
2783 }
2784 break;
2785
2786 case COMPONENT_REF:
2787 {
2788 tree field;
2789
2790 if (!inside_addr)
2791 return orig_expr;
2792
2793 tmp = component_ref_field_offset (expr);
2794 field = TREE_OPERAND (expr, 1);
2795 if (top_compref
2796 && cst_and_fits_in_hwi (tmp)
2797 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2798 {
2799 HOST_WIDE_INT boffset, abs_off;
2800
2801 /* Strip the component reference completely. */
2802 op0 = TREE_OPERAND (expr, 0);
2803 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806 if (boffset < 0)
2807 abs_off = -abs_off;
2808
2809 *offset = off0 + int_cst_value (tmp) + abs_off;
2810 return op0;
2811 }
2812 }
2813 break;
2814
2815 case ADDR_EXPR:
2816 op0 = TREE_OPERAND (expr, 0);
2817 op0 = strip_offset_1 (op0, true, true, &off0);
2818 *offset += off0;
2819
2820 if (op0 == TREE_OPERAND (expr, 0))
2821 return orig_expr;
2822
2823 expr = build_fold_addr_expr (op0);
2824 return fold_convert (orig_type, expr);
2825
2826 case MEM_REF:
2827 /* ??? Offset operand? */
2828 inside_addr = false;
2829 break;
2830
2831 default:
2832 return orig_expr;
2833 }
2834
2835 /* Default handling of expressions for which we want to recurse into
2836 the first operand. */
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839 *offset += off0;
2840
2841 if (op0 == TREE_OPERAND (expr, 0)
2842 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843 return orig_expr;
2844
2845 expr = copy_node (expr);
2846 TREE_OPERAND (expr, 0) = op0;
2847 if (op1)
2848 TREE_OPERAND (expr, 1) = op1;
2849
2850 /* Inside an address, we might strip the top level component references,
2851 thus changing the type of the expression. Handling of ADDR_EXPR
2852 will fix that. */
2853 expr = fold_convert (orig_type, expr);
2854
2855 return expr;
2856 }
2857
2858 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2859
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2862 {
2863 HOST_WIDE_INT off;
2864 tree core = strip_offset_1 (expr, false, false, &off);
2865 *offset = off;
2866 return core;
2867 }
2868
2869 /* Returns a variant of TYPE that can be used as the base for different uses.
2870 We return an unsigned type with the same precision, which avoids problems
2871 with overflows. */
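/* E.g. for int this returns unsigned int, for int * it returns an unsigned
integer type of pointer precision, and unsigned types are returned
unchanged. */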
2872
2873 static tree
2874 generic_type_for (tree type)
2875 {
2876 if (POINTER_TYPE_P (type))
2877 return unsigned_type_for (type);
2878
2879 if (TYPE_UNSIGNED (type))
2880 return type;
2881
2882 return unsigned_type_for (type);
2883 }
2884
2885 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2886 the bitmap in which we should store them. */
2887
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2891 {
2892 bitmap *depends_on = (bitmap *) data;
2893 struct version_info *info;
2894
2895 if (TREE_CODE (*expr_p) != SSA_NAME)
2896 return NULL_TREE;
2897 info = name_info (fd_ivopts_data, *expr_p);
2898
2899 if (!info->inv_id || info->has_nonlin_use)
2900 return NULL_TREE;
2901
2902 if (!*depends_on)
2903 *depends_on = BITMAP_ALLOC (NULL);
2904 bitmap_set_bit (*depends_on, info->inv_id);
2905
2906 return NULL_TREE;
2907 }
2908
2909 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2910 and the position to POS. If USE is not NULL, the candidate is set as
2911 related to it. BASE and STEP must both be non-NULL; the function asserts
2912 this below. */
2913
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916 tree base, tree step, bool important, enum iv_position pos,
2917 struct iv_use *use, gimple *incremented_at,
2918 struct iv *orig_iv = NULL)
2919 {
2920 unsigned i;
2921 struct iv_cand *cand = NULL;
2922 tree type, orig_type;
2923
2924 gcc_assert (base && step);
2925
2926 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927 live, but the ivopts code may replace a real pointer with one
2928 pointing before or after the memory block that is then adjusted
2929 into the memory block during the loop. FIXME: It would likely be
2930 better to actually force the pointer live and still use ivopts;
2931 for example, it would be enough to write the pointer into memory
2932 and keep it there until after the loop. */
2933 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934 return NULL;
2935
2936 /* For non-original variables, make sure their values are computed in a type
2937 that does not invoke undefined behavior on overflows (since in general,
2938 we cannot prove that these induction variables are non-wrapping). */
2939 if (pos != IP_ORIGINAL)
2940 {
2941 orig_type = TREE_TYPE (base);
2942 type = generic_type_for (orig_type);
2943 if (type != orig_type)
2944 {
2945 base = fold_convert (type, base);
2946 step = fold_convert (type, step);
2947 }
2948 }
2949
2950 for (i = 0; i < data->vcands.length (); i++)
2951 {
2952 cand = data->vcands[i];
2953
2954 if (cand->pos != pos)
2955 continue;
2956
2957 if (cand->incremented_at != incremented_at
2958 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959 && cand->ainc_use != use))
2960 continue;
2961
2962 if (operand_equal_p (base, cand->iv->base, 0)
2963 && operand_equal_p (step, cand->iv->step, 0)
2964 && (TYPE_PRECISION (TREE_TYPE (base))
2965 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966 break;
2967 }
2968
2969 if (i == data->vcands.length ())
2970 {
2971 cand = XCNEW (struct iv_cand);
2972 cand->id = i;
2973 cand->iv = alloc_iv (data, base, step);
2974 cand->pos = pos;
2975 if (pos != IP_ORIGINAL)
2976 {
2977 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978 cand->var_after = cand->var_before;
2979 }
2980 cand->important = important;
2981 cand->incremented_at = incremented_at;
2982 data->vcands.safe_push (cand);
2983
2984 if (TREE_CODE (step) != INTEGER_CST)
2985 {
2986 fd_ivopts_data = data;
2987 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2988 }
2989
2990 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991 cand->ainc_use = use;
2992 else
2993 cand->ainc_use = NULL;
2994
2995 cand->orig_iv = orig_iv;
2996 if (dump_file && (dump_flags & TDF_DETAILS))
2997 dump_cand (dump_file, cand);
2998 }
2999
3000 cand->important |= important;
3001
3002 /* Relate candidate to the group for which it is added. */
3003 if (use)
3004 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3005
3006 return cand;
3007 }
3008
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010 is allowed.
3011
3012 The purpose is to avoid splitting the latch edge with a biv increment, thus
3013 creating a jump, possibly confusing other optimization passes and leaving
3014 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
3015 is not available (so we do not have a better alternative), or if the latch
3016 edge is already nonempty. */
3017
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3020 {
3021 if (!ip_normal_pos (loop))
3022 return true;
3023
3024 if (!empty_block_p (ip_end_pos (loop)))
3025 return true;
3026
3027 return false;
3028 }
3029
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031 Important field is set to IMPORTANT. */
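/* For instance, if the target supports post-increment addressing for the
access's mode and the step equals the mode size (say a 4-byte load whose
address advances by 4 each iteration), a candidate positioned right after
the use (IP_AFTER_USE) is added so that the increment can later be folded
into the memory access. */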
3032
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035 bool important, struct iv_use *use)
3036 {
3037 basic_block use_bb = gimple_bb (use->stmt);
3038 machine_mode mem_mode;
3039 unsigned HOST_WIDE_INT cstepi;
3040
3041 /* If we insert the increment in any position other than the standard
3042 ones, we must ensure that it is incremented once per iteration.
3043 It must not be in an inner nested loop, or one side of an if
3044 statement. */
3045 if (use_bb->loop_father != data->current_loop
3046 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047 || stmt_could_throw_p (use->stmt)
3048 || !cst_and_fits_in_hwi (step))
3049 return;
3050
3051 cstepi = int_cst_value (step);
3052
3053 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3054 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055 || USE_STORE_PRE_INCREMENT (mem_mode))
3056 && GET_MODE_SIZE (mem_mode) == cstepi)
3057 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058 || USE_STORE_PRE_DECREMENT (mem_mode))
3059 && GET_MODE_SIZE (mem_mode) == -cstepi))
3060 {
3061 enum tree_code code = MINUS_EXPR;
3062 tree new_base;
3063 tree new_step = step;
3064
3065 if (POINTER_TYPE_P (TREE_TYPE (base)))
3066 {
3067 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068 code = POINTER_PLUS_EXPR;
3069 }
3070 else
3071 new_step = fold_convert (TREE_TYPE (base), new_step);
3072 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074 use->stmt);
3075 }
3076 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077 || USE_STORE_POST_INCREMENT (mem_mode))
3078 && GET_MODE_SIZE (mem_mode) == cstepi)
3079 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080 || USE_STORE_POST_DECREMENT (mem_mode))
3081 && GET_MODE_SIZE (mem_mode) == -cstepi))
3082 {
3083 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084 use->stmt);
3085 }
3086 }
3087
3088 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
3089 If USE is not NULL, the candidate is set as related to it. The candidate
3090 computation is scheduled both before the exit condition and at the end of
3091 the loop. */
3092
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095 tree base, tree step, bool important, struct iv_use *use,
3096 struct iv *orig_iv = NULL)
3097 {
3098 if (ip_normal_pos (data->current_loop))
3099 add_candidate_1 (data, base, step, important,
3100 IP_NORMAL, use, NULL, orig_iv);
3101 if (ip_end_pos (data->current_loop)
3102 && allow_ip_end_pos_p (data->current_loop))
3103 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3104 }
3105
3106 /* Adds standard iv candidates. */
3107
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3110 {
3111 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3112
3113 /* The same for the long integer type if it is wider and still fast enough. */
3114 if (TYPE_PRECISION
3115 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118 build_int_cst (long_integer_type_node, 1), true, NULL);
3119
3120 /* Likewise for the long long integer type. */
3121 if (TYPE_PRECISION
3122 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3126 }
3127
3128
3129 /* Adds candidates based on the old induction variable IV. */
3130
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3133 {
3134 gimple *phi;
3135 tree def;
3136 struct iv_cand *cand;
3137
3138 /* Check if this biv is used in an address type use. */
3139 if (iv->no_overflow && iv->have_address_use
3140 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3142 {
3143 tree base = fold_convert (sizetype, iv->base);
3144 tree step = fold_convert (sizetype, iv->step);
3145
3146 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3147 add_candidate (data, base, step, true, NULL, iv);
3148 /* Add iv cand of the original type only if it has nonlinear use. */
3149 if (iv->nonlin_use)
3150 add_candidate (data, iv->base, iv->step, true, NULL);
3151 }
3152 else
3153 add_candidate (data, iv->base, iv->step, true, NULL);
3154
3155 /* The same, but with initial value zero. */
3156 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157 add_candidate (data, size_int (0), iv->step, true, NULL);
3158 else
3159 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160 iv->step, true, NULL);
3161
3162 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163 if (gimple_code (phi) == GIMPLE_PHI)
3164 {
3165 /* Additionally record the possibility of leaving the original iv
3166 untouched. */
3167 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168 /* Don't add candidate if it's from another PHI node because
3169 it's an affine iv appearing in the form of PEELED_CHREC. */
3170 phi = SSA_NAME_DEF_STMT (def);
3171 if (gimple_code (phi) != GIMPLE_PHI)
3172 {
3173 cand = add_candidate_1 (data,
3174 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175 SSA_NAME_DEF_STMT (def));
3176 if (cand)
3177 {
3178 cand->var_before = iv->ssa_name;
3179 cand->var_after = def;
3180 }
3181 }
3182 else
3183 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3184 }
3185 }
3186
3187 /* Adds candidates based on the old induction variables. */
3188
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3191 {
3192 unsigned i;
3193 struct iv *iv;
3194 bitmap_iterator bi;
3195
3196 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3197 {
3198 iv = ver_info (data, i)->iv;
3199 if (iv && iv->biv_p && !integer_zerop (iv->step))
3200 add_iv_candidate_for_biv (data, iv);
3201 }
3202 }
3203
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3205
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208 tree step, struct iv_use *use)
3209 {
3210 struct iv_common_cand ent;
3211 struct iv_common_cand **slot;
3212
3213 ent.base = base;
3214 ent.step = step;
3215 ent.hash = iterative_hash_expr (base, 0);
3216 ent.hash = iterative_hash_expr (step, ent.hash);
3217
3218 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3219 if (*slot == NULL)
3220 {
3221 *slot = new iv_common_cand ();
3222 (*slot)->base = base;
3223 (*slot)->step = step;
3224 (*slot)->uses.create (8);
3225 (*slot)->hash = ent.hash;
3226 data->iv_common_cands.safe_push ((*slot));
3227 }
3228
3229 gcc_assert (use != NULL);
3230 (*slot)->uses.safe_push (use);
3231 return;
3232 }
3233
3234 /* Comparison function used to sort common candidates. */
3235
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3238 {
3239 unsigned n1, n2;
3240 const struct iv_common_cand *const *const ccand1
3241 = (const struct iv_common_cand *const *)p1;
3242 const struct iv_common_cand *const *const ccand2
3243 = (const struct iv_common_cand *const *)p2;
3244
3245 n1 = (*ccand1)->uses.length ();
3246 n2 = (*ccand2)->uses.length ();
3247 return n2 - n1;
3248 }
3249
3250 /* Adds IV candidates based on the common candidates recorded. */
3251
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3254 {
3255 unsigned i, j;
3256 struct iv_cand *cand_1, *cand_2;
3257
3258 data->iv_common_cands.qsort (common_cand_cmp);
3259 for (i = 0; i < data->iv_common_cands.length (); i++)
3260 {
3261 struct iv_common_cand *ptr = data->iv_common_cands[i];
3262
3263 /* Only add IV candidate if it's derived from multiple uses. */
3264 if (ptr->uses.length () <= 1)
3265 break;
3266
3267 cand_1 = NULL;
3268 cand_2 = NULL;
3269 if (ip_normal_pos (data->current_loop))
3270 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271 false, IP_NORMAL, NULL, NULL);
3272
3273 if (ip_end_pos (data->current_loop)
3274 && allow_ip_end_pos_p (data->current_loop))
3275 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276 false, IP_END, NULL, NULL);
3277
3278 /* Bind deriving uses and the new candidates. */
3279 for (j = 0; j < ptr->uses.length (); j++)
3280 {
3281 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282 if (cand_1)
3283 bitmap_set_bit (group->related_cands, cand_1->id);
3284 if (cand_2)
3285 bitmap_set_bit (group->related_cands, cand_2->id);
3286 }
3287 }
3288
3289 /* Release data since it is useless from this point. */
3290 data->iv_common_cand_tab->empty ();
3291 data->iv_common_cands.truncate (0);
3292 }
3293
3294 /* Adds candidates based on the value of USE's iv. */
3295
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3298 {
3299 unsigned HOST_WIDE_INT offset;
3300 tree base;
3301 tree basetype;
3302 struct iv *iv = use->iv;
3303
3304 add_candidate (data, iv->base, iv->step, false, use);
3305
3306 /* Record common candidate for use in case it can be shared by others. */
3307 record_common_cand (data, iv->base, iv->step, use);
3308
3309 /* Record common candidate with initial value zero. */
3310 basetype = TREE_TYPE (iv->base);
3311 if (POINTER_TYPE_P (basetype))
3312 basetype = sizetype;
3313 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3314
3315 /* Record a common candidate with the constant offset stripped from the base.
3316 As for the use itself, we also add a candidate directly for it. */
3317 base = strip_offset (iv->base, &offset);
3318 if (offset || base != iv->base)
3319 {
3320 record_common_cand (data, base, iv->step, use);
3321 add_candidate (data, base, iv->step, false, use);
3322 }
3323
3324 /* Record common candidate with base_object removed in base. */
3325 if (iv->base_object != NULL)
3326 {
3327 unsigned i;
3328 aff_tree aff_base;
3329 tree step, base_object = iv->base_object;
3330
3331 base = iv->base;
3332 step = iv->step;
3333 STRIP_NOPS (base);
3334 STRIP_NOPS (step);
3335 STRIP_NOPS (base_object);
3336 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3337 for (i = 0; i < aff_base.n; i++)
3338 {
3339 if (aff_base.elts[i].coef != 1)
3340 continue;
3341
3342 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343 break;
3344 }
3345 if (i < aff_base.n)
3346 {
3347 aff_combination_remove_elt (&aff_base, i);
3348 base = aff_combination_to_tree (&aff_base);
3349 basetype = TREE_TYPE (base);
3350 if (POINTER_TYPE_P (basetype))
3351 basetype = sizetype;
3352
3353 step = fold_convert (basetype, step);
3354 record_common_cand (data, base, step, use);
3355 /* Also record common candidate with offset stripped. */
3356 base = strip_offset (base, &offset);
3357 if (offset)
3358 record_common_cand (data, base, step, use);
3359 }
3360 }
3361
3362 /* Finally, add auto-increment candidates. Make such variables
3363 important since other iv uses with the same base object may be based
3364 on them. */
3365 if (use != NULL && use->type == USE_ADDRESS)
3366 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3367 }
3368
3369 /* Adds candidates based on the uses. */
3370
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3373 {
3374 unsigned i;
3375
3376 /* Only add candidates for the first use in each group. */
3377 for (i = 0; i < data->vgroups.length (); i++)
3378 {
3379 struct iv_group *group = data->vgroups[i];
3380
3381 gcc_assert (group->vuses[0] != NULL);
3382 add_iv_candidate_for_use (data, group->vuses[0]);
3383 }
3384 add_iv_candidate_derived_from_uses (data);
3385 }
3386
3387 /* Record important candidates and add them to related_cands bitmaps. */
3388
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3391 {
3392 unsigned i;
3393 struct iv_group *group;
3394
3395 for (i = 0; i < data->vcands.length (); i++)
3396 {
3397 struct iv_cand *cand = data->vcands[i];
3398
3399 if (cand->important)
3400 bitmap_set_bit (data->important_candidates, i);
3401 }
3402
3403 data->consider_all_candidates = (data->vcands.length ()
3404 <= CONSIDER_ALL_CANDIDATES_BOUND);
3405
3406 /* Add important candidates to groups' related_cands bitmaps. */
3407 for (i = 0; i < data->vgroups.length (); i++)
3408 {
3409 group = data->vgroups[i];
3410 bitmap_ior_into (group->related_cands, data->important_candidates);
3411 }
3412 }
3413
3414 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3415 If consider_all_candidates is true, we index the map directly by candidate
3416 id; otherwise we allocate a small hash table for every group. */
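/* For instance, when not all candidates are considered, a group related to
5 candidates gets a table of 1 << ceil_log2 (5) = 8 slots;
set_group_iv_cost and get_group_iv_cost then hash a candidate into slot
cand->id & (8 - 1) and probe linearly from there. */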
3417
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3420 {
3421 unsigned i, size, s;
3422
3423 for (i = 0; i < data->vgroups.length (); i++)
3424 {
3425 struct iv_group *group = data->vgroups[i];
3426
3427 if (data->consider_all_candidates)
3428 size = data->vcands.length ();
3429 else
3430 {
3431 s = bitmap_count_bits (group->related_cands);
3432
3433 /* Round up to a power of two, so that computing the modulo is fast. */
3434 size = s ? (1 << ceil_log2 (s)) : 1;
3435 }
3436
3437 group->n_map_members = size;
3438 group->cost_map = XCNEWVEC (struct cost_pair, size);
3439 }
3440 }
3441
3442 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3443 on invariants DEPENDS_ON, that the value used in expressing it is VALUE,
3444 and, in case of iv elimination, that the comparison operator is COMP. */
3445
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448 struct iv_group *group, struct iv_cand *cand,
3449 comp_cost cost, bitmap depends_on, tree value,
3450 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3451 {
3452 unsigned i, s;
3453
3454 if (cost.infinite_cost_p ())
3455 {
3456 BITMAP_FREE (depends_on);
3457 return;
3458 }
3459
3460 if (data->consider_all_candidates)
3461 {
3462 group->cost_map[cand->id].cand = cand;
3463 group->cost_map[cand->id].cost = cost;
3464 group->cost_map[cand->id].depends_on = depends_on;
3465 group->cost_map[cand->id].value = value;
3466 group->cost_map[cand->id].comp = comp;
3467 group->cost_map[cand->id].inv_expr = inv_expr;
3468 return;
3469 }
3470
3471 /* n_map_members is a power of two, so this computes modulo. */
3472 s = cand->id & (group->n_map_members - 1);
3473 for (i = s; i < group->n_map_members; i++)
3474 if (!group->cost_map[i].cand)
3475 goto found;
3476 for (i = 0; i < s; i++)
3477 if (!group->cost_map[i].cand)
3478 goto found;
3479
3480 gcc_unreachable ();
3481
3482 found:
3483 group->cost_map[i].cand = cand;
3484 group->cost_map[i].cost = cost;
3485 group->cost_map[i].depends_on = depends_on;
3486 group->cost_map[i].value = value;
3487 group->cost_map[i].comp = comp;
3488 group->cost_map[i].inv_expr = inv_expr;
3489 }
3490
3491 /* Gets cost of (GROUP, CAND) pair. */
3492
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495 struct iv_cand *cand)
3496 {
3497 unsigned i, s;
3498 struct cost_pair *ret;
3499
3500 if (!cand)
3501 return NULL;
3502
3503 if (data->consider_all_candidates)
3504 {
3505 ret = group->cost_map + cand->id;
3506 if (!ret->cand)
3507 return NULL;
3508
3509 return ret;
3510 }
3511
3512 /* n_map_members is a power of two, so this computes modulo. */
3513 s = cand->id & (group->n_map_members - 1);
3514 for (i = s; i < group->n_map_members; i++)
3515 if (group->cost_map[i].cand == cand)
3516 return group->cost_map + i;
3517 else if (group->cost_map[i].cand == NULL)
3518 return NULL;
3519 for (i = 0; i < s; i++)
3520 if (group->cost_map[i].cand == cand)
3521 return group->cost_map + i;
3522 else if (group->cost_map[i].cand == NULL)
3523 return NULL;
3524
3525 return NULL;
3526 }
3527
3528 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3531 {
3532 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533 machine_mode address_mode = targetm.addr_space.address_mode (as);
3534 rtx x;
3535
3536 gcc_assert (obj);
3537 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3538 {
3539 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540 x = gen_rtx_SYMBOL_REF (address_mode, name);
3541 SET_SYMBOL_REF_DECL (x, obj);
3542 x = gen_rtx_MEM (DECL_MODE (obj), x);
3543 set_mem_addr_space (x, as);
3544 targetm.encode_section_info (obj, x, true);
3545 }
3546 else
3547 {
3548 x = gen_raw_REG (address_mode, (*regno)++);
3549 x = gen_rtx_MEM (DECL_MODE (obj), x);
3550 set_mem_addr_space (x, as);
3551 }
3552
3553 return x;
3554 }
3555
3556 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
3557 walk_tree. DATA contains the actual fake register number. */
3558
3559 static tree
3560 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3561 {
3562 tree obj = NULL_TREE;
3563 rtx x = NULL_RTX;
3564 int *regno = (int *) data;
3565
3566 switch (TREE_CODE (*expr_p))
3567 {
3568 case ADDR_EXPR:
3569 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3570 handled_component_p (*expr_p);
3571 expr_p = &TREE_OPERAND (*expr_p, 0))
3572 continue;
3573 obj = *expr_p;
3574 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3575 x = produce_memory_decl_rtl (obj, regno);
3576 break;
3577
3578 case SSA_NAME:
3579 *ws = 0;
3580 obj = SSA_NAME_VAR (*expr_p);
3581 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3582 if (!obj)
3583 return NULL_TREE;
3584 if (!DECL_RTL_SET_P (obj))
3585 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3586 break;
3587
3588 case VAR_DECL:
3589 case PARM_DECL:
3590 case RESULT_DECL:
3591 *ws = 0;
3592 obj = *expr_p;
3593
3594 if (DECL_RTL_SET_P (obj))
3595 break;
3596
3597 if (DECL_MODE (obj) == BLKmode)
3598 x = produce_memory_decl_rtl (obj, regno);
3599 else
3600 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3601
3602 break;
3603
3604 default:
3605 break;
3606 }
3607
3608 if (x)
3609 {
3610 decl_rtl_to_reset.safe_push (obj);
3611 SET_DECL_RTL (obj, x);
3612 }
3613
3614 return NULL_TREE;
3615 }
3616
3617 /* Determines cost of the computation of EXPR. */
3618
3619 static unsigned
3620 computation_cost (tree expr, bool speed)
3621 {
3622 rtx_insn *seq;
3623 rtx rslt;
3624 tree type = TREE_TYPE (expr);
3625 unsigned cost;
3626 /* Avoid using hard regs in ways which may be unsupported. */
3627 int regno = LAST_VIRTUAL_REGISTER + 1;
3628 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3629 enum node_frequency real_frequency = node->frequency;
3630
3631 node->frequency = NODE_FREQUENCY_NORMAL;
3632 crtl->maybe_hot_insn_p = speed;
3633 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3634 start_sequence ();
3635 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3636 seq = get_insns ();
3637 end_sequence ();
3638 default_rtl_profile ();
3639 node->frequency = real_frequency;
3640
3641 cost = seq_cost (seq, speed);
3642 if (MEM_P (rslt))
3643 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3644 TYPE_ADDR_SPACE (type), speed);
3645 else if (!REG_P (rslt))
3646 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3647
3648 return cost;
3649 }
3650
3651 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3652
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3655 {
3656 if (stmt_after_increment (loop, cand, stmt))
3657 return cand->var_after;
3658 else
3659 return cand->var_before;
3660 }
3661
3662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3663 same precision that is at least as wide as the precision of TYPE, stores
3664 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3665 type of A and B. */
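/* For example, on a target where long is wider than int: if *A is
(unsigned int) X and *B is (unsigned int) Y with X and Y of type
unsigned long, *A and *B are replaced by X and Y and unsigned long is
returned. */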
3666
3667 static tree
3668 determine_common_wider_type (tree *a, tree *b)
3669 {
3670 tree wider_type = NULL;
3671 tree suba, subb;
3672 tree atype = TREE_TYPE (*a);
3673
3674 if (CONVERT_EXPR_P (*a))
3675 {
3676 suba = TREE_OPERAND (*a, 0);
3677 wider_type = TREE_TYPE (suba);
3678 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3679 return atype;
3680 }
3681 else
3682 return atype;
3683
3684 if (CONVERT_EXPR_P (*b))
3685 {
3686 subb = TREE_OPERAND (*b, 0);
3687 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3688 return atype;
3689 }
3690 else
3691 return atype;
3692
3693 *a = suba;
3694 *b = subb;
3695 return wider_type;
3696 }
3697
3698 /* Determines the expression by which USE is expressed from induction variable
3699 CAND at statement AT in LOOP. The expression is stored in a decomposed
3700 form in AFF. Returns false if USE cannot be expressed using CAND. */
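/* A small worked example: if the use's iv is {base = 8, step = 4} and the
candidate's iv is {base = 0, step = 1}, then ratio = 4 and the use's value
is expressed as 8 - 4 * 0 + 4 * var = 8 + 4 * var, following the formula
use = ubase - ratio * cbase + ratio * var applied below. */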
3701
3702 static bool
3703 get_computation_aff (struct loop *loop,
3704 struct iv_use *use, struct iv_cand *cand, gimple *at,
3705 struct aff_tree *aff)
3706 {
3707 tree ubase = use->iv->base;
3708 tree ustep = use->iv->step;
3709 tree cbase = cand->iv->base;
3710 tree cstep = cand->iv->step, cstep_common;
3711 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3712 tree common_type, var;
3713 tree uutype;
3714 aff_tree cbase_aff, var_aff;
3715 widest_int rat;
3716
3717 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3718 {
3719 /* We do not have a precision to express the values of use. */
3720 return false;
3721 }
3722
3723 var = var_at_stmt (loop, cand, at);
3724 uutype = unsigned_type_for (utype);
3725
3726 /* If the conversion is not noop, perform it. */
3727 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3728 {
3729 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3730 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3731 {
3732 tree inner_base, inner_step, inner_type;
3733 inner_base = TREE_OPERAND (cbase, 0);
3734 if (CONVERT_EXPR_P (cstep))
3735 inner_step = TREE_OPERAND (cstep, 0);
3736 else
3737 inner_step = cstep;
3738
3739 inner_type = TREE_TYPE (inner_base);
3740 /* If the candidate is added from a biv whose type is smaller than
3741 ctype, we know neither the candidate nor the biv will overflow.
3742 In this case, it's safe to skip the conversion in the candidate.
3743 As an example, (unsigned short)((unsigned long)A) equals
3744 (unsigned short)A, if A has a type no larger than short. */
3745 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3746 {
3747 cbase = inner_base;
3748 cstep = inner_step;
3749 }
3750 }
3751 cstep = fold_convert (uutype, cstep);
3752 cbase = fold_convert (uutype, cbase);
3753 var = fold_convert (uutype, var);
3754 }
3755
3756 /* Ratio is 1 when computing the value of biv cand by itself.
3757 We can't rely on constant_multiple_of in this case because the
3758 use is created after the original biv is selected. The call
3759 could fail because of inconsistent fold behavior. See PR68021
3760 for more information. */
3761 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3762 {
3763 gcc_assert (is_gimple_assign (use->stmt));
3764 gcc_assert (use->iv->ssa_name == cand->var_after);
3765 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3766 rat = 1;
3767 }
3768 else if (!constant_multiple_of (ustep, cstep, &rat))
3769 return false;
3770
3771 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3772 type, we achieve better folding by computing their difference in this
3773 wider type, and casting the result to UUTYPE. We do not need to worry
3774 about overflows, as all the arithmetic will in the end be performed in
3775 UUTYPE anyway. */
3776 common_type = determine_common_wider_type (&ubase, &cbase);
3777
3778 /* use = ubase - ratio * cbase + ratio * var. */
3779 tree_to_aff_combination (ubase, common_type, aff);
3780 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3781 tree_to_aff_combination (var, uutype, &var_aff);
3782
3783 /* We need to shift the value if we are after the increment. */
3784 if (stmt_after_increment (loop, cand, at))
3785 {
3786 aff_tree cstep_aff;
3787
3788 if (common_type != uutype)
3789 cstep_common = fold_convert (common_type, cstep);
3790 else
3791 cstep_common = cstep;
3792
3793 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3794 aff_combination_add (&cbase_aff, &cstep_aff);
3795 }
3796
3797 aff_combination_scale (&cbase_aff, -rat);
3798 aff_combination_add (aff, &cbase_aff);
3799 if (common_type != uutype)
3800 aff_combination_convert (aff, uutype);
3801
3802 aff_combination_scale (&var_aff, rat);
3803 aff_combination_add (aff, &var_aff);
3804
3805 return true;
3806 }
3807
3808 /* Return the type of USE. */
3809
3810 static tree
3811 get_use_type (struct iv_use *use)
3812 {
3813 tree base_type = TREE_TYPE (use->iv->base);
3814 tree type;
3815
3816 if (use->type == USE_ADDRESS)
3817 {
3818 /* The base_type may be a void pointer. Create a pointer type based on
3819 the mem_ref instead. */
3820 type = build_pointer_type (TREE_TYPE (*use->op_p));
3821 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3822 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3823 }
3824 else
3825 type = base_type;
3826
3827 return type;
3828 }
3829
3830 /* Determines the expression by which USE is expressed from induction variable
3831 CAND at statement AT in LOOP. The computation is unshared. */
3832
3833 static tree
3834 get_computation_at (struct loop *loop,
3835 struct iv_use *use, struct iv_cand *cand, gimple *at)
3836 {
3837 aff_tree aff;
3838 tree type = get_use_type (use);
3839
3840 if (!get_computation_aff (loop, use, cand, at, &aff))
3841 return NULL_TREE;
3842 unshare_aff_combination (&aff);
3843 return fold_convert (type, aff_combination_to_tree (&aff));
3844 }
3845
3846 /* Determines the expression by which USE is expressed from induction variable
3847 CAND in LOOP. The computation is unshared. */
3848
3849 static tree
3850 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3851 {
3852 return get_computation_at (loop, use, cand, use->stmt);
3853 }
3854
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856 If we're optimizing for space, the loop setup overhead is constant; if
3857 for speed, amortize it over the average number of loop iterations. */
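/* E.g. a setup cost of 10 in a loop averaging 5 iterations is accounted as
10 / 5 = 2 when the loop is optimized for speed, and as the full 10 when
optimizing for size. */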
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3860 {
3861 if (cost == INFTY)
3862 return cost;
3863 else if (optimize_loop_for_speed_p (data->current_loop))
3864 return cost / avg_loop_niter (data->current_loop);
3865 else
3866 return cost;
3867 }
3868
3869 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3870 validity for a memory reference accessing memory of mode MODE in
3871 address space AS. */
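/* For instance, on a target whose addressing modes only allow scaling the
index by 1, 2, 4 or 8, only those ratios end up set in the cached bitmap
built by the probing loop below, so any other ratio makes this function
return false. */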
3872
3873
3874 bool
3875 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3876 addr_space_t as)
3877 {
3878 #define MAX_RATIO 128
3879 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3880 static vec<sbitmap> valid_mult_list;
3881 sbitmap valid_mult;
3882
3883 if (data_index >= valid_mult_list.length ())
3884 valid_mult_list.safe_grow_cleared (data_index + 1);
3885
3886 valid_mult = valid_mult_list[data_index];
3887 if (!valid_mult)
3888 {
3889 machine_mode address_mode = targetm.addr_space.address_mode (as);
3890 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3891 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3892 rtx addr, scaled;
3893 HOST_WIDE_INT i;
3894
3895 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3896 bitmap_clear (valid_mult);
3897 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3898 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3899 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3900 {
3901 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3902 if (memory_address_addr_space_p (mode, addr, as)
3903 || memory_address_addr_space_p (mode, scaled, as))
3904 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3905 }
3906
3907 if (dump_file && (dump_flags & TDF_DETAILS))
3908 {
3909 fprintf (dump_file, " allowed multipliers:");
3910 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3911 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3912 fprintf (dump_file, " %d", (int) i);
3913 fprintf (dump_file, "\n");
3914 fprintf (dump_file, "\n");
3915 }
3916
3917 valid_mult_list[data_index] = valid_mult;
3918 }
3919
3920 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3921 return false;
3922
3923 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3924 }
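
/* A minimal usage sketch (hypothetical target): on a machine whose addressing
modes accept "base + 4 * index", a query such as

multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC)

would return true, while an unsupported ratio such as 3 would return false.
The answers are cached per (address space, mode) pair in valid_mult_list. */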
3925
3926 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3927 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3928 variable is omitted. Compute the cost for a memory reference that accesses
3929 a memory location of mode MEM_MODE in address space AS.
3930
3931 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3932 size of MEM_MODE / RATIO) is available. To make this determination, we
3933 look at the size of the increment to be made, which is given in CSTEP.
3934 CSTEP may be zero if the step is unknown.
3935 STMT_AFTER_INC is true iff the statement we're looking at is after the
3936 increment of the original biv.
3937
3938 TODO -- there must be some better way. All of this is quite crude. */
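
/* For instance (illustrative only): when addressing a[i] with 4-byte elements
through an induction variable i, the access decomposes as symbol = &a, no
var part, OFFSET = 0 and RATIO = 4, i.e. the address has the shape
&a + 4 * index; the cost tables built below are indexed by which of these
parts are present. */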
3939
3940 enum ainc_type
3941 {
3942 AINC_PRE_INC, /* Pre increment. */
3943 AINC_PRE_DEC, /* Pre decrement. */
3944 AINC_POST_INC, /* Post increment. */
3945 AINC_POST_DEC, /* Post decrement. */
3946 AINC_NONE /* Also the number of auto increment types. */
3947 };
3948
3949 struct address_cost_data
3950 {
3951 HOST_WIDE_INT min_offset, max_offset;
3952 unsigned costs[2][2][2][2];
3953 unsigned ainc_costs[AINC_NONE];
3954 };
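
/* Note: costs is indexed as
costs[symbol_present][var_present][offset_present][ratio_present], matching
the address shape described above, and ainc_costs is indexed by
enum ainc_type. */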
3955
3956
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960 HOST_WIDE_INT cstep, machine_mode mem_mode,
3961 addr_space_t as, bool speed,
3962 bool stmt_after_inc, bool *may_autoinc)
3963 {
3964 machine_mode address_mode = targetm.addr_space.address_mode (as);
3965 static vec<address_cost_data *> address_cost_data_list;
3966 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967 address_cost_data *data;
3968 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970 unsigned cost, acost, complexity;
3971 enum ainc_type autoinc_type;
3972 bool offset_p, ratio_p, autoinc;
3973 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974 unsigned HOST_WIDE_INT mask;
3975 unsigned bits;
3976
3977 if (data_index >= address_cost_data_list.length ())
3978 address_cost_data_list.safe_grow_cleared (data_index + 1);
3979
3980 data = address_cost_data_list[data_index];
3981 if (!data)
3982 {
3983 HOST_WIDE_INT i;
3984 HOST_WIDE_INT rat, off = 0;
3985 int old_cse_not_expected, width;
3986 unsigned sym_p, var_p, off_p, rat_p, add_c;
3987 rtx_insn *seq;
3988 rtx addr, base;
3989 rtx reg0, reg1;
3990
3991 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3992
3993 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3994
3995 width = GET_MODE_BITSIZE (address_mode) - 1;
3996 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997 width = HOST_BITS_PER_WIDE_INT - 1;
3998 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3999
4000 for (i = width; i >= 0; i--)
4001 {
4002 off = -((unsigned HOST_WIDE_INT) 1 << i);
4003 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004 if (memory_address_addr_space_p (mem_mode, addr, as))
4005 break;
4006 }
4007 data->min_offset = (i == -1? 0 : off);
4008
4009 for (i = width; i >= 0; i--)
4010 {
4011 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013 if (memory_address_addr_space_p (mem_mode, addr, as))
4014 break;
4015 /* For some strict-alignment targets, the offset must be naturally
4016 aligned. Try an aligned offset if mem_mode is not QImode. */
4017 off = mem_mode != QImode
4018 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019 - GET_MODE_SIZE (mem_mode)
4020 : 0;
4021 if (off > 0)
4022 {
4023 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024 if (memory_address_addr_space_p (mem_mode, addr, as))
4025 break;
4026 }
4027 }
4028 if (i == -1)
4029 off = 0;
4030 data->max_offset = off;
4031
4032 if (dump_file && (dump_flags & TDF_DETAILS))
4033 {
4034 fprintf (dump_file, "get_address_cost:\n");
4035 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036 GET_MODE_NAME (mem_mode),
4037 data->min_offset);
4038 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039 GET_MODE_NAME (mem_mode),
4040 data->max_offset);
4041 }
4042
4043 rat = 1;
4044 for (i = 2; i <= MAX_RATIO; i++)
4045 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4046 {
4047 rat = i;
4048 break;
4049 }
4050
4051 /* Compute the cost of various addressing modes. */
4052 acost = 0;
4053 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4055
4056 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057 || USE_STORE_PRE_DECREMENT (mem_mode))
4058 {
4059 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060 has_predec[mem_mode]
4061 = memory_address_addr_space_p (mem_mode, addr, as);
4062
4063 if (has_predec[mem_mode])
4064 data->ainc_costs[AINC_PRE_DEC]
4065 = address_cost (addr, mem_mode, as, speed);
4066 }
4067 if (USE_LOAD_POST_DECREMENT (mem_mode)
4068 || USE_STORE_POST_DECREMENT (mem_mode))
4069 {
4070 addr = gen_rtx_POST_DEC (address_mode, reg0);
4071 has_postdec[mem_mode]
4072 = memory_address_addr_space_p (mem_mode, addr, as);
4073
4074 if (has_postdec[mem_mode])
4075 data->ainc_costs[AINC_POST_DEC]
4076 = address_cost (addr, mem_mode, as, speed);
4077 }
4078 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079 || USE_STORE_PRE_INCREMENT (mem_mode))
4080 {
4081 addr = gen_rtx_PRE_INC (address_mode, reg0);
4082 has_preinc[mem_mode]
4083 = memory_address_addr_space_p (mem_mode, addr, as);
4084
4085 if (has_preinc[mem_mode])
4086 data->ainc_costs[AINC_PRE_INC]
4087 = address_cost (addr, mem_mode, as, speed);
4088 }
4089 if (USE_LOAD_POST_INCREMENT (mem_mode)
4090 || USE_STORE_POST_INCREMENT (mem_mode))
4091 {
4092 addr = gen_rtx_POST_INC (address_mode, reg0);
4093 has_postinc[mem_mode]
4094 = memory_address_addr_space_p (mem_mode, addr, as);
4095
4096 if (has_postinc[mem_mode])
4097 data->ainc_costs[AINC_POST_INC]
4098 = address_cost (addr, mem_mode, as, speed);
4099 }
4100 for (i = 0; i < 16; i++)
4101 {
4102 sym_p = i & 1;
4103 var_p = (i >> 1) & 1;
4104 off_p = (i >> 2) & 1;
4105 rat_p = (i >> 3) & 1;
4106
4107 addr = reg0;
4108 if (rat_p)
4109 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110 gen_int_mode (rat, address_mode));
4111
4112 if (var_p)
4113 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4114
4115 if (sym_p)
4116 {
4117 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118 /* ??? We can run into trouble with some backends by presenting
4119 them with symbols which haven't been properly passed through
4120 targetm.encode_section_info. By setting the local bit, we
4121 enhance the probability of things working. */
4122 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4123
4124 if (off_p)
4125 base = gen_rtx_fmt_e (CONST, address_mode,
4126 gen_rtx_fmt_ee
4127 (PLUS, address_mode, base,
4128 gen_int_mode (off, address_mode)));
4129 }
4130 else if (off_p)
4131 base = gen_int_mode (off, address_mode);
4132 else
4133 base = NULL_RTX;
4134
4135 if (base)
4136 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4137
4138 start_sequence ();
4139 /* To avoid splitting addressing modes, pretend that no cse will
4140 follow. */
4141 old_cse_not_expected = cse_not_expected;
4142 cse_not_expected = true;
4143 addr = memory_address_addr_space (mem_mode, addr, as);
4144 cse_not_expected = old_cse_not_expected;
4145 seq = get_insns ();
4146 end_sequence ();
4147
4148 acost = seq_cost (seq, speed);
4149 acost += address_cost (addr, mem_mode, as, speed);
4150
4151 if (!acost)
4152 acost = 1;
4153 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4154 }
4155
4156 /* On some targets, it is quite expensive to load a symbol into a register,
4157 which makes addresses that contain symbols look much more expensive.
4158 However, the symbol will have to be loaded in any case before the
4159 loop (and quite likely we have it in register already), so it does not
4160 make much sense to penalize them too heavily. So make some final
4161 tweaks for the SYMBOL_PRESENT modes:
4162
4163 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164 var is cheaper, use this mode with a small penalty.
4165 If VAR_PRESENT is true, try whether the mode with
4166 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
4167 if this is the case, use it. */
4168 add_c = add_cost (speed, address_mode);
4169 for (i = 0; i < 8; i++)
4170 {
4171 var_p = i & 1;
4172 off_p = (i >> 1) & 1;
4173 rat_p = (i >> 2) & 1;
4174
4175 acost = data->costs[0][1][off_p][rat_p] + 1;
4176 if (var_p)
4177 acost += add_c;
4178
4179 if (acost < data->costs[1][var_p][off_p][rat_p])
4180 data->costs[1][var_p][off_p][rat_p] = acost;
4181 }
4182
4183 if (dump_file && (dump_flags & TDF_DETAILS))
4184 {
4185 fprintf (dump_file, "<Address Costs>:\n");
4186
4187 for (i = 0; i < 16; i++)
4188 {
4189 sym_p = i & 1;
4190 var_p = (i >> 1) & 1;
4191 off_p = (i >> 2) & 1;
4192 rat_p = (i >> 3) & 1;
4193
4194 fprintf (dump_file, " ");
4195 if (sym_p)
4196 fprintf (dump_file, "sym + ");
4197 if (var_p)
4198 fprintf (dump_file, "var + ");
4199 if (off_p)
4200 fprintf (dump_file, "cst + ");
4201 if (rat_p)
4202 fprintf (dump_file, "rat * ");
4203
4204 acost = data->costs[sym_p][var_p][off_p][rat_p];
4205 fprintf (dump_file, "index costs %d\n", acost);
4206 }
4207 if (has_predec[mem_mode] || has_postdec[mem_mode]
4208 || has_preinc[mem_mode] || has_postinc[mem_mode])
4209 fprintf (dump_file, " May include autoinc/dec\n");
4210 fprintf (dump_file, "\n");
4211 }
4212
4213 address_cost_data_list[data_index] = data;
4214 }
4215
4216 bits = GET_MODE_BITSIZE (address_mode);
4217 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218 offset &= mask;
4219 if ((offset >> (bits - 1) & 1))
4220 offset |= ~mask;
4221 s_offset = offset;
4222
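/* Classify whether the access can use an auto-increment/decrement addressing
form. As an illustration (assuming a target with POST_INC): a use like *p
where p steps by the access size each iteration (autoinc_offset == 0 and
msize == cstep) maps to AINC_POST_INC, i.e. the "*p++" pattern; the PRE_
forms additionally require the offset to equal plus or minus the access
size. */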
4223 autoinc = false;
4224 autoinc_type = AINC_NONE;
4225 msize = GET_MODE_SIZE (mem_mode);
4226 autoinc_offset = offset;
4227 if (stmt_after_inc)
4228 autoinc_offset += ratio * cstep;
4229 if (symbol_present || var_present || ratio != 1)
4230 autoinc = false;
4231 else
4232 {
4233 if (has_postinc[mem_mode] && autoinc_offset == 0
4234 && msize == cstep)
4235 autoinc_type = AINC_POST_INC;
4236 else if (has_postdec[mem_mode] && autoinc_offset == 0
4237 && msize == -cstep)
4238 autoinc_type = AINC_POST_DEC;
4239 else if (has_preinc[mem_mode] && autoinc_offset == msize
4240 && msize == cstep)
4241 autoinc_type = AINC_PRE_INC;
4242 else if (has_predec[mem_mode] && autoinc_offset == -msize
4243 && msize == -cstep)
4244 autoinc_type = AINC_PRE_DEC;
4245
4246 if (autoinc_type != AINC_NONE)
4247 autoinc = true;
4248 }
4249
4250 cost = 0;
4251 offset_p = (s_offset != 0
4252 && data->min_offset <= s_offset
4253 && s_offset <= data->max_offset);
4254 ratio_p = (ratio != 1
4255 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4256
4257 if (ratio != 1 && !ratio_p)
4258 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4259
4260 if (s_offset && !offset_p && !symbol_present)
4261 cost += add_cost (speed, address_mode);
4262
4263 if (may_autoinc)
4264 *may_autoinc = autoinc;
4265 if (autoinc)
4266 acost = data->ainc_costs[autoinc_type];
4267 else
4268 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270 return comp_cost (cost + acost, complexity);
4271 }
4272
4273 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4274 EXPR operand holding the shift. COST0 and COST1 are the costs for
4275 calculating the operands of EXPR. Returns true if successful, and returns
4276 the cost in COST. */
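
/* For example (target-dependent, illustrative only): for EXPR = a + b * 8 the
multiplication is a shift by m = 3, so the candidates are an explicit shift
followed by an add versus a single shift-and-add instruction (e.g. an x86
"lea" or an ARM add with a shifted operand); the cheaper alternative is
chosen below. */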
4277
4278 static bool
4279 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4280 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4281 {
4282 comp_cost res;
4283 tree op1 = TREE_OPERAND (expr, 1);
4284 tree cst = TREE_OPERAND (mult, 1);
4285 tree multop = TREE_OPERAND (mult, 0);
4286 int m = exact_log2 (int_cst_value (cst));
4287 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4288 int as_cost, sa_cost;
4289 bool mult_in_op1;
4290
4291 if (!(m >= 0 && m < maxm))
4292 return false;
4293
4294 STRIP_NOPS (op1);
4295 mult_in_op1 = operand_equal_p (op1, mult, 0);
4296
4297 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4298
4299 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4300 use that in preference to a shift insn followed by an add insn. */
4301 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4302 ? shiftadd_cost (speed, mode, m)
4303 : (mult_in_op1
4304 ? shiftsub1_cost (speed, mode, m)
4305 : shiftsub0_cost (speed, mode, m)));
4306
4307 res = comp_cost (MIN (as_cost, sa_cost), 0);
4308 res += (mult_in_op1 ? cost0 : cost1);
4309
4310 STRIP_NOPS (multop);
4311 if (!is_gimple_val (multop))
4312 res += force_expr_to_var_cost (multop, speed);
4313
4314 *cost = res;
4315 return true;
4316 }
4317
4318 /* Estimates cost of forcing expression EXPR into a variable. */
4319
4320 static comp_cost
4321 force_expr_to_var_cost (tree expr, bool speed)
4322 {
4323 static bool costs_initialized = false;
4324 static unsigned integer_cost [2];
4325 static unsigned symbol_cost [2];
4326 static unsigned address_cost [2];
4327 tree op0, op1;
4328 comp_cost cost0, cost1, cost;
4329 machine_mode mode;
4330
4331 if (!costs_initialized)
4332 {
4333 tree type = build_pointer_type (integer_type_node);
4334 tree var, addr;
4335 rtx x;
4336 int i;
4337
4338 var = create_tmp_var_raw (integer_type_node, "test_var");
4339 TREE_STATIC (var) = 1;
4340 x = produce_memory_decl_rtl (var, NULL);
4341 SET_DECL_RTL (var, x);
4342
4343 addr = build1 (ADDR_EXPR, type, var);
4344
4345
4346 for (i = 0; i < 2; i++)
4347 {
4348 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4349 2000), i);
4350
4351 symbol_cost[i] = computation_cost (addr, i) + 1;
4352
4353 address_cost[i]
4354 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4355 if (dump_file && (dump_flags & TDF_DETAILS))
4356 {
4357 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4358 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4359 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4360 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4361 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4362 fprintf (dump_file, "\n");
4363 }
4364 }
4365
4366 costs_initialized = true;
4367 }
4368
4369 STRIP_NOPS (expr);
4370
4371 if (SSA_VAR_P (expr))
4372 return no_cost;
4373
4374 if (is_gimple_min_invariant (expr))
4375 {
4376 if (TREE_CODE (expr) == INTEGER_CST)
4377 return comp_cost (integer_cost [speed], 0);
4378
4379 if (TREE_CODE (expr) == ADDR_EXPR)
4380 {
4381 tree obj = TREE_OPERAND (expr, 0);
4382
4383 if (TREE_CODE (obj) == VAR_DECL
4384 || TREE_CODE (obj) == PARM_DECL
4385 || TREE_CODE (obj) == RESULT_DECL)
4386 return comp_cost (symbol_cost [speed], 0);
4387 }
4388
4389 return comp_cost (address_cost [speed], 0);
4390 }
4391
4392 switch (TREE_CODE (expr))
4393 {
4394 case POINTER_PLUS_EXPR:
4395 case PLUS_EXPR:
4396 case MINUS_EXPR:
4397 case MULT_EXPR:
4398 op0 = TREE_OPERAND (expr, 0);
4399 op1 = TREE_OPERAND (expr, 1);
4400 STRIP_NOPS (op0);
4401 STRIP_NOPS (op1);
4402 break;
4403
4404 CASE_CONVERT:
4405 case NEGATE_EXPR:
4406 op0 = TREE_OPERAND (expr, 0);
4407 STRIP_NOPS (op0);
4408 op1 = NULL_TREE;
4409 break;
4410
4411 default:
4412 /* Just an arbitrary value, FIXME. */
4413 return comp_cost (target_spill_cost[speed], 0);
4414 }
4415
4416 if (op0 == NULL_TREE
4417 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4418 cost0 = no_cost;
4419 else
4420 cost0 = force_expr_to_var_cost (op0, speed);
4421
4422 if (op1 == NULL_TREE
4423 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4424 cost1 = no_cost;
4425 else
4426 cost1 = force_expr_to_var_cost (op1, speed);
4427
4428 mode = TYPE_MODE (TREE_TYPE (expr));
4429 switch (TREE_CODE (expr))
4430 {
4431 case POINTER_PLUS_EXPR:
4432 case PLUS_EXPR:
4433 case MINUS_EXPR:
4434 case NEGATE_EXPR:
4435 cost = comp_cost (add_cost (speed, mode), 0);
4436 if (TREE_CODE (expr) != NEGATE_EXPR)
4437 {
4438 tree mult = NULL_TREE;
4439 comp_cost sa_cost;
4440 if (TREE_CODE (op1) == MULT_EXPR)
4441 mult = op1;
4442 else if (TREE_CODE (op0) == MULT_EXPR)
4443 mult = op0;
4444
4445 if (mult != NULL_TREE
4446 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4447 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4448 speed, &sa_cost))
4449 return sa_cost;
4450 }
4451 break;
4452
4453 CASE_CONVERT:
4454 {
4455 tree inner_type, outer_type;
4456 outer_type = TREE_TYPE (expr);
4457 inner_type = TREE_TYPE (op0);
4458 cost = comp_cost (convert_cost (TYPE_MODE (outer_type),
4459 TYPE_MODE (inner_type), speed), 0);
4460 }
4461 break;
4462
4463 case MULT_EXPR:
4464 if (cst_and_fits_in_hwi (op0))
4465 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4466 mode, speed), 0);
4467 else if (cst_and_fits_in_hwi (op1))
4468 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4469 mode, speed), 0);
4470 else
4471 return comp_cost (target_spill_cost [speed], 0);
4472 break;
4473
4474 default:
4475 gcc_unreachable ();
4476 }
4477
4478 cost += cost0;
4479 cost += cost1;
4480
4481 /* Bound the cost by target_spill_cost. The parts of complicated
4482 computations often are either loop invariant or at least can
4483 be shared between several iv uses, so letting this grow without
4484 limits would not give reasonable results. */
4485 if (cost.cost > (int) target_spill_cost [speed])
4486 cost.cost = target_spill_cost [speed];
4487
4488 return cost;
4489 }
4490
4491 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4492 invariants the computation depends on. */
4493
4494 static comp_cost
4495 force_var_cost (struct ivopts_data *data,
4496 tree expr, bitmap *depends_on)
4497 {
4498 if (depends_on)
4499 {
4500 fd_ivopts_data = data;
4501 walk_tree (&expr, find_depends, depends_on, NULL);
4502 }
4503
4504 return force_expr_to_var_cost (expr, data->speed);
4505 }
4506
4507 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4508 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4509 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4510 invariants the computation depends on. */
4511
4512 static comp_cost
4513 split_address_cost (struct ivopts_data *data,
4514 tree addr, bool *symbol_present, bool *var_present,
4515 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4516 {
4517 tree core;
4518 HOST_WIDE_INT bitsize;
4519 HOST_WIDE_INT bitpos;
4520 tree toffset;
4521 machine_mode mode;
4522 int unsignedp, reversep, volatilep;
4523
4524 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4525 &unsignedp, &reversep, &volatilep, false);
4526
4527 if (toffset != 0
4528 || bitpos % BITS_PER_UNIT != 0
4529 || reversep
4530 || TREE_CODE (core) != VAR_DECL)
4531 {
4532 *symbol_present = false;
4533 *var_present = true;
4534 fd_ivopts_data = data;
4535 if (depends_on)
4536 walk_tree (&addr, find_depends, depends_on, NULL);
4537
4538 return comp_cost (target_spill_cost[data->speed], 0);
4539 }
4540
4541 *offset += bitpos / BITS_PER_UNIT;
4542 if (TREE_STATIC (core)
4543 || DECL_EXTERNAL (core))
4544 {
4545 *symbol_present = true;
4546 *var_present = false;
4547 return no_cost;
4548 }
4549
4550 *symbol_present = false;
4551 *var_present = true;
4552 return no_cost;
4553 }
4554
4555 /* Estimates cost of expressing difference of addresses E1 - E2 as
4556 var + symbol + offset. The value of offset is added to OFFSET,
4557 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4558 part is missing. DEPENDS_ON is a set of the invariants the computation
4559 depends on. */
4560
4561 static comp_cost
4562 ptr_difference_cost (struct ivopts_data *data,
4563 tree e1, tree e2, bool *symbol_present, bool *var_present,
4564 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4565 {
4566 HOST_WIDE_INT diff = 0;
4567 aff_tree aff_e1, aff_e2;
4568 tree type;
4569
4570 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4571
4572 if (ptr_difference_const (e1, e2, &diff))
4573 {
4574 *offset += diff;
4575 *symbol_present = false;
4576 *var_present = false;
4577 return no_cost;
4578 }
4579
4580 if (integer_zerop (e2))
4581 return split_address_cost (data, TREE_OPERAND (e1, 0),
4582 symbol_present, var_present, offset, depends_on);
4583
4584 *symbol_present = false;
4585 *var_present = true;
4586
4587 type = signed_type_for (TREE_TYPE (e1));
4588 tree_to_aff_combination (e1, type, &aff_e1);
4589 tree_to_aff_combination (e2, type, &aff_e2);
4590 aff_combination_scale (&aff_e2, -1);
4591 aff_combination_add (&aff_e1, &aff_e2);
4592
4593 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4594 }
4595
4596 /* Estimates cost of expressing difference E1 - E2 as
4597 var + symbol + offset. The value of offset is added to OFFSET,
4598 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4599 part is missing. DEPENDS_ON is a set of the invariants the computation
4600 depends on. */
4601
4602 static comp_cost
4603 difference_cost (struct ivopts_data *data,
4604 tree e1, tree e2, bool *symbol_present, bool *var_present,
4605 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4606 {
4607 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4608 unsigned HOST_WIDE_INT off1, off2;
4609 aff_tree aff_e1, aff_e2;
4610 tree type;
4611
4612 e1 = strip_offset (e1, &off1);
4613 e2 = strip_offset (e2, &off2);
4614 *offset += off1 - off2;
4615
4616 STRIP_NOPS (e1);
4617 STRIP_NOPS (e2);
4618
4619 if (TREE_CODE (e1) == ADDR_EXPR)
4620 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4621 offset, depends_on);
4622 *symbol_present = false;
4623
4624 if (operand_equal_p (e1, e2, 0))
4625 {
4626 *var_present = false;
4627 return no_cost;
4628 }
4629
4630 *var_present = true;
4631
4632 if (integer_zerop (e2))
4633 return force_var_cost (data, e1, depends_on);
4634
4635 if (integer_zerop (e1))
4636 {
4637 comp_cost cost = force_var_cost (data, e2, depends_on);
4638 cost += mult_by_coeff_cost (-1, mode, data->speed);
4639 return cost;
4640 }
4641
4642 type = signed_type_for (TREE_TYPE (e1));
4643 tree_to_aff_combination (e1, type, &aff_e1);
4644 tree_to_aff_combination (e2, type, &aff_e2);
4645 aff_combination_scale (&aff_e2, -1);
4646 aff_combination_add (&aff_e1, &aff_e2);
4647
4648 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4649 }
4650
4651 /* Returns true if AFF1 and AFF2 are identical. */
4652
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4655 {
4656 unsigned i;
4657
4658 if (aff1->n != aff2->n)
4659 return false;
4660
4661 for (i = 0; i < aff1->n; i++)
4662 {
4663 if (aff1->elts[i].coef != aff2->elts[i].coef)
4664 return false;
4665
4666 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667 return false;
4668 }
4669 return true;
4670 }
4671
4672 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
4673
4674 static iv_inv_expr_ent *
4675 record_inv_expr (struct ivopts_data *data, tree expr)
4676 {
4677 struct iv_inv_expr_ent ent;
4678 struct iv_inv_expr_ent **slot;
4679
4680 ent.expr = expr;
4681 ent.hash = iterative_hash_expr (expr, 0);
4682 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4683
4684 if (!*slot)
4685 {
4686 *slot = XNEW (struct iv_inv_expr_ent);
4687 (*slot)->expr = expr;
4688 (*slot)->hash = ent.hash;
4689 (*slot)->id = data->max_inv_expr_id++;
4690 }
4691
4692 return *slot;
4693 }
4694
4695 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4696 requires a new compiler-generated temporary. Returns NULL otherwise.
4697 ADDRESS_P is a flag indicating if the expression is for address
4698 computation. */
4699
4700 static iv_inv_expr_ent *
4701 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4702 tree cbase, HOST_WIDE_INT ratio,
4703 bool address_p)
4704 {
4705 aff_tree ubase_aff, cbase_aff;
4706 tree expr, ub, cb;
4707
4708 STRIP_NOPS (ubase);
4709 STRIP_NOPS (cbase);
4710 ub = ubase;
4711 cb = cbase;
4712
4713 if ((TREE_CODE (ubase) == INTEGER_CST)
4714 && (TREE_CODE (cbase) == INTEGER_CST))
4715 return NULL;
4716
4717 /* Strips the constant part. */
4718 if (TREE_CODE (ubase) == PLUS_EXPR
4719 || TREE_CODE (ubase) == MINUS_EXPR
4720 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4721 {
4722 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4723 ubase = TREE_OPERAND (ubase, 0);
4724 }
4725
4726 /* Strips the constant part. */
4727 if (TREE_CODE (cbase) == PLUS_EXPR
4728 || TREE_CODE (cbase) == MINUS_EXPR
4729 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4730 {
4731 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4732 cbase = TREE_OPERAND (cbase, 0);
4733 }
4734
4735 if (address_p)
4736 {
4737 if (((TREE_CODE (ubase) == SSA_NAME)
4738 || (TREE_CODE (ubase) == ADDR_EXPR
4739 && is_gimple_min_invariant (ubase)))
4740 && (TREE_CODE (cbase) == INTEGER_CST))
4741 return NULL;
4742
4743 if (((TREE_CODE (cbase) == SSA_NAME)
4744 || (TREE_CODE (cbase) == ADDR_EXPR
4745 && is_gimple_min_invariant (cbase)))
4746 && (TREE_CODE (ubase) == INTEGER_CST))
4747 return NULL;
4748 }
4749
4750 if (ratio == 1)
4751 {
4752 if (operand_equal_p (ubase, cbase, 0))
4753 return NULL;
4754
4755 if (TREE_CODE (ubase) == ADDR_EXPR
4756 && TREE_CODE (cbase) == ADDR_EXPR)
4757 {
4758 tree usym, csym;
4759
4760 usym = TREE_OPERAND (ubase, 0);
4761 csym = TREE_OPERAND (cbase, 0);
4762 if (TREE_CODE (usym) == ARRAY_REF)
4763 {
4764 tree ind = TREE_OPERAND (usym, 1);
4765 if (TREE_CODE (ind) == INTEGER_CST
4766 && tree_fits_shwi_p (ind)
4767 && tree_to_shwi (ind) == 0)
4768 usym = TREE_OPERAND (usym, 0);
4769 }
4770 if (TREE_CODE (csym) == ARRAY_REF)
4771 {
4772 tree ind = TREE_OPERAND (csym, 1);
4773 if (TREE_CODE (ind) == INTEGER_CST
4774 && tree_fits_shwi_p (ind)
4775 && tree_to_shwi (ind) == 0)
4776 csym = TREE_OPERAND (csym, 0);
4777 }
4778 if (operand_equal_p (usym, csym, 0))
4779 return NULL;
4780 }
4781 /* Now do a more complex comparison. */
4782 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4783 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4784 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4785 return NULL;
4786 }
4787
4788 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4789 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4790
4791 aff_combination_scale (&cbase_aff, -1 * ratio);
4792 aff_combination_add (&ubase_aff, &cbase_aff);
4793 expr = aff_combination_to_tree (&ubase_aff);
4794 return record_inv_expr (data, expr);
4795 }
4796
4797
4798
4799 /* Determines the cost of the computation by which USE is expressed
4800 from induction variable CAND. If ADDRESS_P is true, we just need
4801 to create an address from it, otherwise we want to get it into
4802 a register. A set of invariants we depend on is stored in
4803 DEPENDS_ON. AT is the statement at which the value is computed.
4804 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4805 addressing is likely. */
4806
4807 static comp_cost
4808 get_computation_cost_at (struct ivopts_data *data,
4809 struct iv_use *use, struct iv_cand *cand,
4810 bool address_p, bitmap *depends_on, gimple *at,
4811 bool *can_autoinc,
4812 iv_inv_expr_ent **inv_expr)
4813 {
4814 tree ubase = use->iv->base, ustep = use->iv->step;
4815 tree cbase, cstep;
4816 tree utype = TREE_TYPE (ubase), ctype;
4817 unsigned HOST_WIDE_INT cstepi, offset = 0;
4818 HOST_WIDE_INT ratio, aratio;
4819 bool var_present, symbol_present, stmt_is_after_inc;
4820 comp_cost cost;
4821 widest_int rat;
4822 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4823 machine_mode mem_mode = (address_p
4824 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4825 : VOIDmode);
4826
4827 if (depends_on)
4828 *depends_on = NULL;
4829
4830 /* Only consider real candidates. */
4831 if (!cand->iv)
4832 return infinite_cost;
4833
4834 cbase = cand->iv->base;
4835 cstep = cand->iv->step;
4836 ctype = TREE_TYPE (cbase);
4837
4838 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4839 {
4840 /* We do not have a precision to express the values of use. */
4841 return infinite_cost;
4842 }
4843
4844 if (address_p
4845 || (use->iv->base_object
4846 && cand->iv->base_object
4847 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4848 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4849 {
4850 /* Do not try to express the address of an object with a computation based
4851 on the address of a different object. This may cause problems in RTL-level
4852 alias analysis (which does not expect this to happen, as it is
4853 illegal in C), and would be unlikely to be useful
4854 anyway. */
4855 if (use->iv->base_object
4856 && cand->iv->base_object
4857 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4858 return infinite_cost;
4859 }
4860
4861 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4862 {
4863 /* TODO -- add direct handling of this case. */
4864 goto fallback;
4865 }
4866
4867 /* CSTEPI is removed from the offset in case the statement is after the
4868 increment. If the step is not constant, we use zero instead.
4869 This is a bit imprecise (there is the extra addition), but
4870 redundancy elimination is likely to transform the code so that
4871 it uses the value of the variable before the increment anyway,
4872 so it is not that unrealistic. */
4873 if (cst_and_fits_in_hwi (cstep))
4874 cstepi = int_cst_value (cstep);
4875 else
4876 cstepi = 0;
4877
4878 if (!constant_multiple_of (ustep, cstep, &rat))
4879 return infinite_cost;
4880
4881 if (wi::fits_shwi_p (rat))
4882 ratio = rat.to_shwi ();
4883 else
4884 return infinite_cost;
4885
4886 STRIP_NOPS (cbase);
4887 ctype = TREE_TYPE (cbase);
4888
4889 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4890
4891 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4892 or ratio == 1, it is better to handle this like
4893
4894 ubase - ratio * cbase + ratio * var
4895
4896 (this also holds in the case ratio == -1, TODO). */
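
/* For example (illustrative values): with ubase = &a[3], cbase = 0 and
ratio = 4, the ubase - ratio * cbase part reduces to ubase itself, which
splits into the symbol &a and a constant offset; only ratio * var remains to
be computed inside the loop. */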
4897
4898 if (cst_and_fits_in_hwi (cbase))
4899 {
4900 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4901 cost = difference_cost (data,
4902 ubase, build_int_cst (utype, 0),
4903 &symbol_present, &var_present, &offset,
4904 depends_on);
4905 cost /= avg_loop_niter (data->current_loop);
4906 }
4907 else if (ratio == 1)
4908 {
4909 tree real_cbase = cbase;
4910
4911 /* Check to see if any adjustment is needed. */
4912 if (cstepi == 0 && stmt_is_after_inc)
4913 {
4914 aff_tree real_cbase_aff;
4915 aff_tree cstep_aff;
4916
4917 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4918 &real_cbase_aff);
4919 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4920
4921 aff_combination_add (&real_cbase_aff, &cstep_aff);
4922 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4923 }
4924
4925 cost = difference_cost (data,
4926 ubase, real_cbase,
4927 &symbol_present, &var_present, &offset,
4928 depends_on);
4929 cost /= avg_loop_niter (data->current_loop);
4930 }
4931 else if (address_p
4932 && !POINTER_TYPE_P (ctype)
4933 && multiplier_allowed_in_address_p
4934 (ratio, mem_mode,
4935 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4936 {
4937 tree real_cbase = cbase;
4938
4939 if (cstepi == 0 && stmt_is_after_inc)
4940 {
4941 if (POINTER_TYPE_P (ctype))
4942 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4943 else
4944 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4945 }
4946 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4947 build_int_cst (ctype, ratio));
4948 cost = difference_cost (data,
4949 ubase, real_cbase,
4950 &symbol_present, &var_present, &offset,
4951 depends_on);
4952 cost /= avg_loop_niter (data->current_loop);
4953 }
4954 else
4955 {
4956 cost = force_var_cost (data, cbase, depends_on);
4957 cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4958 &symbol_present, &var_present, &offset,
4959 depends_on);
4960 cost /= avg_loop_niter (data->current_loop);
4961 cost += add_cost (data->speed, TYPE_MODE (ctype));
4962 }
4963
4964 /* Record setup cost in scratch field. */
4965 cost.scratch = cost.cost;
4966
4967 if (inv_expr && depends_on && *depends_on)
4968 {
4969 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4970 address_p);
4971 /* Clear depends on. */
4972 if (*inv_expr != NULL)
4973 bitmap_clear (*depends_on);
4974 }
4975
4976 /* If we are after the increment, the value of the candidate is higher by
4977 one iteration. */
4978 if (stmt_is_after_inc)
4979 offset -= ratio * cstepi;
4980
4981 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4982 (symbol/var1/const parts may be omitted). If we are looking for an
4983 address, find the cost of addressing this. */
4984 if (address_p)
4985 return cost + get_address_cost (symbol_present, var_present,
4986 offset, ratio, cstepi,
4987 mem_mode,
4988 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4989 speed, stmt_is_after_inc, can_autoinc);
4990
4991 /* Otherwise estimate the costs for computing the expression. */
4992 if (!symbol_present && !var_present && !offset)
4993 {
4994 if (ratio != 1)
4995 cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4996 return cost;
4997 }
4998
4999 /* Symbol + offset should be compile-time computable, so consider that they
5000 are added once to the variable, if present. */
5001 if (var_present && (symbol_present || offset))
5002 cost += adjust_setup_cost (data,
5003 add_cost (speed, TYPE_MODE (ctype)));
5004
5005 /* Having offset does not affect runtime cost in case it is added to
5006 symbol, but it increases complexity. */
5007 if (offset)
5008 cost.complexity++;
5009
5010 cost += add_cost (speed, TYPE_MODE (ctype));
5011
5012 aratio = ratio > 0 ? ratio : -ratio;
5013 if (aratio != 1)
5014 cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5015 return cost;
5016
5017 fallback:
5018 if (can_autoinc)
5019 *can_autoinc = false;
5020
5021 {
5022 /* Just get the expression, expand it and measure the cost. */
5023 tree comp = get_computation_at (data->current_loop, use, cand, at);
5024
5025 if (!comp)
5026 return infinite_cost;
5027
5028 if (address_p)
5029 comp = build_simple_mem_ref (comp);
5030
5031 return comp_cost (computation_cost (comp, speed), 0);
5032 }
5033 }
5034
5035 /* Determines the cost of the computation by that USE is expressed
5036 from induction variable CAND. If ADDRESS_P is true, we just need
5037 to create an address from it, otherwise we want to get it into
5038 register. A set of invariants we depend on is stored in
5039 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5040 autoinc addressing is likely. */
5041
5042 static comp_cost
5043 get_computation_cost (struct ivopts_data *data,
5044 struct iv_use *use, struct iv_cand *cand,
5045 bool address_p, bitmap *depends_on,
5046 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5047 {
5048 return get_computation_cost_at (data,
5049 use, cand, address_p, depends_on, use->stmt,
5050 can_autoinc, inv_expr);
5051 }
5052
5053 /* Determines cost of computing the use in GROUP with CAND in a generic
5054 expression. */
5055
5056 static bool
5057 determine_group_iv_cost_generic (struct ivopts_data *data,
5058 struct iv_group *group, struct iv_cand *cand)
5059 {
5060 comp_cost cost;
5061 iv_inv_expr_ent *inv_expr = NULL;
5062 bitmap depends_on = NULL;
5063 struct iv_use *use = group->vuses[0];
5064
5065 /* The simple case first -- if we need to express the value of the preserved
5066 original biv, the cost is 0. This also prevents us from counting the
5067 cost of increment twice -- once at this use and once in the cost of
5068 the candidate. */
5069 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5070 cost = no_cost;
5071 else
5072 cost = get_computation_cost (data, use, cand, false,
5073 &depends_on, NULL, &inv_expr);
5074
5075 set_group_iv_cost (data, group, cand, cost, depends_on,
5076 NULL_TREE, ERROR_MARK, inv_expr);
5077 return !cost.infinite_cost_p ();
5078 }
5079
5080 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5081
5082 static bool
5083 determine_group_iv_cost_address (struct ivopts_data *data,
5084 struct iv_group *group, struct iv_cand *cand)
5085 {
5086 unsigned i;
5087 bitmap depends_on;
5088 bool can_autoinc, first = true;
5089 iv_inv_expr_ent *inv_expr = NULL;
5090 struct iv_use *use = group->vuses[0];
5091 comp_cost sum_cost = no_cost, cost;
5092
5093 cost = get_computation_cost (data, use, cand, true,
5094 &depends_on, &can_autoinc, &inv_expr);
5095
5096 sum_cost = cost;
5097 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5098 {
5099 if (can_autoinc)
5100 sum_cost -= cand->cost_step;
5101 /* If we generated the candidate solely for exploiting autoincrement
5102 opportunities, and it turns out it can't be used, set the cost to
5103 infinity to make sure we ignore it. */
5104 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5105 sum_cost = infinite_cost;
5106 }
5107
5108 /* Uses in a group can share setup code, so only add setup cost once. */
5109 cost -= cost.scratch;
5110 /* Compute and add costs for the rest of the uses in this group. */
5111 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5112 {
5113 struct iv_use *next = group->vuses[i];
5114
5115 /* Compute cost for the first use with a different offset from the main
5116 use and add it afterwards. Costs for these uses could be quite
5117 different. Given the below uses in a group:
5118 use 0 : {base + A + offset_0, step}
5119 use 0.1: {base + A + offset_0, step}
5120 use 0.2: {base + A + offset_1, step}
5121 use 0.3: {base + A + offset_2, step}
5122 when we need to compute costs with candidate:
5123 cand 1 : {base + B + offset_0, step}
5124
5125 The first use with different offset is use 0.2, its cost is larger
5126 than cost of use 0/0.1 because we need to compute:
5127 A - B + offset_1 - offset_0
5128 rather than:
5129 A - B. */
5130 if (first && next->addr_offset != use->addr_offset)
5131 {
5132 first = false;
5133 cost = get_computation_cost (data, next, cand, true,
5134 NULL, &can_autoinc, NULL);
5135 /* Remove setup cost. */
5136 if (!cost.infinite_cost_p ())
5137 cost -= cost.scratch;
5138 }
5139 sum_cost += cost;
5140 }
5141 set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5142 NULL_TREE, ERROR_MARK, inv_expr);
5143
5144 return !sum_cost.infinite_cost_p ();
5145 }
5146
5147 /* Computes value of candidate CAND at position AT in iteration NITER, and
5148 stores it to VAL. */
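
/* Roughly, the value computed is

val = base + (niter + (stmt_after_increment (loop, cand, at) ? 1 : 0)) * step,

evaluated in an unsigned type so that any wrap-around is well defined. */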
5149
5150 static void
5151 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5152 aff_tree *val)
5153 {
5154 aff_tree step, delta, nit;
5155 struct iv *iv = cand->iv;
5156 tree type = TREE_TYPE (iv->base);
5157 tree steptype = unsigned_type_for (type);
5161
5162 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5163 aff_combination_convert (&step, steptype);
5164 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5165 aff_combination_convert (&nit, steptype);
5166 aff_combination_mult (&nit, &step, &delta);
5167 if (stmt_after_increment (loop, cand, at))
5168 aff_combination_add (&delta, &step);
5169
5170 tree_to_aff_combination (iv->base, type, val);
5171 if (!POINTER_TYPE_P (type))
5172 aff_combination_convert (val, steptype);
5173 aff_combination_add (val, &delta);
5174 }
5175
5176 /* Returns period of induction variable iv. */
5177
5178 static tree
5179 iv_period (struct iv *iv)
5180 {
5181 tree step = iv->step, period, type;
5182 tree pow2div;
5183
5184 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5185
5186 type = unsigned_type_for (TREE_TYPE (step));
5187 /* The period of the iv is lcm (step, type_range) / step - 1,
5188 i.e., N * type_range / step - 1. Since type_range is a power
5189 of two, N == step >> num_of_ending_zeros_binary (step),
5190 so the final result is
5191
5192 (type_range >> num_of_ending_zeros_binary (step)) - 1
5193
5194 */
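/* As a worked example (hypothetical iv): for a 32-bit unsigned iv with step 4,
num_ending_zeros (step) is 2, so the period is (1 << (32 - 2)) - 1, i.e.
0x3fffffff. */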
5195 pow2div = num_ending_zeros (step);
5196
5197 period = build_low_bits_mask (type,
5198 (TYPE_PRECISION (type)
5199 - tree_to_uhwi (pow2div)));
5200
5201 return period;
5202 }
5203
5204 /* Returns the comparison operator used when eliminating the iv USE. */
5205
5206 static enum tree_code
5207 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5208 {
5209 struct loop *loop = data->current_loop;
5210 basic_block ex_bb;
5211 edge exit;
5212
5213 ex_bb = gimple_bb (use->stmt);
5214 exit = EDGE_SUCC (ex_bb, 0);
5215 if (flow_bb_inside_loop_p (loop, exit->dest))
5216 exit = EDGE_SUCC (ex_bb, 1);
5217
5218 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5219 }
5220
5221 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5222 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5223 calculation is performed in a non-wrapping type.
5224
5225 TODO: More generally, we could test for the situation that
5226 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5227 This would require knowing the sign of OFFSET. */
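
/* For instance (sketch): if BASE is defined as "base = p + off" in a type that
does not wrap and OFFSET expands to the same affine value as off, then
BASE - OFFSET is just p and trivially cannot overflow; the affine expansion
below is used to establish that equality. */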
5228
5229 static bool
5230 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5231 {
5232 enum tree_code code;
5233 tree e1, e2;
5234 aff_tree aff_e1, aff_e2, aff_offset;
5235
5236 if (!nowrap_type_p (TREE_TYPE (base)))
5237 return false;
5238
5239 base = expand_simple_operations (base);
5240
5241 if (TREE_CODE (base) == SSA_NAME)
5242 {
5243 gimple *stmt = SSA_NAME_DEF_STMT (base);
5244
5245 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5246 return false;
5247
5248 code = gimple_assign_rhs_code (stmt);
5249 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5250 return false;
5251
5252 e1 = gimple_assign_rhs1 (stmt);
5253 e2 = gimple_assign_rhs2 (stmt);
5254 }
5255 else
5256 {
5257 code = TREE_CODE (base);
5258 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5259 return false;
5260 e1 = TREE_OPERAND (base, 0);
5261 e2 = TREE_OPERAND (base, 1);
5262 }
5263
5264 /* Use affine expansion as deeper inspection to prove the equality. */
5265 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5266 &aff_e2, &data->name_expansion_cache);
5267 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5268 &aff_offset, &data->name_expansion_cache);
5269 aff_combination_scale (&aff_offset, -1);
5270 switch (code)
5271 {
5272 case PLUS_EXPR:
5273 aff_combination_add (&aff_e2, &aff_offset);
5274 if (aff_combination_zero_p (&aff_e2))
5275 return true;
5276
5277 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5278 &aff_e1, &data->name_expansion_cache);
5279 aff_combination_add (&aff_e1, &aff_offset);
5280 return aff_combination_zero_p (&aff_e1);
5281
5282 case POINTER_PLUS_EXPR:
5283 aff_combination_add (&aff_e2, &aff_offset);
5284 return aff_combination_zero_p (&aff_e2);
5285
5286 default:
5287 return false;
5288 }
5289 }
5290
5291 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5292 comparison with CAND. NITER describes the number of iterations of
5293 the loops. If successful, the comparison in COMP_P is altered accordingly.
5294
5295 We aim to handle the following situation:
5296
5297 sometype *base, *p;
5298 int a, b, i;
5299
5300 i = a;
5301 p = p_0 = base + a;
5302
5303 do
5304 {
5305 bla (*p);
5306 p++;
5307 i++;
5308 }
5309 while (i < b);
5310
5311 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5312 We aim to optimize this to
5313
5314 p = p_0 = base + a;
5315 do
5316 {
5317 bla (*p);
5318 p++;
5319 }
5320 while (p < p_0 - a + b);
5321
5322 This preserves correctness, since the pointer arithmetic does not
5323 overflow. More precisely:
5324
5325 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5326 overflow in computing it or the values of p.
5327 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5328 overflow. To prove this, we use the fact that p_0 = base + a. */
5329
5330 static bool
5331 iv_elimination_compare_lt (struct ivopts_data *data,
5332 struct iv_cand *cand, enum tree_code *comp_p,
5333 struct tree_niter_desc *niter)
5334 {
5335 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5336 struct aff_tree nit, tmpa, tmpb;
5337 enum tree_code comp;
5338 HOST_WIDE_INT step;
5339
5340 /* We need to know that the candidate induction variable does not overflow.
5341 While more complex analysis may be used to prove this, for now just
5342 check that the variable appears in the original program and that it
5343 is computed in a type that guarantees no overflows. */
5344 cand_type = TREE_TYPE (cand->iv->base);
5345 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5346 return false;
5347
5348 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5349 the calculation of the BOUND could overflow, making the comparison
5350 invalid. */
5351 if (!data->loop_single_exit_p)
5352 return false;
5353
5354 /* We need to be able to decide whether candidate is increasing or decreasing
5355 in order to choose the right comparison operator. */
5356 if (!cst_and_fits_in_hwi (cand->iv->step))
5357 return false;
5358 step = int_cst_value (cand->iv->step);
5359
5360 /* Check that the number of iterations matches the expected pattern:
5361 a + 1 > b ? 0 : b - a - 1. */
5362 mbz = niter->may_be_zero;
5363 if (TREE_CODE (mbz) == GT_EXPR)
5364 {
5365 /* Handle a + 1 > b. */
5366 tree op0 = TREE_OPERAND (mbz, 0);
5367 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5368 {
5369 a = TREE_OPERAND (op0, 0);
5370 b = TREE_OPERAND (mbz, 1);
5371 }
5372 else
5373 return false;
5374 }
5375 else if (TREE_CODE (mbz) == LT_EXPR)
5376 {
5377 tree op1 = TREE_OPERAND (mbz, 1);
5378
5379 /* Handle b < a + 1. */
5380 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5381 {
5382 a = TREE_OPERAND (op1, 0);
5383 b = TREE_OPERAND (mbz, 0);
5384 }
5385 else
5386 return false;
5387 }
5388 else
5389 return false;
5390
5391 /* Expected number of iterations is B - A - 1. Check that it matches
5392 the actual number, i.e., that B - A - NITER = 1. */
5393 tree_to_aff_combination (niter->niter, nit_type, &nit);
5394 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5395 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5396 aff_combination_scale (&nit, -1);
5397 aff_combination_scale (&tmpa, -1);
5398 aff_combination_add (&tmpb, &tmpa);
5399 aff_combination_add (&tmpb, &nit);
5400 if (tmpb.n != 0 || tmpb.offset != 1)
5401 return false;
5402
5403 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5404 overflow. */
5405 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5406 cand->iv->step,
5407 fold_convert (TREE_TYPE (cand->iv->step), a));
5408 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5409 return false;
5410
5411 /* Determine the new comparison operator. */
5412 comp = step < 0 ? GT_EXPR : LT_EXPR;
5413 if (*comp_p == NE_EXPR)
5414 *comp_p = comp;
5415 else if (*comp_p == EQ_EXPR)
5416 *comp_p = invert_tree_comparison (comp, false);
5417 else
5418 gcc_unreachable ();
5419
5420 return true;
5421 }
5422
5423 /* Check whether it is possible to express the condition in USE by comparison
5424 of candidate CAND. If so, store the value compared with to BOUND, and the
5425 comparison operator to COMP. */
5426
5427 static bool
5428 may_eliminate_iv (struct ivopts_data *data,
5429 struct iv_use *use, struct iv_cand *cand, tree *bound,
5430 enum tree_code *comp)
5431 {
5432 basic_block ex_bb;
5433 edge exit;
5434 tree period;
5435 struct loop *loop = data->current_loop;
5436 aff_tree bnd;
5437 struct tree_niter_desc *desc = NULL;
5438
5439 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5440 return false;
5441
5442 /* For now this works only for exits that dominate the loop latch.
5443 TODO: extend to other conditions inside the loop body. */
5444 ex_bb = gimple_bb (use->stmt);
5445 if (use->stmt != last_stmt (ex_bb)
5446 || gimple_code (use->stmt) != GIMPLE_COND
5447 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5448 return false;
5449
5450 exit = EDGE_SUCC (ex_bb, 0);
5451 if (flow_bb_inside_loop_p (loop, exit->dest))
5452 exit = EDGE_SUCC (ex_bb, 1);
5453 if (flow_bb_inside_loop_p (loop, exit->dest))
5454 return false;
5455
5456 desc = niter_for_exit (data, exit);
5457 if (!desc)
5458 return false;
5459
5460 /* Determine whether we can use the variable to test the exit condition.
5461 This is the case iff the period of the induction variable is greater
5462 than the number of iterations for which the exit condition is true. */
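/* For example (illustrative): an 8-bit candidate with step 1 has a period of
255, so it cannot be used for the exit test of a loop that may run 300
iterations -- the candidate would wrap before reaching the computed bound. */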
5463 period = iv_period (cand->iv);
5464
5465 /* If the number of iterations is constant, compare against it directly. */
5466 if (TREE_CODE (desc->niter) == INTEGER_CST)
5467 {
5468 /* See cand_value_at. */
5469 if (stmt_after_increment (loop, cand, use->stmt))
5470 {
5471 if (!tree_int_cst_lt (desc->niter, period))
5472 return false;
5473 }
5474 else
5475 {
5476 if (tree_int_cst_lt (period, desc->niter))
5477 return false;
5478 }
5479 }
5480
5481 /* If not, and if this is the only possible exit of the loop, see whether
5482 we can get a conservative estimate on the number of iterations of the
5483 entire loop and compare against that instead. */
5484 else
5485 {
5486 widest_int period_value, max_niter;
5487
5488 max_niter = desc->max;
5489 if (stmt_after_increment (loop, cand, use->stmt))
5490 max_niter += 1;
5491 period_value = wi::to_widest (period);
5492 if (wi::gtu_p (max_niter, period_value))
5493 {
5494 /* See if we can take advantage of inferred loop bound
5495 information. */
5496 if (data->loop_single_exit_p)
5497 {
5498 if (!max_loop_iterations (loop, &max_niter))
5499 return false;
5500 /* The loop bound is already adjusted by adding 1. */
5501 if (wi::gtu_p (max_niter, period_value))
5502 return false;
5503 }
5504 else
5505 return false;
5506 }
5507 }
5508
5509 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5510
5511 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5512 aff_combination_to_tree (&bnd));
5513 *comp = iv_elimination_compare (data, use);
5514
5515 /* It is unlikely that computing the number of iterations using division
5516 would be more profitable than keeping the original induction variable. */
5517 if (expression_expensive_p (*bound))
5518 return false;
5519
5520 /* Sometimes, it is possible to handle the situation that the number of
5521 iterations may be zero unless additional assumptions hold, by using <
5522 instead of != in the exit condition.
5523
5524 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5525 base the exit condition on it. However, that is often too
5526 expensive. */
5527 if (!integer_zerop (desc->may_be_zero))
5528 return iv_elimination_compare_lt (data, cand, comp, desc);
5529
5530 return true;
5531 }
5532
5533 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5534 be copied, if it is used in the loop body and DATA->body_includes_call. */
5535
5536 static int
5537 parm_decl_cost (struct ivopts_data *data, tree bound)
5538 {
5539 tree sbound = bound;
5540 STRIP_NOPS (sbound);
5541
5542 if (TREE_CODE (sbound) == SSA_NAME
5543 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5544 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5545 && data->body_includes_call)
5546 return COSTS_N_INSNS (1);
5547
5548 return 0;
5549 }
5550
5551 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5552
5553 static bool
5554 determine_group_iv_cost_cond (struct ivopts_data *data,
5555 struct iv_group *group, struct iv_cand *cand)
5556 {
5557 tree bound = NULL_TREE;
5558 struct iv *cmp_iv;
5559 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5560 comp_cost elim_cost, express_cost, cost, bound_cost;
5561 bool ok;
5562 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5563 tree *control_var, *bound_cst;
5564 enum tree_code comp = ERROR_MARK;
5565 struct iv_use *use = group->vuses[0];
5566
5567 gcc_assert (cand->iv);
5568
5569 /* Try iv elimination. */
5570 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5571 {
5572 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5573 if (elim_cost.cost == 0)
5574 elim_cost.cost = parm_decl_cost (data, bound);
5575 else if (TREE_CODE (bound) == INTEGER_CST)
5576 elim_cost.cost = 0;
5577 /* If we replace a loop condition 'i < n' with 'p < base + n',
5578 depends_on_elim will have 'base' and 'n' set, which implies
5579 that both 'base' and 'n' will be live during the loop. More likely,
5580 'base + n' will be loop invariant, resulting in only one live value
5581 during the loop. So in that case we clear depends_on_elim and set
5582 elim_inv_expr instead. */
5583 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5584 {
5585 elim_inv_expr = record_inv_expr (data, bound);
5586 bitmap_clear (depends_on_elim);
5587 }
5588 /* The bound is a loop invariant, so it will be only computed
5589 once. */
5590 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5591 }
5592 else
5593 elim_cost = infinite_cost;
5594
5595 /* Try expressing the original giv. If it is compared with an invariant,
5596 note that we cannot get rid of it. */
5597 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5598 NULL, &cmp_iv);
5599 gcc_assert (ok);
5600
5601 /* When the condition is a comparison of the candidate IV against
5602 zero, prefer this IV.
5603
5604 TODO: The constant that we're subtracting from the cost should
5605 be target-dependent. This information should be added to the
5606 target costs for each backend. */
5607 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5608 && integer_zerop (*bound_cst)
5609 && (operand_equal_p (*control_var, cand->var_after, 0)
5610 || operand_equal_p (*control_var, cand->var_before, 0)))
5611 elim_cost -= 1;
5612
5613 express_cost = get_computation_cost (data, use, cand, false,
5614 &depends_on_express, NULL,
5615 &express_inv_expr);
5616 fd_ivopts_data = data;
5617 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5618
5619 /* Count the cost of the original bound as well. */
5620 bound_cost = force_var_cost (data, *bound_cst, NULL);
5621 if (bound_cost.cost == 0)
5622 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5623 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5624 bound_cost.cost = 0;
5625 express_cost += bound_cost;
5626
5627 /* Choose the better approach, preferring the eliminated IV. */
5628 if (elim_cost <= express_cost)
5629 {
5630 cost = elim_cost;
5631 depends_on = depends_on_elim;
5632 depends_on_elim = NULL;
5633 inv_expr = elim_inv_expr;
5634 }
5635 else
5636 {
5637 cost = express_cost;
5638 depends_on = depends_on_express;
5639 depends_on_express = NULL;
5640 bound = NULL_TREE;
5641 comp = ERROR_MARK;
5642 inv_expr = express_inv_expr;
5643 }
5644
5645 set_group_iv_cost (data, group, cand, cost,
5646 depends_on, bound, comp, inv_expr);
5647
5648 if (depends_on_elim)
5649 BITMAP_FREE (depends_on_elim);
5650 if (depends_on_express)
5651 BITMAP_FREE (depends_on_express);
5652
5653 return !cost.infinite_cost_p ();
5654 }
5655
5656 /* Determines cost of computing uses in GROUP with CAND. Returns false
5657 if the uses cannot be represented with CAND. */
5658
5659 static bool
5660 determine_group_iv_cost (struct ivopts_data *data,
5661 struct iv_group *group, struct iv_cand *cand)
5662 {
5663 switch (group->type)
5664 {
5665 case USE_NONLINEAR_EXPR:
5666 return determine_group_iv_cost_generic (data, group, cand);
5667
5668 case USE_ADDRESS:
5669 return determine_group_iv_cost_address (data, group, cand);
5670
5671 case USE_COMPARE:
5672 return determine_group_iv_cost_cond (data, group, cand);
5673
5674 default:
5675 gcc_unreachable ();
5676 }
5677 }
5678
5679 /* Return true if get_computation_cost indicates that autoincrement is
5680 a possibility for the pair of USE and CAND, false otherwise. */
5681
5682 static bool
5683 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5684 struct iv_cand *cand)
5685 {
5686 bitmap depends_on;
5687 bool can_autoinc;
5688 comp_cost cost;
5689
5690 if (use->type != USE_ADDRESS)
5691 return false;
5692
5693 cost = get_computation_cost (data, use, cand, true, &depends_on,
5694 &can_autoinc, NULL);
5695
5696 BITMAP_FREE (depends_on);
5697
5698 return !cost.infinite_cost_p () && can_autoinc;
5699 }
5700
5701 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5702 use that allows autoincrement, and set their AINC_USE if possible. */
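/* As an illustration (hypothetical and target-dependent): on a target with
post-increment addressing, a memory use placed right next to the iv update
     t = *p;
     p = p + 4;
can be combined into a single auto-increment access, roughly 't = *p++;'.
That is why the closest use before and the closest use after the increment
are tracked below. */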
5703
5704 static void
5705 set_autoinc_for_original_candidates (struct ivopts_data *data)
5706 {
5707 unsigned i, j;
5708
5709 for (i = 0; i < data->vcands.length (); i++)
5710 {
5711 struct iv_cand *cand = data->vcands[i];
5712 struct iv_use *closest_before = NULL;
5713 struct iv_use *closest_after = NULL;
5714 if (cand->pos != IP_ORIGINAL)
5715 continue;
5716
5717 for (j = 0; j < data->vgroups.length (); j++)
5718 {
5719 struct iv_group *group = data->vgroups[j];
5720 struct iv_use *use = group->vuses[0];
5721 unsigned uid = gimple_uid (use->stmt);
5722
5723 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5724 continue;
5725
5726 if (uid < gimple_uid (cand->incremented_at)
5727 && (closest_before == NULL
5728 || uid > gimple_uid (closest_before->stmt)))
5729 closest_before = use;
5730
5731 if (uid > gimple_uid (cand->incremented_at)
5732 && (closest_after == NULL
5733 || uid < gimple_uid (closest_after->stmt)))
5734 closest_after = use;
5735 }
5736
5737 if (closest_before != NULL
5738 && autoinc_possible_for_pair (data, closest_before, cand))
5739 cand->ainc_use = closest_before;
5740 else if (closest_after != NULL
5741 && autoinc_possible_for_pair (data, closest_after, cand))
5742 cand->ainc_use = closest_after;
5743 }
5744 }
5745
5746 /* Finds the candidates for the induction variables. */
5747
5748 static void
5749 find_iv_candidates (struct ivopts_data *data)
5750 {
5751 /* Add commonly used ivs. */
5752 add_standard_iv_candidates (data);
5753
5754 /* Add old induction variables. */
5755 add_iv_candidate_for_bivs (data);
5756
5757 /* Add induction variables derived from uses. */
5758 add_iv_candidate_for_groups (data);
5759
5760 set_autoinc_for_original_candidates (data);
5761
5762 /* Record the important candidates. */
5763 record_important_candidates (data);
5764
5765 if (dump_file && (dump_flags & TDF_DETAILS))
5766 {
5767 unsigned i;
5768
5769 fprintf (dump_file, "\n<Important Candidates>:\t");
5770 for (i = 0; i < data->vcands.length (); i++)
5771 if (data->vcands[i]->important)
5772 fprintf (dump_file, " %d,", data->vcands[i]->id);
5773 fprintf (dump_file, "\n");
5774
5775 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5776 for (i = 0; i < data->vgroups.length (); i++)
5777 {
5778 struct iv_group *group = data->vgroups[i];
5779
5780 if (group->related_cands)
5781 {
5782 fprintf (dump_file, " Group %d:\t", group->id);
5783 dump_bitmap (dump_file, group->related_cands);
5784 }
5785 }
5786 fprintf (dump_file, "\n");
5787 }
5788 }
5789
5790 /* Determines the cost of computing each use group with each iv candidate. */
5791
5792 static void
5793 determine_group_iv_costs (struct ivopts_data *data)
5794 {
5795 unsigned i, j;
5796 struct iv_cand *cand;
5797 struct iv_group *group;
5798 bitmap to_clear = BITMAP_ALLOC (NULL);
5799
5800 alloc_use_cost_map (data);
5801
5802 for (i = 0; i < data->vgroups.length (); i++)
5803 {
5804 group = data->vgroups[i];
5805
5806 if (data->consider_all_candidates)
5807 {
5808 for (j = 0; j < data->vcands.length (); j++)
5809 {
5810 cand = data->vcands[j];
5811 determine_group_iv_cost (data, group, cand);
5812 }
5813 }
5814 else
5815 {
5816 bitmap_iterator bi;
5817
5818 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5819 {
5820 cand = data->vcands[j];
5821 if (!determine_group_iv_cost (data, group, cand))
5822 bitmap_set_bit (to_clear, j);
5823 }
5824
5825 /* Remove the candidates for which the cost is infinite from
5826 the list of related candidates. */
5827 bitmap_and_compl_into (group->related_cands, to_clear);
5828 bitmap_clear (to_clear);
5829 }
5830 }
5831
5832 BITMAP_FREE (to_clear);
5833
5834 if (dump_file && (dump_flags & TDF_DETAILS))
5835 {
5836 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5837 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5838
5839 for (hash_table<iv_inv_expr_hasher>::iterator it
5840 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5841 ++it)
5842 list.safe_push (*it);
5843
5844 list.qsort (sort_iv_inv_expr_ent);
5845
5846 for (i = 0; i < list.length (); ++i)
5847 {
5848 fprintf (dump_file, "inv_expr %d: \t", i);
5849 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5850 fprintf (dump_file, "\n");
5851 }
5852
5853 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5854
5855 for (i = 0; i < data->vgroups.length (); i++)
5856 {
5857 group = data->vgroups[i];
5858
5859 fprintf (dump_file, "Group %d:\n", i);
5860 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5861 for (j = 0; j < group->n_map_members; j++)
5862 {
5863 if (!group->cost_map[j].cand
5864 || group->cost_map[j].cost.infinite_cost_p ())
5865 continue;
5866
5867 fprintf (dump_file, " %d\t%d\t%d\t",
5868 group->cost_map[j].cand->id,
5869 group->cost_map[j].cost.cost,
5870 group->cost_map[j].cost.complexity);
5871 if (group->cost_map[j].inv_expr != NULL)
5872 fprintf (dump_file, "%d\t",
5873 group->cost_map[j].inv_expr->id);
5874 else
5875 fprintf (dump_file, "\t");
5876 if (group->cost_map[j].depends_on)
5877 bitmap_print (dump_file,
5878 group->cost_map[j].depends_on, "","");
5879 fprintf (dump_file, "\n");
5880 }
5881
5882 fprintf (dump_file, "\n");
5883 }
5884 fprintf (dump_file, "\n");
5885 }
5886 }
5887
5888 /* Determines cost of the candidate CAND. */
5889
5890 static void
5891 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5892 {
5893 comp_cost cost_base;
5894 unsigned cost, cost_step;
5895 tree base;
5896
5897 if (!cand->iv)
5898 {
5899 cand->cost = 0;
5900 return;
5901 }
5902
5903 /* There are two costs associated with the candidate -- its increment
5904 and its initialization. The second is almost negligible for any loop
5905 that rolls enough, so we only take it into account a little. */
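/* Roughly (a sketch, assuming the loop is optimized for speed so that
adjust_setup_cost amortizes the setup over the average iteration count):
     cand->cost ~= add_cost (step mode) + setup_cost / avg_loop_niter
so for a loop that rolls many times the per-iteration increment dominates
and the one-off initialization contributes little. */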
5906
5907 base = cand->iv->base;
5908 cost_base = force_var_cost (data, base, NULL);
5909 /* It is rare for the iv register to happen to be initialized with the
5910 proper value at no cost. In general, there will at least be a regcopy
5911 or a const set. */
5912 if (cost_base.cost == 0)
5913 cost_base.cost = COSTS_N_INSNS (1);
5914 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5915
5916 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5917
5918 /* Prefer the original ivs unless we may gain something by replacing them;
5919 the reason is to make debugging simpler. So this is not relevant for
5920 artificial ivs created by other optimization passes. */
5921 if (cand->pos != IP_ORIGINAL
5922 || !SSA_NAME_VAR (cand->var_before)
5923 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5924 cost++;
5925
5926 /* Prefer not to insert statements into the latch unless there are some
5927 already (so that we do not create unnecessary jumps). */
5928 if (cand->pos == IP_END
5929 && empty_block_p (ip_end_pos (data->current_loop)))
5930 cost++;
5931
5932 cand->cost = cost;
5933 cand->cost_step = cost_step;
5934 }
5935
5936 /* Determines costs of computation of the candidates. */
5937
5938 static void
5939 determine_iv_costs (struct ivopts_data *data)
5940 {
5941 unsigned i;
5942
5943 if (dump_file && (dump_flags & TDF_DETAILS))
5944 {
5945 fprintf (dump_file, "<Candidate Costs>:\n");
5946 fprintf (dump_file, " cand\tcost\n");
5947 }
5948
5949 for (i = 0; i < data->vcands.length (); i++)
5950 {
5951 struct iv_cand *cand = data->vcands[i];
5952
5953 determine_iv_cost (data, cand);
5954
5955 if (dump_file && (dump_flags & TDF_DETAILS))
5956 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5957 }
5958
5959 if (dump_file && (dump_flags & TDF_DETAILS))
5960 fprintf (dump_file, "\n");
5961 }
5962
5963 /* Calculates cost for having SIZE induction variables. */
5964
5965 static unsigned
5966 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5967 {
5968 /* We add size to the cost, so that we prefer eliminating ivs
5969 if possible. */
5970 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5971 data->body_includes_call);
5972 }
5973
5974 /* For each size of the induction variable set determine the penalty. */
5975
5976 static void
5977 determine_set_costs (struct ivopts_data *data)
5978 {
5979 unsigned j, n;
5980 gphi *phi;
5981 gphi_iterator psi;
5982 tree op;
5983 struct loop *loop = data->current_loop;
5984 bitmap_iterator bi;
5985
5986 if (dump_file && (dump_flags & TDF_DETAILS))
5987 {
5988 fprintf (dump_file, "<Global Costs>:\n");
5989 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5990 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5991 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5992 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5993 }
5994
5995 n = 0;
5996 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5997 {
5998 phi = psi.phi ();
5999 op = PHI_RESULT (phi);
6000
6001 if (virtual_operand_p (op))
6002 continue;
6003
6004 if (get_iv (data, op))
6005 continue;
6006
6007 n++;
6008 }
6009
6010 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6011 {
6012 struct version_info *info = ver_info (data, j);
6013
6014 if (info->inv_id && info->has_nonlin_use)
6015 n++;
6016 }
6017
6018 data->regs_used = n;
6019 if (dump_file && (dump_flags & TDF_DETAILS))
6020 fprintf (dump_file, " regs_used %d\n", n);
6021
6022 if (dump_file && (dump_flags & TDF_DETAILS))
6023 {
6024 fprintf (dump_file, " cost for size:\n");
6025 fprintf (dump_file, " ivs\tcost\n");
6026 for (j = 0; j <= 2 * target_avail_regs; j++)
6027 fprintf (dump_file, " %d\t%d\n", j,
6028 ivopts_global_cost_for_size (data, j));
6029 fprintf (dump_file, "\n");
6030 }
6031 }
6032
6033 /* Returns true if A is a cheaper cost pair than B. */
6034
6035 static bool
6036 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6037 {
6038 if (!a)
6039 return false;
6040
6041 if (!b)
6042 return true;
6043
6044 if (a->cost < b->cost)
6045 return true;
6046
6047 if (b->cost < a->cost)
6048 return false;
6049
6050 /* In case the costs are the same, prefer the cheaper candidate. */
6051 if (a->cand->cost < b->cand->cost)
6052 return true;
6053
6054 return false;
6055 }
6056
6057
6058 /* Returns the candidate by which GROUP is expressed in IVS. */
6059
6060 static struct cost_pair *
6061 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6062 {
6063 return ivs->cand_for_group[group->id];
6064 }
6065
6066 /* Computes the cost field of IVS structure. */
6067
6068 static void
6069 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6070 {
6071 comp_cost cost = ivs->cand_use_cost;
6072
6073 cost += ivs->cand_cost;
6074
6075 cost += ivopts_global_cost_for_size (data,
6076 ivs->n_regs
6077 + ivs->used_inv_exprs->elements ());
6078
6079 ivs->cost = cost;
6080 }
6081
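/* In other words, the total cost of an assignment is
     cost = cand_use_cost + cand_cost
            + ivopts_global_cost_for_size (n_regs + #invariant expressions);
e.g. with hypothetical values cand_use_cost = 20, cand_cost = 8 and a
register-pressure cost of 6 for the combined size, the set would cost 34. */
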
6082 /* Remove invariants in set INVS from set IVS. */
6083
6084 static void
6085 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6086 {
6087 bitmap_iterator bi;
6088 unsigned iid;
6089
6090 if (!invs)
6091 return;
6092
6093 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6094 {
6095 ivs->n_invariant_uses[iid]--;
6096 if (ivs->n_invariant_uses[iid] == 0)
6097 ivs->n_regs--;
6098 }
6099 }
6100
6101 /* Set GROUP not to be expressed by any candidate in IVS. */
6102
6103 static void
6104 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6105 struct iv_group *group)
6106 {
6107 unsigned gid = group->id, cid;
6108 struct cost_pair *cp;
6109
6110 cp = ivs->cand_for_group[gid];
6111 if (!cp)
6112 return;
6113 cid = cp->cand->id;
6114
6115 ivs->bad_groups++;
6116 ivs->cand_for_group[gid] = NULL;
6117 ivs->n_cand_uses[cid]--;
6118
6119 if (ivs->n_cand_uses[cid] == 0)
6120 {
6121 bitmap_clear_bit (ivs->cands, cid);
6122 /* Do not count the pseudocandidates. */
6123 if (cp->cand->iv)
6124 ivs->n_regs--;
6125 ivs->n_cands--;
6126 ivs->cand_cost -= cp->cand->cost;
6127
6128 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6129 }
6130
6131 ivs->cand_use_cost -= cp->cost;
6132
6133 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6134
6135 if (cp->inv_expr != NULL)
6136 {
6137 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6138 --(*slot);
6139 if (*slot == 0)
6140 ivs->used_inv_exprs->remove (cp->inv_expr);
6141 }
6142 iv_ca_recount_cost (data, ivs);
6143 }
6144
6145 /* Add invariants in set INVS to set IVS. */
6146
6147 static void
6148 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6149 {
6150 bitmap_iterator bi;
6151 unsigned iid;
6152
6153 if (!invs)
6154 return;
6155
6156 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6157 {
6158 ivs->n_invariant_uses[iid]++;
6159 if (ivs->n_invariant_uses[iid] == 1)
6160 ivs->n_regs++;
6161 }
6162 }
6163
6164 /* Set cost pair for GROUP in set IVS to CP. */
6165
6166 static void
6167 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6168 struct iv_group *group, struct cost_pair *cp)
6169 {
6170 unsigned gid = group->id, cid;
6171
6172 if (ivs->cand_for_group[gid] == cp)
6173 return;
6174
6175 if (ivs->cand_for_group[gid])
6176 iv_ca_set_no_cp (data, ivs, group);
6177
6178 if (cp)
6179 {
6180 cid = cp->cand->id;
6181
6182 ivs->bad_groups--;
6183 ivs->cand_for_group[gid] = cp;
6184 ivs->n_cand_uses[cid]++;
6185 if (ivs->n_cand_uses[cid] == 1)
6186 {
6187 bitmap_set_bit (ivs->cands, cid);
6188 /* Do not count the pseudocandidates. */
6189 if (cp->cand->iv)
6190 ivs->n_regs++;
6191 ivs->n_cands++;
6192 ivs->cand_cost += cp->cand->cost;
6193
6194 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6195 }
6196
6197 ivs->cand_use_cost += cp->cost;
6198 iv_ca_set_add_invariants (ivs, cp->depends_on);
6199
6200 if (cp->inv_expr != NULL)
6201 {
6202 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6203 ++(*slot);
6204 }
6205 iv_ca_recount_cost (data, ivs);
6206 }
6207 }
6208
6209 /* Extend set IVS by expressing GROUP by some of the candidates in it
6210 if possible. Consider all important candidates if the candidates in
6211 set IVS don't give any result. */
6212
6213 static void
6214 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6215 struct iv_group *group)
6216 {
6217 struct cost_pair *best_cp = NULL, *cp;
6218 bitmap_iterator bi;
6219 unsigned i;
6220 struct iv_cand *cand;
6221
6222 gcc_assert (ivs->upto >= group->id);
6223 ivs->upto++;
6224 ivs->bad_groups++;
6225
6226 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6227 {
6228 cand = data->vcands[i];
6229 cp = get_group_iv_cost (data, group, cand);
6230 if (cheaper_cost_pair (cp, best_cp))
6231 best_cp = cp;
6232 }
6233
6234 if (best_cp == NULL)
6235 {
6236 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6237 {
6238 cand = data->vcands[i];
6239 cp = get_group_iv_cost (data, group, cand);
6240 if (cheaper_cost_pair (cp, best_cp))
6241 best_cp = cp;
6242 }
6243 }
6244
6245 iv_ca_set_cp (data, ivs, group, best_cp);
6246 }
6247
6248 /* Get cost for assignment IVS. */
6249
6250 static comp_cost
6251 iv_ca_cost (struct iv_ca *ivs)
6252 {
6253 /* This was a conditional expression but it triggered a bug in
6254 Sun C 5.5. */
6255 if (ivs->bad_groups)
6256 return infinite_cost;
6257 else
6258 return ivs->cost;
6259 }
6260
6261 /* Returns true if all dependences of CP are among invariants in IVS. */
6262
6263 static bool
6264 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6265 {
6266 unsigned i;
6267 bitmap_iterator bi;
6268
6269 if (!cp->depends_on)
6270 return true;
6271
6272 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6273 {
6274 if (ivs->n_invariant_uses[i] == 0)
6275 return false;
6276 }
6277
6278 return true;
6279 }
6280
6281 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6282 it before NEXT. */
6283
6284 static struct iv_ca_delta *
6285 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6286 struct cost_pair *new_cp, struct iv_ca_delta *next)
6287 {
6288 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6289
6290 change->group = group;
6291 change->old_cp = old_cp;
6292 change->new_cp = new_cp;
6293 change->next = next;
6294
6295 return change;
6296 }
6297
6298 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6299 are rewritten. */
6300
6301 static struct iv_ca_delta *
6302 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6303 {
6304 struct iv_ca_delta *last;
6305
6306 if (!l2)
6307 return l1;
6308
6309 if (!l1)
6310 return l2;
6311
6312 for (last = l1; last->next; last = last->next)
6313 continue;
6314 last->next = l2;
6315
6316 return l1;
6317 }
6318
6319 /* Reverse the list of changes DELTA, forming the inverse to it. */
6320
6321 static struct iv_ca_delta *
6322 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6323 {
6324 struct iv_ca_delta *act, *next, *prev = NULL;
6325
6326 for (act = delta; act; act = next)
6327 {
6328 next = act->next;
6329 act->next = prev;
6330 prev = act;
6331
6332 std::swap (act->old_cp, act->new_cp);
6333 }
6334
6335 return prev;
6336 }
6337
6338 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6339 reverted instead. */
6340
6341 static void
6342 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6343 struct iv_ca_delta *delta, bool forward)
6344 {
6345 struct cost_pair *from, *to;
6346 struct iv_ca_delta *act;
6347
6348 if (!forward)
6349 delta = iv_ca_delta_reverse (delta);
6350
6351 for (act = delta; act; act = act->next)
6352 {
6353 from = act->old_cp;
6354 to = act->new_cp;
6355 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6356 iv_ca_set_cp (data, ivs, act->group, to);
6357 }
6358
6359 if (!forward)
6360 iv_ca_delta_reverse (delta);
6361 }
6362
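/* A sketch of the idiom built on these delta lists (as used by iv_ca_extend
and iv_ca_prune below): build a delta with iv_ca_delta_add, apply it
tentatively, read the cost, then revert it:
     delta = iv_ca_delta_add (group, old_cp, new_cp, delta);
     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);
This way candidate changes can be costed without copying the whole set. */
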
6363 /* Returns true if CAND is used in IVS. */
6364
6365 static bool
6366 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6367 {
6368 return ivs->n_cand_uses[cand->id] > 0;
6369 }
6370
6371 /* Returns number of induction variable candidates in the set IVS. */
6372
6373 static unsigned
6374 iv_ca_n_cands (struct iv_ca *ivs)
6375 {
6376 return ivs->n_cands;
6377 }
6378
6379 /* Free the list of changes DELTA. */
6380
6381 static void
6382 iv_ca_delta_free (struct iv_ca_delta **delta)
6383 {
6384 struct iv_ca_delta *act, *next;
6385
6386 for (act = *delta; act; act = next)
6387 {
6388 next = act->next;
6389 free (act);
6390 }
6391
6392 *delta = NULL;
6393 }
6394
6395 /* Allocates a new iv candidate assignment. */
6396
6397 static struct iv_ca *
6398 iv_ca_new (struct ivopts_data *data)
6399 {
6400 struct iv_ca *nw = XNEW (struct iv_ca);
6401
6402 nw->upto = 0;
6403 nw->bad_groups = 0;
6404 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6405 data->vgroups.length ());
6406 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6407 nw->cands = BITMAP_ALLOC (NULL);
6408 nw->n_cands = 0;
6409 nw->n_regs = 0;
6410 nw->cand_use_cost = no_cost;
6411 nw->cand_cost = 0;
6412 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6413 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6414 nw->cost = no_cost;
6415
6416 return nw;
6417 }
6418
6419 /* Free memory occupied by the set IVS. */
6420
6421 static void
6422 iv_ca_free (struct iv_ca **ivs)
6423 {
6424 free ((*ivs)->cand_for_group);
6425 free ((*ivs)->n_cand_uses);
6426 BITMAP_FREE ((*ivs)->cands);
6427 free ((*ivs)->n_invariant_uses);
6428 delete ((*ivs)->used_inv_exprs);
6429 free (*ivs);
6430 *ivs = NULL;
6431 }
6432
6433 /* Dumps IVS to FILE. */
6434
6435 static void
6436 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6437 {
6438 unsigned i;
6439 comp_cost cost = iv_ca_cost (ivs);
6440
6441 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6442 cost.complexity);
6443 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6444 ivs->cand_cost, ivs->cand_use_cost.cost,
6445 ivs->cand_use_cost.complexity);
6446 bitmap_print (file, ivs->cands, " candidates: ","\n");
6447
6448 for (i = 0; i < ivs->upto; i++)
6449 {
6450 struct iv_group *group = data->vgroups[i];
6451 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6452 if (cp)
6453 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6454 group->id, cp->cand->id, cp->cost.cost,
6455 cp->cost.complexity);
6456 else
6457 fprintf (file, " group:%d --> ??\n", group->id);
6458 }
6459
6460 const char *pref = "";
6461 fprintf (file, " invariant variables: ");
6462 for (i = 1; i <= data->max_inv_id; i++)
6463 if (ivs->n_invariant_uses[i])
6464 {
6465 fprintf (file, "%s%d", pref, i);
6466 pref = ", ";
6467 }
6468
6469 pref = "";
6470 fprintf (file, "\n invariant expressions: ");
6471 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6472 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6473 {
6474 fprintf (file, "%s%d", pref, (*it).first->id);
6475 pref = ", ";
6476 }
6477
6478 fprintf (file, "\n\n");
6479 }
6480
6481 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6482 of the new set, and store the differences in DELTA. The number of induction
6483 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6484 is true, the function will try to find a solution with minimal iv candidates. */
6485
6486 static comp_cost
6487 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6488 struct iv_cand *cand, struct iv_ca_delta **delta,
6489 unsigned *n_ivs, bool min_ncand)
6490 {
6491 unsigned i;
6492 comp_cost cost;
6493 struct iv_group *group;
6494 struct cost_pair *old_cp, *new_cp;
6495
6496 *delta = NULL;
6497 for (i = 0; i < ivs->upto; i++)
6498 {
6499 group = data->vgroups[i];
6500 old_cp = iv_ca_cand_for_group (ivs, group);
6501
6502 if (old_cp
6503 && old_cp->cand == cand)
6504 continue;
6505
6506 new_cp = get_group_iv_cost (data, group, cand);
6507 if (!new_cp)
6508 continue;
6509
6510 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6511 continue;
6512
6513 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6514 continue;
6515
6516 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6517 }
6518
6519 iv_ca_delta_commit (data, ivs, *delta, true);
6520 cost = iv_ca_cost (ivs);
6521 if (n_ivs)
6522 *n_ivs = iv_ca_n_cands (ivs);
6523 iv_ca_delta_commit (data, ivs, *delta, false);
6524
6525 return cost;
6526 }
6527
6528 /* Try narrowing set IVS by removing CAND. Return the cost of
6529 the new set and store the differences in DELTA. START is
6530 the candidate with which we start narrowing. */
6531
6532 static comp_cost
6533 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6534 struct iv_cand *cand, struct iv_cand *start,
6535 struct iv_ca_delta **delta)
6536 {
6537 unsigned i, ci;
6538 struct iv_group *group;
6539 struct cost_pair *old_cp, *new_cp, *cp;
6540 bitmap_iterator bi;
6541 struct iv_cand *cnd;
6542 comp_cost cost, best_cost, acost;
6543
6544 *delta = NULL;
6545 for (i = 0; i < data->vgroups.length (); i++)
6546 {
6547 group = data->vgroups[i];
6548
6549 old_cp = iv_ca_cand_for_group (ivs, group);
6550 if (old_cp->cand != cand)
6551 continue;
6552
6553 best_cost = iv_ca_cost (ivs);
6554 /* Start narrowing with START. */
6555 new_cp = get_group_iv_cost (data, group, start);
6556
6557 if (data->consider_all_candidates)
6558 {
6559 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6560 {
6561 if (ci == cand->id || (start && ci == start->id))
6562 continue;
6563
6564 cnd = data->vcands[ci];
6565
6566 cp = get_group_iv_cost (data, group, cnd);
6567 if (!cp)
6568 continue;
6569
6570 iv_ca_set_cp (data, ivs, group, cp);
6571 acost = iv_ca_cost (ivs);
6572
6573 if (acost < best_cost)
6574 {
6575 best_cost = acost;
6576 new_cp = cp;
6577 }
6578 }
6579 }
6580 else
6581 {
6582 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6583 {
6584 if (ci == cand->id || (start && ci == start->id))
6585 continue;
6586
6587 cnd = data->vcands[ci];
6588
6589 cp = get_group_iv_cost (data, group, cnd);
6590 if (!cp)
6591 continue;
6592
6593 iv_ca_set_cp (data, ivs, group, cp);
6594 acost = iv_ca_cost (ivs);
6595
6596 if (acost < best_cost)
6597 {
6598 best_cost = acost;
6599 new_cp = cp;
6600 }
6601 }
6602 }
6603 /* Restore to old cp for use. */
6604 iv_ca_set_cp (data, ivs, group, old_cp);
6605
6606 if (!new_cp)
6607 {
6608 iv_ca_delta_free (delta);
6609 return infinite_cost;
6610 }
6611
6612 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6613 }
6614
6615 iv_ca_delta_commit (data, ivs, *delta, true);
6616 cost = iv_ca_cost (ivs);
6617 iv_ca_delta_commit (data, ivs, *delta, false);
6618
6619 return cost;
6620 }
6621
6622 /* Try optimizing the set of candidates IVS by removing candidates different
6623 from EXCEPT_CAND from it. Return the cost of the new set, and store the
6624 differences in DELTA. */
6625
6626 static comp_cost
6627 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6628 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6629 {
6630 bitmap_iterator bi;
6631 struct iv_ca_delta *act_delta, *best_delta;
6632 unsigned i;
6633 comp_cost best_cost, acost;
6634 struct iv_cand *cand;
6635
6636 best_delta = NULL;
6637 best_cost = iv_ca_cost (ivs);
6638
6639 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6640 {
6641 cand = data->vcands[i];
6642
6643 if (cand == except_cand)
6644 continue;
6645
6646 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6647
6648 if (acost < best_cost)
6649 {
6650 best_cost = acost;
6651 iv_ca_delta_free (&best_delta);
6652 best_delta = act_delta;
6653 }
6654 else
6655 iv_ca_delta_free (&act_delta);
6656 }
6657
6658 if (!best_delta)
6659 {
6660 *delta = NULL;
6661 return best_cost;
6662 }
6663
6664 /* Recurse to possibly remove other unnecessary ivs. */
6665 iv_ca_delta_commit (data, ivs, best_delta, true);
6666 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6667 iv_ca_delta_commit (data, ivs, best_delta, false);
6668 *delta = iv_ca_delta_join (best_delta, *delta);
6669 return best_cost;
6670 }
6671
6672 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6673 cheaper local cost for GROUP than BEST_CP. If so, return a pointer
6674 to its cost_pair; otherwise just return BEST_CP. */
6675
6676 static struct cost_pair*
6677 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6678 unsigned int cand_idx, struct iv_cand *old_cand,
6679 struct cost_pair *best_cp)
6680 {
6681 struct iv_cand *cand;
6682 struct cost_pair *cp;
6683
6684 gcc_assert (old_cand != NULL && best_cp != NULL);
6685 if (cand_idx == old_cand->id)
6686 return best_cp;
6687
6688 cand = data->vcands[cand_idx];
6689 cp = get_group_iv_cost (data, group, cand);
6690 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6691 return cp;
6692
6693 return best_cp;
6694 }
6695
6696 /* Try breaking the locally optimal fixed-point for IVS by replacing
6697 candidates that are used by more than one iv use. For each of those
6698 candidates, this function tries to represent the iv uses under that
6699 candidate using other candidates with lower local cost, then tries to
6700 prune the new set. If the new set has lower cost, it returns the new
6701 cost after recording the candidate replacement in list DELTA. */
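/* Schematically (an illustrative scenario, not a real dump): if candidate A
is selected for groups g1 and g2 but is not the locally cheapest choice for
either, expressing g1 by B and g2 by C leaves A unused; the pruning step can
then drop A and its increment cost, which may lower the overall cost and
break the fixed-point. */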
6702
6703 static comp_cost
6704 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6705 struct iv_ca_delta **delta)
6706 {
6707 bitmap_iterator bi, bj;
6708 unsigned int i, j, k;
6709 struct iv_cand *cand;
6710 comp_cost orig_cost, acost;
6711 struct iv_ca_delta *act_delta, *tmp_delta;
6712 struct cost_pair *old_cp, *best_cp = NULL;
6713
6714 *delta = NULL;
6715 orig_cost = iv_ca_cost (ivs);
6716
6717 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6718 {
6719 if (ivs->n_cand_uses[i] == 1
6720 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6721 continue;
6722
6723 cand = data->vcands[i];
6724
6725 act_delta = NULL;
6726 /* Represent uses under current candidate using other ones with
6727 lower local cost. */
6728 for (j = 0; j < ivs->upto; j++)
6729 {
6730 struct iv_group *group = data->vgroups[j];
6731 old_cp = iv_ca_cand_for_group (ivs, group);
6732
6733 if (old_cp->cand != cand)
6734 continue;
6735
6736 best_cp = old_cp;
6737 if (data->consider_all_candidates)
6738 for (k = 0; k < data->vcands.length (); k++)
6739 best_cp = cheaper_cost_with_cand (data, group, k,
6740 old_cp->cand, best_cp);
6741 else
6742 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6743 best_cp = cheaper_cost_with_cand (data, group, k,
6744 old_cp->cand, best_cp);
6745
6746 if (best_cp == old_cp)
6747 continue;
6748
6749 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6750 }
6751 /* No need for further prune. */
6752 if (!act_delta)
6753 continue;
6754
6755 /* Prune the new candidate set. */
6756 iv_ca_delta_commit (data, ivs, act_delta, true);
6757 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6758 iv_ca_delta_commit (data, ivs, act_delta, false);
6759 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6760
6761 if (acost < orig_cost)
6762 {
6763 *delta = act_delta;
6764 return acost;
6765 }
6766 else
6767 iv_ca_delta_free (&act_delta);
6768 }
6769
6770 return orig_cost;
6771 }
6772
6773 /* Tries to extend the set IVS in the best possible way in order to
6774 express the GROUP. If ORIGINALP is true, prefer candidates from
6775 the original set of IVs, otherwise favor important candidates not
6776 based on any memory object. */
6777
6778 static bool
6779 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6780 struct iv_group *group, bool originalp)
6781 {
6782 comp_cost best_cost, act_cost;
6783 unsigned i;
6784 bitmap_iterator bi;
6785 struct iv_cand *cand;
6786 struct iv_ca_delta *best_delta = NULL, *act_delta;
6787 struct cost_pair *cp;
6788
6789 iv_ca_add_group (data, ivs, group);
6790 best_cost = iv_ca_cost (ivs);
6791 cp = iv_ca_cand_for_group (ivs, group);
6792 if (cp)
6793 {
6794 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6795 iv_ca_set_no_cp (data, ivs, group);
6796 }
6797
6798 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6799 first try important candidates not based on any memory object. Only if
6800 this fails, try the specific ones. Rationale -- in loops with many
6801 variables the best choice often is to use just one generic biv. If we
6802 added here many ivs specific to the uses, the optimization algorithm later
6803 would be likely to get stuck in a local minimum, thus causing us to create
6804 too many ivs. The approach from few ivs to more seems more likely to be
6805 successful -- starting from few ivs, replacing an expensive use by a
6806 specific iv should always be a win. */
6807 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6808 {
6809 cand = data->vcands[i];
6810
6811 if (originalp && cand->pos !=IP_ORIGINAL)
6812 continue;
6813
6814 if (!originalp && cand->iv->base_object != NULL_TREE)
6815 continue;
6816
6817 if (iv_ca_cand_used_p (ivs, cand))
6818 continue;
6819
6820 cp = get_group_iv_cost (data, group, cand);
6821 if (!cp)
6822 continue;
6823
6824 iv_ca_set_cp (data, ivs, group, cp);
6825 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6826 true);
6827 iv_ca_set_no_cp (data, ivs, group);
6828 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6829
6830 if (act_cost < best_cost)
6831 {
6832 best_cost = act_cost;
6833
6834 iv_ca_delta_free (&best_delta);
6835 best_delta = act_delta;
6836 }
6837 else
6838 iv_ca_delta_free (&act_delta);
6839 }
6840
6841 if (best_cost.infinite_cost_p ())
6842 {
6843 for (i = 0; i < group->n_map_members; i++)
6844 {
6845 cp = group->cost_map + i;
6846 cand = cp->cand;
6847 if (!cand)
6848 continue;
6849
6850 /* Already tried this. */
6851 if (cand->important)
6852 {
6853 if (originalp && cand->pos == IP_ORIGINAL)
6854 continue;
6855 if (!originalp && cand->iv->base_object == NULL_TREE)
6856 continue;
6857 }
6858
6859 if (iv_ca_cand_used_p (ivs, cand))
6860 continue;
6861
6862 act_delta = NULL;
6863 iv_ca_set_cp (data, ivs, group, cp);
6864 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6865 iv_ca_set_no_cp (data, ivs, group);
6866 act_delta = iv_ca_delta_add (group,
6867 iv_ca_cand_for_group (ivs, group),
6868 cp, act_delta);
6869
6870 if (act_cost < best_cost)
6871 {
6872 best_cost = act_cost;
6873
6874 if (best_delta)
6875 iv_ca_delta_free (&best_delta);
6876 best_delta = act_delta;
6877 }
6878 else
6879 iv_ca_delta_free (&act_delta);
6880 }
6881 }
6882
6883 iv_ca_delta_commit (data, ivs, best_delta, true);
6884 iv_ca_delta_free (&best_delta);
6885
6886 return !best_cost.infinite_cost_p ();
6887 }
6888
6889 /* Finds an initial assignment of candidates to uses. */
6890
6891 static struct iv_ca *
6892 get_initial_solution (struct ivopts_data *data, bool originalp)
6893 {
6894 unsigned i;
6895 struct iv_ca *ivs = iv_ca_new (data);
6896
6897 for (i = 0; i < data->vgroups.length (); i++)
6898 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6899 {
6900 iv_ca_free (&ivs);
6901 return NULL;
6902 }
6903
6904 return ivs;
6905 }
6906
6907 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6908 points to a bool variable; if it is true, this function tries to break
6909 a locally optimal fixed-point by replacing candidates in IVS. */
6910
6911 static bool
6912 try_improve_iv_set (struct ivopts_data *data,
6913 struct iv_ca *ivs, bool *try_replace_p)
6914 {
6915 unsigned i, n_ivs;
6916 comp_cost acost, best_cost = iv_ca_cost (ivs);
6917 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6918 struct iv_cand *cand;
6919
6920 /* Try extending the set of induction variables by one. */
6921 for (i = 0; i < data->vcands.length (); i++)
6922 {
6923 cand = data->vcands[i];
6924
6925 if (iv_ca_cand_used_p (ivs, cand))
6926 continue;
6927
6928 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6929 if (!act_delta)
6930 continue;
6931
6932 /* If we successfully added the candidate and the set is small enough,
6933 try optimizing it by removing other candidates. */
6934 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6935 {
6936 iv_ca_delta_commit (data, ivs, act_delta, true);
6937 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6938 iv_ca_delta_commit (data, ivs, act_delta, false);
6939 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6940 }
6941
6942 if (acost < best_cost)
6943 {
6944 best_cost = acost;
6945 iv_ca_delta_free (&best_delta);
6946 best_delta = act_delta;
6947 }
6948 else
6949 iv_ca_delta_free (&act_delta);
6950 }
6951
6952 if (!best_delta)
6953 {
6954 /* Try removing the candidates from the set instead. */
6955 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6956
6957 if (!best_delta && *try_replace_p)
6958 {
6959 *try_replace_p = false;
6960 /* So far the candidate selection algorithm tends to choose few IVs so
6961 that it can handle loops with many variables for which the best choice
6962 is often to use only one general biv. One weakness is that it can't
6963 handle the opposite case, in which different candidates should be
6964 chosen for different uses. To solve this, we replace candidates in the
6965 manner described in the comment of iv_ca_replace, thus giving the
6966 general algorithm a chance to break the locally optimal fixed-point in
6967 these cases. */
6968 best_cost = iv_ca_replace (data, ivs, &best_delta);
6969 }
6970
6971 if (!best_delta)
6972 return false;
6973 }
6974
6975 iv_ca_delta_commit (data, ivs, best_delta, true);
6976 gcc_assert (best_cost == iv_ca_cost (ivs));
6977 iv_ca_delta_free (&best_delta);
6978 return true;
6979 }
6980
6981 /* Attempts to find the optimal set of induction variables. We use a simple
6982 greedy heuristic -- we try to replace at most one candidate in the selected
6983 solution and remove the unused ivs while this improves the cost. */
6984
6985 static struct iv_ca *
6986 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6987 {
6988 struct iv_ca *set;
6989 bool try_replace_p = true;
6990
6991 /* Get the initial solution. */
6992 set = get_initial_solution (data, originalp);
6993 if (!set)
6994 {
6995 if (dump_file && (dump_flags & TDF_DETAILS))
6996 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6997 return NULL;
6998 }
6999
7000 if (dump_file && (dump_flags & TDF_DETAILS))
7001 {
7002 fprintf (dump_file, "Initial set of candidates:\n");
7003 iv_ca_dump (data, dump_file, set);
7004 }
7005
7006 while (try_improve_iv_set (data, set, &try_replace_p))
7007 {
7008 if (dump_file && (dump_flags & TDF_DETAILS))
7009 {
7010 fprintf (dump_file, "Improved to:\n");
7011 iv_ca_dump (data, dump_file, set);
7012 }
7013 }
7014
7015 return set;
7016 }
7017
7018 static struct iv_ca *
7019 find_optimal_iv_set (struct ivopts_data *data)
7020 {
7021 unsigned i;
7022 comp_cost cost, origcost;
7023 struct iv_ca *set, *origset;
7024
7025 /* Determine the cost of a set based on a strategy that starts with the
7026 original IVs, and of one based on a strategy that prefers candidates not
7027 based on any memory object; then choose the cheaper of the two. */
7028 origset = find_optimal_iv_set_1 (data, true);
7029 set = find_optimal_iv_set_1 (data, false);
7030
7031 if (!origset && !set)
7032 return NULL;
7033
7034 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7035 cost = set ? iv_ca_cost (set) : infinite_cost;
7036
7037 if (dump_file && (dump_flags & TDF_DETAILS))
7038 {
7039 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7040 origcost.cost, origcost.complexity);
7041 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7042 cost.cost, cost.complexity);
7043 }
7044
7045 /* Choose the one with the best cost. */
7046 if (origcost <= cost)
7047 {
7048 if (set)
7049 iv_ca_free (&set);
7050 set = origset;
7051 }
7052 else if (origset)
7053 iv_ca_free (&origset);
7054
7055 for (i = 0; i < data->vgroups.length (); i++)
7056 {
7057 struct iv_group *group = data->vgroups[i];
7058 group->selected = iv_ca_cand_for_group (set, group)->cand;
7059 }
7060
7061 return set;
7062 }
7063
7064 /* Creates a new induction variable corresponding to CAND. */
7065
7066 static void
7067 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7068 {
7069 gimple_stmt_iterator incr_pos;
7070 tree base;
7071 struct iv_use *use;
7072 struct iv_group *group;
7073 bool after = false;
7074
7075 if (!cand->iv)
7076 return;
7077
7078 switch (cand->pos)
7079 {
7080 case IP_NORMAL:
7081 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7082 break;
7083
7084 case IP_END:
7085 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7086 after = true;
7087 break;
7088
7089 case IP_AFTER_USE:
7090 after = true;
7091 /* fall through */
7092 case IP_BEFORE_USE:
7093 incr_pos = gsi_for_stmt (cand->incremented_at);
7094 break;
7095
7096 case IP_ORIGINAL:
7097 /* Mark that the iv is preserved. */
7098 name_info (data, cand->var_before)->preserve_biv = true;
7099 name_info (data, cand->var_after)->preserve_biv = true;
7100
7101 /* Rewrite the increment so that it uses var_before directly. */
7102 use = find_interesting_uses_op (data, cand->var_after);
7103 group = data->vgroups[use->group_id];
7104 group->selected = cand;
7105 return;
7106 }
7107
7108 gimple_add_tmp_var (cand->var_before);
7109
7110 base = unshare_expr (cand->iv->base);
7111
7112 create_iv (base, unshare_expr (cand->iv->step),
7113 cand->var_before, data->current_loop,
7114 &incr_pos, after, &cand->var_before, &cand->var_after);
7115 }
7116
7117 /* Creates new induction variables described in SET. */
7118
7119 static void
7120 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7121 {
7122 unsigned i;
7123 struct iv_cand *cand;
7124 bitmap_iterator bi;
7125
7126 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7127 {
7128 cand = data->vcands[i];
7129 create_new_iv (data, cand);
7130 }
7131
7132 if (dump_file && (dump_flags & TDF_DETAILS))
7133 {
7134 fprintf (dump_file, "Selected IV set for loop %d",
7135 data->current_loop->num);
7136 if (data->loop_loc != UNKNOWN_LOCATION)
7137 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7138 LOCATION_LINE (data->loop_loc));
7139 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7140 avg_loop_niter (data->current_loop));
7141 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7142 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7143 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7144 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7145 {
7146 cand = data->vcands[i];
7147 dump_cand (dump_file, cand);
7148 }
7149 fprintf (dump_file, "\n");
7150 }
7151 }
7152
7153 /* Rewrites USE (definition of iv used in a nonlinear expression)
7154 using candidate CAND. */
7155
7156 static void
7157 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7158 struct iv_use *use, struct iv_cand *cand)
7159 {
7160 tree comp;
7161 tree op, tgt;
7162 gassign *ass;
7163 gimple_stmt_iterator bsi;
7164
7165 /* An important special case -- if we are asked to express the value of
7166 the original iv by itself, just exit; there is no need to
7167 introduce a new computation (that might also need casting the
7168 variable to unsigned and back). */
7169 if (cand->pos == IP_ORIGINAL
7170 && cand->incremented_at == use->stmt)
7171 {
7172 enum tree_code stmt_code;
7173
7174 gcc_assert (is_gimple_assign (use->stmt));
7175 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7176
7177 /* Check whether we may leave the computation unchanged.
7178 This is the case only if it does not rely on other
7179 computations in the loop -- otherwise, the computation
7180 we rely upon may be removed in remove_unused_ivs,
7181 thus leading to ICE. */
7182 stmt_code = gimple_assign_rhs_code (use->stmt);
7183 if (stmt_code == PLUS_EXPR
7184 || stmt_code == MINUS_EXPR
7185 || stmt_code == POINTER_PLUS_EXPR)
7186 {
7187 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7188 op = gimple_assign_rhs2 (use->stmt);
7189 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7190 op = gimple_assign_rhs1 (use->stmt);
7191 else
7192 op = NULL_TREE;
7193 }
7194 else
7195 op = NULL_TREE;
7196
7197 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7198 return;
7199 }
7200
7201 comp = get_computation (data->current_loop, use, cand);
7202 gcc_assert (comp != NULL_TREE);
7203
7204 switch (gimple_code (use->stmt))
7205 {
7206 case GIMPLE_PHI:
7207 tgt = PHI_RESULT (use->stmt);
7208
7209 /* If we should keep the biv, do not replace it. */
7210 if (name_info (data, tgt)->preserve_biv)
7211 return;
7212
7213 bsi = gsi_after_labels (gimple_bb (use->stmt));
7214 break;
7215
7216 case GIMPLE_ASSIGN:
7217 tgt = gimple_assign_lhs (use->stmt);
7218 bsi = gsi_for_stmt (use->stmt);
7219 break;
7220
7221 default:
7222 gcc_unreachable ();
7223 }
7224
7225 if (!valid_gimple_rhs_p (comp)
7226 || (gimple_code (use->stmt) != GIMPLE_PHI
7227 /* We can't allow re-allocating the stmt as it might be pointed
7228 to still. */
7229 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7230 >= gimple_num_ops (gsi_stmt (bsi)))))
7231 {
7232 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7233 true, GSI_SAME_STMT);
7234 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7235 {
7236 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7237 /* As this isn't a plain copy we have to reset alignment
7238 information. */
7239 if (SSA_NAME_PTR_INFO (comp))
7240 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7241 }
7242 }
7243
7244 if (gimple_code (use->stmt) == GIMPLE_PHI)
7245 {
7246 ass = gimple_build_assign (tgt, comp);
7247 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7248
7249 bsi = gsi_for_stmt (use->stmt);
7250 remove_phi_node (&bsi, false);
7251 }
7252 else
7253 {
7254 gimple_assign_set_rhs_from_tree (&bsi, comp);
7255 use->stmt = gsi_stmt (bsi);
7256 }
7257 }
7258
7259 /* Performs a peephole optimization to reorder the iv update statement with
7260 a mem ref to enable instruction combining in later phases. The mem ref uses
7261 the iv value before the update, so the reordering transformation requires
7262 adjustment of the offset. CAND is the selected IV_CAND.
7263
7264 Example:
7265
7266 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7267 iv2 = iv1 + 1;
7268
7269 if (t < val) (1)
7270 goto L;
7271 goto Head;
7272
7273
7274 directly propagating t over to (1) will introduce an overlapping live range,
7275 thus increasing register pressure. This peephole transforms it into:
7276
7277
7278 iv2 = iv1 + 1;
7279 t = MEM_REF (base, iv2, 8, 8);
7280 if (t < val)
7281 goto L;
7282 goto Head;
7283 */
7284
7285 static void
7286 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7287 {
7288 tree var_after;
7289 gimple *iv_update, *stmt;
7290 basic_block bb;
7291 gimple_stmt_iterator gsi, gsi_iv;
7292
7293 if (cand->pos != IP_NORMAL)
7294 return;
7295
7296 var_after = cand->var_after;
7297 iv_update = SSA_NAME_DEF_STMT (var_after);
7298
7299 bb = gimple_bb (iv_update);
7300 gsi = gsi_last_nondebug_bb (bb);
7301 stmt = gsi_stmt (gsi);
7302
7303 /* Only handle conditional statement for now. */
7304 if (gimple_code (stmt) != GIMPLE_COND)
7305 return;
7306
7307 gsi_prev_nondebug (&gsi);
7308 stmt = gsi_stmt (gsi);
7309 if (stmt != iv_update)
7310 return;
7311
7312 gsi_prev_nondebug (&gsi);
7313 if (gsi_end_p (gsi))
7314 return;
7315
7316 stmt = gsi_stmt (gsi);
7317 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7318 return;
7319
7320 if (stmt != use->stmt)
7321 return;
7322
7323 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7324 return;
7325
7326 if (dump_file && (dump_flags & TDF_DETAILS))
7327 {
7328 fprintf (dump_file, "Reordering \n");
7329 print_gimple_stmt (dump_file, iv_update, 0, 0);
7330 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7331 fprintf (dump_file, "\n");
7332 }
7333
7334 gsi = gsi_for_stmt (use->stmt);
7335 gsi_iv = gsi_for_stmt (iv_update);
7336 gsi_move_before (&gsi_iv, &gsi);
7337
7338 cand->pos = IP_BEFORE_USE;
7339 cand->incremented_at = use->stmt;
7340 }
7341
7342 /* Rewrites USE (address that is an iv) using candidate CAND. */
7343
7344 static void
7345 rewrite_use_address (struct ivopts_data *data,
7346 struct iv_use *use, struct iv_cand *cand)
7347 {
7348 aff_tree aff;
7349 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7350 tree base_hint = NULL_TREE;
7351 tree ref, iv;
7352 bool ok;
7353
7354 adjust_iv_update_pos (cand, use);
7355 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7356 gcc_assert (ok);
7357 unshare_aff_combination (&aff);
7358
7359 /* To avoid undefined overflow problems, all IV candidates use unsigned
7360 integer types. The drawback is that this makes it impossible for
7361 create_mem_ref to distinguish an IV that is based on a memory object
7362 from one that represents simply an offset.
7363
7364 To work around this problem, we pass a hint to create_mem_ref that
7365 indicates which variable (if any) in aff is an IV based on a memory
7366 object. Note that we only consider the candidate. If this is not
7367 based on an object, the base of the reference is in some subexpression
7368 of the use -- but these will use pointer types, so they are recognized
7369 by the create_mem_ref heuristics anyway. */
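/* A hypothetical example of the hint: for a candidate whose iv is based on
the object '&a[0]', the affine combination may look roughly like
     (unsigned long) &a[0] + 4 * iv_var + 8
and passing the candidate's variable as BASE_HINT lets create_mem_ref
rebuild a reference whose base is the array object rather than a bare
integer, preserving alias information. */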
7370 if (cand->iv->base_object)
7371 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7372
7373 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7374 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7375 reference_alias_ptr_type (*use->op_p),
7376 iv, base_hint, data->speed);
7377 copy_ref_info (ref, *use->op_p);
7378 *use->op_p = ref;
7379 }
7380
7381 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7382 candidate CAND. */
7383
7384 static void
7385 rewrite_use_compare (struct ivopts_data *data,
7386 struct iv_use *use, struct iv_cand *cand)
7387 {
7388 tree comp, *var_p, op, bound;
7389 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7390 enum tree_code compare;
7391 struct iv_group *group = data->vgroups[use->group_id];
7392 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7393 bool ok;
7394
7395 bound = cp->value;
7396 if (bound)
7397 {
7398 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7399 tree var_type = TREE_TYPE (var);
7400 gimple_seq stmts;
7401
7402 if (dump_file && (dump_flags & TDF_DETAILS))
7403 {
7404 fprintf (dump_file, "Replacing exit test: ");
7405 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7406 }
7407 compare = cp->comp;
7408 bound = unshare_expr (fold_convert (var_type, bound));
7409 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7410 if (stmts)
7411 gsi_insert_seq_on_edge_immediate (
7412 loop_preheader_edge (data->current_loop),
7413 stmts);
7414
7415 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7416 gimple_cond_set_lhs (cond_stmt, var);
7417 gimple_cond_set_code (cond_stmt, compare);
7418 gimple_cond_set_rhs (cond_stmt, op);
7419 return;
7420 }
7421
7422 /* The induction variable elimination failed; just express the original
7423 giv. */
7424 comp = get_computation (data->current_loop, use, cand);
7425 gcc_assert (comp != NULL_TREE);
7426
7427 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7428 gcc_assert (ok);
7429
7430 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7431 true, GSI_SAME_STMT);
7432 }
7433
7434 /* Rewrite the groups using the selected induction variables. */
7435
7436 static void
7437 rewrite_groups (struct ivopts_data *data)
7438 {
7439 unsigned i, j;
7440
7441 for (i = 0; i < data->vgroups.length (); i++)
7442 {
7443 struct iv_group *group = data->vgroups[i];
7444 struct iv_cand *cand = group->selected;
7445
7446 gcc_assert (cand);
7447
7448 if (group->type == USE_NONLINEAR_EXPR)
7449 {
7450 for (j = 0; j < group->vuses.length (); j++)
7451 {
7452 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7453 update_stmt (group->vuses[j]->stmt);
7454 }
7455 }
7456 else if (group->type == USE_ADDRESS)
7457 {
7458 for (j = 0; j < group->vuses.length (); j++)
7459 {
7460 rewrite_use_address (data, group->vuses[j], cand);
7461 update_stmt (group->vuses[j]->stmt);
7462 }
7463 }
7464 else
7465 {
7466 gcc_assert (group->type == USE_COMPARE);
7467
7468 for (j = 0; j < group->vuses.length (); j++)
7469 {
7470 rewrite_use_compare (data, group->vuses[j], cand);
7471 update_stmt (group->vuses[j]->stmt);
7472 }
7473 }
7474 }
7475 }
7476
7477 /* Removes the ivs that are not used after rewriting. */
7478
7479 static void
7480 remove_unused_ivs (struct ivopts_data *data)
7481 {
7482 unsigned j;
7483 bitmap_iterator bi;
7484 bitmap toremove = BITMAP_ALLOC (NULL);
7485
7486 /* Figure out an order in which to release SSA DEFs so that we don't
7487 release something that we'd have to propagate into a debug stmt
7488 afterwards. */
7489 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7490 {
7491 struct version_info *info;
7492
7493 info = ver_info (data, j);
7494 if (info->iv
7495 && !integer_zerop (info->iv->step)
7496 && !info->inv_id
7497 && !info->iv->nonlin_use
7498 && !info->preserve_biv)
7499 {
7500 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7501
7502 tree def = info->iv->ssa_name;
7503
7504 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7505 {
7506 imm_use_iterator imm_iter;
7507 use_operand_p use_p;
7508 gimple *stmt;
7509 int count = 0;
7510
7511 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7512 {
7513 if (!gimple_debug_bind_p (stmt))
7514 continue;
7515
7516 /* We just want to determine whether to do nothing
7517 (count == 0), to substitute the computed
7518 expression into a single use of the SSA DEF by
7519 itself (count == 1), or to use a debug temp
7520 because the SSA DEF is used multiple times or as
7521 part of a larger expression (count > 1). */
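/* E.g. (hypothetical debug statements): a single "# DEBUG i => i_3" bind
can take the rebuilt expression directly, while several binds mentioning
i_3, or a bind such as "# DEBUG j => i_3 + 1", force the expression into a
debug temporary that the binds then reference. */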
7522 count++;
7523 if (gimple_debug_bind_get_value (stmt) != def)
7524 count++;
7525
7526 if (count > 1)
7527 BREAK_FROM_IMM_USE_STMT (imm_iter);
7528 }
7529
7530 if (!count)
7531 continue;
7532
7533 struct iv_use dummy_use;
7534 struct iv_cand *best_cand = NULL, *cand;
7535 unsigned i, best_pref = 0, cand_pref;
7536
7537 memset (&dummy_use, 0, sizeof (dummy_use));
7538 dummy_use.iv = info->iv;
7539 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7540 {
7541 cand = data->vgroups[i]->selected;
7542 if (cand == best_cand)
7543 continue;
7544 cand_pref = operand_equal_p (cand->iv->step,
7545 info->iv->step, 0)
7546 ? 4 : 0;
7547 cand_pref
7548 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7549 == TYPE_MODE (TREE_TYPE (info->iv->base))
7550 ? 2 : 0;
7551 cand_pref
7552 += TREE_CODE (cand->iv->base) == INTEGER_CST
7553 ? 1 : 0;
7554 if (best_cand == NULL || best_pref < cand_pref)
7555 {
7556 best_cand = cand;
7557 best_pref = cand_pref;
7558 }
7559 }
7560
7561 if (!best_cand)
7562 continue;
7563
7564 tree comp = get_computation_at (data->current_loop,
7565 &dummy_use, best_cand,
7566 SSA_NAME_DEF_STMT (def));
7567 if (!comp)
7568 continue;
7569
7570 if (count > 1)
7571 {
7572 tree vexpr = make_node (DEBUG_EXPR_DECL);
7573 DECL_ARTIFICIAL (vexpr) = 1;
7574 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7575 if (SSA_NAME_VAR (def))
7576 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7577 else
7578 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7579 gdebug *def_temp
7580 = gimple_build_debug_bind (vexpr, comp, NULL);
7581 gimple_stmt_iterator gsi;
7582
7583 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7584 gsi = gsi_after_labels (gimple_bb
7585 (SSA_NAME_DEF_STMT (def)));
7586 else
7587 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7588
7589 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7590 comp = vexpr;
7591 }
7592
7593 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7594 {
7595 if (!gimple_debug_bind_p (stmt))
7596 continue;
7597
7598 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7599 SET_USE (use_p, comp);
7600
7601 update_stmt (stmt);
7602 }
7603 }
7604 }
7605 }
7606
7607 release_defs_bitset (toremove);
7608
7609 BITMAP_FREE (toremove);
7610 }
7611
7612 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7613 for hash_map::traverse. */
7614
7615 bool
7616 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7617 {
7618 free (value);
7619 return true;
7620 }
7621
7622 /* Frees data allocated by the optimization of a single loop. */
7623
7624 static void
7625 free_loop_data (struct ivopts_data *data)
7626 {
7627 unsigned i, j;
7628 bitmap_iterator bi;
7629 tree obj;
7630
7631 if (data->niters)
7632 {
7633 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7634 delete data->niters;
7635 data->niters = NULL;
7636 }
7637
7638 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7639 {
7640 struct version_info *info;
7641
7642 info = ver_info (data, i);
7643 info->iv = NULL;
7644 info->has_nonlin_use = false;
7645 info->preserve_biv = false;
7646 info->inv_id = 0;
7647 }
7648 bitmap_clear (data->relevant);
7649 bitmap_clear (data->important_candidates);
7650
7651 for (i = 0; i < data->vgroups.length (); i++)
7652 {
7653 struct iv_group *group = data->vgroups[i];
7654
7655 for (j = 0; j < group->vuses.length (); j++)
7656 free (group->vuses[j]);
7657 group->vuses.release ();
7658
7659 BITMAP_FREE (group->related_cands);
7660 for (j = 0; j < group->n_map_members; j++)
7661 if (group->cost_map[j].depends_on)
7662 BITMAP_FREE (group->cost_map[j].depends_on);
7663
7664 free (group->cost_map);
7665 free (group);
7666 }
7667 data->vgroups.truncate (0);
7668
7669 for (i = 0; i < data->vcands.length (); i++)
7670 {
7671 struct iv_cand *cand = data->vcands[i];
7672
7673 if (cand->depends_on)
7674 BITMAP_FREE (cand->depends_on);
7675 free (cand);
7676 }
7677 data->vcands.truncate (0);
7678
7679 if (data->version_info_size < num_ssa_names)
7680 {
7681 data->version_info_size = 2 * num_ssa_names;
7682 free (data->version_info);
7683 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7684 }
7685
7686 data->max_inv_id = 0;
7687
7688 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7689 SET_DECL_RTL (obj, NULL_RTX);
7690
7691 decl_rtl_to_reset.truncate (0);
7692
7693 data->inv_expr_tab->empty ();
7694 data->max_inv_expr_id = 0;
7695
7696 data->iv_common_cand_tab->empty ();
7697 data->iv_common_cands.truncate (0);
7698 }
7699
7700 /* Finalizes the data structures used by the iv optimization pass. */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   contains a function call that is neither an internal call nor an
   inexpensive builtin.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, " single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

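  /* Walk the loop body once up front: record whether it contains any
     non-trivial calls, which feeds into the register pressure estimate,
     and renumber the gimple stmt uids in its blocks so that stmt
     ordering queries made by this pass are cheap.  */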
  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* Determine for each SSA name whether it behaves as an induction
     variable in this loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}