re PR tree-optimization/81162 (UBSAN switch triggers incorrect optimization in SLSR)
[gcc.git] / gcc / gimple-ssa-strength-reduction.c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2017 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
26
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
31
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
35
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "backend.h"
40 #include "rtl.h"
41 #include "tree.h"
42 #include "gimple.h"
43 #include "cfghooks.h"
44 #include "tree-pass.h"
45 #include "ssa.h"
46 #include "expmed.h"
47 #include "gimple-pretty-print.h"
48 #include "fold-const.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "cfgloop.h"
53 #include "tree-cfg.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-ssa-address.h"
57 #include "tree-affine.h"
58 #include "builtins.h"
59 \f
60 /* Information about a strength reduction candidate. Each statement
61 in the candidate table represents an expression of one of the
62 following forms (the special case of CAND_REF will be described
63 later):
64
65 (CAND_MULT) S1: X = (B + i) * S
66 (CAND_ADD) S1: X = B + (i * S)
67
68 Here X and B are SSA names, i is an integer constant, and S is
69 either an SSA name or a constant. We call B the "base," i the
70 "index", and S the "stride."
71
72 Any statement S0 that dominates S1 and is of the form:
73
74 (CAND_MULT) S0: Y = (B + i') * S
75 (CAND_ADD) S0: Y = B + (i' * S)
76
77 is called a "basis" for S1. In both cases, S1 may be replaced by
78
79 S1': X = Y + (i - i') * S,
80
81 where (i - i') * S is folded to the extent possible.
82
83 All gimple statements are visited in dominator order, and each
84 statement that may contribute to one of the forms of S1 above is
85 given at least one entry in the candidate table. Such statements
86 include addition, pointer addition, subtraction, multiplication,
87 negation, copies, and nontrivial type casts. If a statement may
88 represent more than one expression of the forms of S1 above,
89 multiple "interpretations" are stored in the table and chained
90 together. Examples:
91
92 * An add of two SSA names may treat either operand as the base.
93 * A multiply of two SSA names, likewise.
94 * A copy or cast may be thought of as either a CAND_MULT with
95 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
96
97 Candidate records are allocated from an obstack. They are addressed
98 both from a hash table keyed on S1, and from a vector of candidate
99 pointers arranged in predominator order.
100
101 Opportunity note
102 ----------------
103 Currently we don't recognize:
104
105 S0: Y = (S * i') - B
106 S1: X = (S * i) - B
107
108 as a strength reduction opportunity, even though this S1 would
109 also be replaceable by the S1' above. This can be added if it
110 comes up in practice.
111
112 Strength reduction in addressing
113 --------------------------------
114 There is another kind of candidate known as CAND_REF. A CAND_REF
115 describes a statement containing a memory reference having
116 complex addressing that might benefit from strength reduction.
117 Specifically, we are interested in references for which
118 get_inner_reference returns a base address, offset, and bitpos as
119 follows:
120
121 base: MEM_REF (T1, C1)
122 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
123 bitpos: C4 * BITS_PER_UNIT
124
125 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
126 arbitrary integer constants. Note that C2 may be zero, in which
127 case the offset will be MULT_EXPR (T2, C3).
128
129 When this pattern is recognized, the original memory reference
130 can be replaced with:
131
132 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
133 C1 + (C2 * C3) + C4)
134
135 which distributes the multiply to allow constant folding. When
136 two or more addressing expressions can be represented by MEM_REFs
137 of this form, differing only in the constants C1, C2, and C4,
138 making this substitution produces more efficient addressing during
139 the RTL phases. When there are not at least two expressions with
140 the same values of T1, T2, and C3, there is nothing to be gained
141 by the replacement.
142
143 Strength reduction of CAND_REFs uses the same infrastructure as
144 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
145 field, MULT_EXPR (T2, C3) in the stride (S) field, and
146 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
147 is thus another CAND_REF with the same B and S values. When at
148 least two CAND_REFs are chained together using the basis relation,
149 each of them is replaced as above, resulting in improved code
150 generation for addressing.
151
152 Conditional candidates
153 ======================
154
155 Conditional candidates are best illustrated with an example.
156 Consider the code sequence:
157
158 (1) x_0 = ...;
159 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
160 if (...)
161 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
162 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
163 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
164 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
165
166 Here strength reduction is complicated by the uncertain value of x_2.
167 A legitimate transformation is:
168
169 (1) x_0 = ...;
170 (2) a_0 = x_0 * 5;
171 if (...)
172 {
173 (3) [x_1 = x_0 + 1;]
174 (3a) t_1 = a_0 + 5;
175 }
176 (4) [x_2 = PHI <x_0, x_1>;]
177 (4a) t_2 = PHI <a_0, t_1>;
178 (5) [x_3 = x_2 + 1;]
179 (6r) a_1 = t_2 + 5;
180
181 where the bracketed instructions may go dead.
182
183 To recognize this opportunity, we have to observe that statement (6)
184 has a "hidden basis" (2). The hidden basis is unlike a normal basis
185 in that the statement and the hidden basis have different base SSA
186 names (x_2 and x_0, respectively). The relationship is established
187 when a statement's base name (x_2) is defined by a phi statement (4),
188 each argument of which (x_0, x_1) has an identical "derived base name."
189 If the argument is defined by a candidate (as x_1 is by (3)) that is a
190 CAND_ADD having a stride of 1, the derived base name of the argument is
191 the base name of the candidate (x_0). Otherwise, the argument itself
192 is its derived base name (as is the case with argument x_0).
193
194 The hidden basis for statement (6) is the nearest dominating candidate
195 whose base name is the derived base name (x_0) of the feeding phi (4),
196 and whose stride is identical to that of the statement. We can then
197 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
198 allowing the final replacement of (6) by the strength-reduced (6r).
199
200 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
201 A CAND_PHI is not a candidate for replacement, but is maintained in the
202 candidate table to ease discovery of hidden bases. Any phi statement
203 whose arguments share a common derived base name is entered into the
204 table with the derived base name, an (arbitrary) index of zero, and a
205 stride of 1. A statement with a hidden basis can then be detected by
206 simply looking up its feeding phi definition in the candidate table,
207 extracting the derived base name, and searching for a basis in the
208 usual manner after substituting the derived base name.
209
210 Note that the transformation is only valid when the original phi and
211 the statements that define the phi's arguments are all at the same
212 position in the loop hierarchy. */
213
214
/* Index into the candidate vector, offset by 1.  VECs are zero-based,
   while cand_idx's are one-based, with zero indicating null.  */
typedef unsigned cand_idx;

/* The kind of candidate.  */
enum cand_kind
{
  CAND_MULT,	/* X = (B + i) * S  */
  CAND_ADD,	/* X = B + (i * S)  */
  CAND_REF,	/* Memory reference with strength-reducible addressing.  */
  CAND_PHI	/* Phi recording a common derived base name (never replaced
		   itself; used to discover hidden bases).  */
};
227
/* A strength-reduction candidate: one interpretation of a statement
   as an expression of one of the forms described in the module
   commentary above.  */
struct slsr_cand_d
{
  /* The candidate statement S1.  */
  gimple *cand_stmt;

  /* The base expression B:  often an SSA name, but not always.  */
  tree base_expr;

  /* The stride S.  */
  tree stride;

  /* The index constant i.  */
  widest_int index;

  /* The type of the candidate.  This is normally the type of base_expr,
     but casts may have occurred when combining feeding instructions.
     A candidate can only be a basis for candidates of the same final type.
     (For CAND_REFs, this is the type to be used for operand 1 of the
     replacement MEM_REF.)  */
  tree cand_type;

  /* The type to be used to interpret the stride field when the stride
     is not a constant.  Normally the same as the type of the recorded
     stride, but when the stride has been cast we need to maintain that
     knowledge in order to make legal substitutions without losing
     precision.  When the stride is a constant, this will be sizetype.  */
  tree stride_type;

  /* The kind of candidate (CAND_MULT, etc.).  */
  enum cand_kind kind;

  /* Index of this candidate in the candidate vector.  Note that this
     is one-based (a cand_idx) while the vector is zero-based.  */
  cand_idx cand_num;

  /* Index of the next candidate record for the same statement.
     A statement may be useful in more than one way (e.g., due to
     commutativity).  So we can have multiple "interpretations"
     of a statement.  Zero terminates the chain.  */
  cand_idx next_interp;

  /* Index of the basis statement S0, if any, in the candidate vector.  */
  cand_idx basis;

  /* First candidate for which this candidate is a basis, if one exists.  */
  cand_idx dependent;

  /* Next candidate having the same basis as this one.  */
  cand_idx sibling;

  /* If this is a conditional candidate, the CAND_PHI candidate
     that defines the base SSA name B.  */
  cand_idx def_phi;

  /* Savings that can be expected from eliminating dead code if this
     candidate is replaced.  */
  int dead_savings;
};

typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
typedef const struct slsr_cand_d *const_slsr_cand_t;
288
289 /* Pointers to candidates are chained together as part of a mapping
290 from base expressions to the candidates that use them. */
291
/* Pointers to candidates are chained together as part of a mapping
   from base expressions to the candidates that use them.  Each chain
   header lives in the BASE_CAND_MAP hash table declared below.  */

struct cand_chain_d
{
  /* Base expression for the chain of candidates:  often, but not
     always, an SSA name.  */
  tree base_expr;

  /* Pointer to a candidate.  */
  slsr_cand_t cand;

  /* Chain pointer.  */
  struct cand_chain_d *next;

};

typedef struct cand_chain_d cand_chain, *cand_chain_t;
typedef const struct cand_chain_d *const_cand_chain_t;
308
309 /* Information about a unique "increment" associated with candidates
310 having an SSA name for a stride. An increment is the difference
311 between the index of the candidate and the index of its basis,
312 i.e., (i - i') as discussed in the module commentary.
313
314 When we are not going to generate address arithmetic we treat
315 increments that differ only in sign as the same, allowing sharing
316 of the cost of initializers. The absolute value of the increment
317 is stored in the incr_info. */
318
/* Information about a unique "increment" associated with candidates
   having an SSA name for a stride.  An increment is the difference
   between the index of the candidate and the index of its basis,
   i.e., (i - i') as discussed in the module commentary.

   When we are not going to generate address arithmetic we treat
   increments that differ only in sign as the same, allowing sharing
   of the cost of initializers.  The absolute value of the increment
   is stored in the incr_info.

   Entries of this type populate the file-static INCR_VEC array
   declared below.  */

struct incr_info_d
{
  /* The increment that relates a candidate to its basis.  */
  widest_int incr;

  /* How many times the increment occurs in the candidate tree.  */
  unsigned count;

  /* Cost of replacing candidates using this increment.  Negative and
     zero costs indicate replacement should be performed.  */
  int cost;

  /* If this increment is profitable but is not -1, 0, or 1, it requires
     an initializer T_0 = stride * incr to be found or introduced in the
     nearest common dominator of all candidates.  This field holds T_0
     for subsequent use.  */
  tree initializer;

  /* If the initializer was found to already exist, this is the block
     where it was found.  */
  basic_block init_bb;
};

typedef struct incr_info_d incr_info, *incr_info_t;
343
/* Candidates are maintained in a vector.  If candidate X dominates
   candidate Y, then X appears before Y in the vector; but the
   converse does not necessarily hold.  */
static vec<slsr_cand_t> cand_vec;

/* Cost thresholds: a nonpositive cost means "do the replacement."  */
enum cost_consts
{
  COST_NEUTRAL = 0,
  COST_INFINITE = 1000
};

/* The enums below serve as readable boolean-like arguments passed
   among the analysis routines later in this file.  */

enum stride_status
{
  UNKNOWN_STRIDE = 0,
  KNOWN_STRIDE = 1
};

enum phi_adjust_status
{
  NOT_PHI_ADJUST = 0,
  PHI_ADJUST = 1
};

enum count_phis_status
{
  DONT_COUNT_PHIS = 0,
  COUNT_PHIS = 1
};
372
/* Pointer map embodying a mapping from statements to candidates.  */
static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;

/* Obstack for candidates.  */
static struct obstack cand_obstack;

/* Obstack for candidate chains.  */
static struct obstack chain_obstack;

/* An array INCR_VEC of incr_infos is used during analysis of related
   candidates having an SSA name for a stride.  INCR_VEC_LEN describes
   its current length.  MAX_INCR_VEC_LEN is used to avoid costly
   pathological cases.  */
static incr_info_t incr_vec;
static unsigned incr_vec_len;
const int MAX_INCR_VEC_LEN = 16;

/* For a chain of candidates with unknown stride, indicates whether or not
   we must generate pointer arithmetic when replacing statements.  */
static bool address_arithmetic_p;

/* Forward function declarations.  */
static slsr_cand_t base_cand_from_table (tree);
static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
static bool legal_cast_p_1 (tree, tree);
398 \f
/* Produce a pointer to the IDX'th candidate in the candidate vector.
   IDX is one-based (see cand_idx); zero denotes "no candidate" and
   must not be passed here.  */

static slsr_cand_t
lookup_cand (cand_idx idx)
{
  /* Candidate numbers are offset by one from vector positions.  */
  return cand_vec[idx - 1];
}
406
/* Helper for hashing a candidate chain header.  Chain headers are
   hashed and compared solely on their base expression.  */

struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
{
  static inline hashval_t hash (const cand_chain *);
  static inline bool equal (const cand_chain *, const cand_chain *);
};
414
415 inline hashval_t
416 cand_chain_hasher::hash (const cand_chain *p)
417 {
418 tree base_expr = p->base_expr;
419 return iterative_hash_expr (base_expr, 0);
420 }
421
422 inline bool
423 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
424 {
425 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
426 }
427
/* Hash table embodying a mapping from base exprs to chains of candidates.  */
static hash_table<cand_chain_hasher> *base_cand_map;

/* Pointer map used by tree_to_aff_combination_expand.  */
static hash_map<tree, name_expansion *> *name_expansions;
/* Pointer map embodying a mapping from bases to alternative bases.
   A NULL mapped value records that no distinct alternative exists.  */
static hash_map<tree, tree> *alt_base_map;
435
/* Given BASE, use the tree affine combination facilities to
   find the underlying tree expression for BASE, with any
   immediate offset excluded.  Return NULL when no distinct
   underlying expression exists.

   N.B. we should eliminate this backtracking with better forward
   analysis in a future release.  */

static tree
get_alternative_base (tree base)
{
  tree *result = alt_base_map->get (base);

  if (result == NULL)
    {
      tree expr;
      aff_tree aff;

      tree_to_aff_combination_expand (base, TREE_TYPE (base),
				      &aff, &name_expansions);
      /* Strip the constant offset so that references differing only
	 by an immediate offset share the same alternative base.  */
      aff.offset = 0;
      expr = aff_combination_to_tree (&aff);

      /* Cache the answer; a NULL mapped value is the sentinel for
	 "BASE is its own best expression."  The assert confirms no
	 entry existed yet (put returns true on overwrite).  */
      gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));

      return expr == base ? NULL : expr;
    }

  return *result;
}
465
466 /* Look in the candidate table for a CAND_PHI that defines BASE and
467 return it if found; otherwise return NULL. */
468
469 static cand_idx
470 find_phi_def (tree base)
471 {
472 slsr_cand_t c;
473
474 if (TREE_CODE (base) != SSA_NAME)
475 return 0;
476
477 c = base_cand_from_table (base);
478
479 if (!c || c->kind != CAND_PHI)
480 return 0;
481
482 return c->cand_num;
483 }
484
/* Determine whether all uses of NAME are directly or indirectly
   used by STMT.  That is, we want to know whether if STMT goes
   dead, the definition of NAME also goes dead.  RECURSE bounds the
   depth of the transitive walk; it is capped at 10 to avoid
   pathological chains.  */
static bool
uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
{
  gimple *use_stmt;
  imm_use_iterator iter;
  bool retval = true;

  FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
    {
      /* Uses by STMT itself and by debug statements do not keep
	 NAME alive.  */
      if (use_stmt == stmt || is_gimple_debug (use_stmt))
	continue;

      /* Any other use must be an assignment to an SSA register whose
	 result is in turn wholly consumed by STMT; otherwise NAME
	 remains live when STMT dies.  */
      if (!is_gimple_assign (use_stmt)
	  || !gimple_get_lhs (use_stmt)
	  || !is_gimple_reg (gimple_get_lhs (use_stmt))
	  || recurse >= 10
	  || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
				     recurse + 1))
	{
	  retval = false;
	  BREAK_FROM_IMM_USE_STMT (iter);
	}
    }

  return retval;
}
514
/* Helper routine for find_basis_for_candidate.  May be called twice:
   once for the candidate's base expr, and optionally again either for
   the candidate's phi definition or for a CAND_REF's alternative base
   expression.  Return the best basis for C found via BASE_EXPR, or
   NULL if none exists.  */

static slsr_cand_t
find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
{
  cand_chain mapping_key;
  cand_chain_t chain;
  slsr_cand_t basis = NULL;

  // Limit potential of N^2 behavior for long candidate chains.
  int iters = 0;
  int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);

  mapping_key.base_expr = base_expr;
  chain = base_cand_map->find (&mapping_key);

  for (; chain && iters < max_iters; chain = chain->next, ++iters)
    {
      slsr_cand_t one_basis = chain->cand;

      /* A basis must have the same kind, an identical stride, types
	 compatible with the candidate's, and a statement dominating
	 the candidate's statement.  */
      if (one_basis->kind != c->kind
	  || one_basis->cand_stmt == c->cand_stmt
	  || !operand_equal_p (one_basis->stride, c->stride, 0)
	  || !types_compatible_p (one_basis->cand_type, c->cand_type)
	  || !types_compatible_p (one_basis->stride_type, c->stride_type)
	  || !dominated_by_p (CDI_DOMINATORS,
			      gimple_bb (c->cand_stmt),
			      gimple_bb (one_basis->cand_stmt)))
	continue;

      /* Of all viable bases prefer the highest candidate number,
	 which is the most immediately dominating one.  */
      if (!basis || basis->cand_num < one_basis->cand_num)
	basis = one_basis;
    }

  return basis;
}
554
/* Use the base expr from candidate C to look for possible candidates
   that can serve as a basis for C.  Each potential basis must also
   appear in a block that dominates the candidate statement and have
   the same stride and type.  If more than one possible basis exists,
   the one with highest index in the vector is chosen; this will be
   the most immediately dominating basis.  Return the candidate number
   of the basis found, or 0 if there is none.  */

static int
find_basis_for_candidate (slsr_cand_t c)
{
  slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);

  /* If a candidate doesn't have a basis using its base expression,
     it may have a basis hidden by one or more intervening phis.  */
  if (!basis && c->def_phi)
    {
      basic_block basis_bb, phi_bb;
      slsr_cand_t phi_cand = lookup_cand (c->def_phi);
      basis = find_basis_for_base_expr (c, phi_cand->base_expr);

      if (basis)
	{
	  /* A hidden basis must dominate the phi-definition of the
	     candidate's base name.  */
	  phi_bb = gimple_bb (phi_cand->cand_stmt);
	  basis_bb = gimple_bb (basis->cand_stmt);

	  if (phi_bb == basis_bb
	      || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
	    {
	      basis = NULL;
	      c->basis = 0;
	    }

	  /* If we found a hidden basis, estimate additional dead-code
	     savings if the phi and its feeding statements can be removed.  */
	  tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
	  if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
	    c->dead_savings += phi_cand->dead_savings;
	}
    }

  /* For expensive optimization, a CAND_REF may also find a basis via
     an alternative base expression (see get_alternative_base).  */
  if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
    {
      tree alt_base_expr = get_alternative_base (c->base_expr);
      if (alt_base_expr)
	basis = find_basis_for_base_expr (c, alt_base_expr);
    }

  if (basis)
    {
      /* Link C onto the basis's list of dependents.  */
      c->sibling = basis->dependent;
      basis->dependent = c->cand_num;
      return basis->cand_num;
    }

  return 0;
}
613
614 /* Record a mapping from BASE to C, indicating that C may potentially serve
615 as a basis using that base expression. BASE may be the same as
616 C->BASE_EXPR; alternatively BASE can be a different tree that share the
617 underlining expression of C->BASE_EXPR. */
618
619 static void
620 record_potential_basis (slsr_cand_t c, tree base)
621 {
622 cand_chain_t node;
623 cand_chain **slot;
624
625 gcc_assert (base);
626
627 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
628 node->base_expr = base;
629 node->cand = c;
630 node->next = NULL;
631 slot = base_cand_map->find_slot (node, INSERT);
632
633 if (*slot)
634 {
635 cand_chain_t head = (cand_chain_t) (*slot);
636 node->next = head->next;
637 head->next = node;
638 }
639 else
640 *slot = node;
641 }
642
/* Allocate storage for a new candidate and initialize its fields.
   Attempt to find a basis for the candidate.

   For CAND_REF, an alternative base may also be recorded and used
   to find a basis.  This helps cases where the expression hidden
   behind BASE (which is usually an SSA_NAME) has immediate offset,
   e.g.

     a2[i][j] = 1;
     a2[i + 20][j] = 2;  */

static slsr_cand_t
alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
			   const widest_int &index, tree stride, tree ctype,
			   tree stype, unsigned savings)
{
  slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
					       sizeof (slsr_cand));
  c->cand_stmt = gs;
  c->base_expr = base;
  c->stride = stride;
  c->index = index;
  c->cand_type = ctype;
  c->stride_type = stype;
  c->kind = kind;
  /* Candidate numbers are one-based (see cand_idx).  */
  c->cand_num = cand_vec.length () + 1;
  c->next_interp = 0;
  c->dependent = 0;
  c->sibling = 0;
  /* A defining phi is only looked up for CAND_MULT candidates.  */
  c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
  c->dead_savings = savings;

  cand_vec.safe_push (c);

  /* A CAND_PHI is never replaced itself, so never has a basis.  */
  if (kind == CAND_PHI)
    c->basis = 0;
  else
    c->basis = find_basis_for_candidate (c);

  record_potential_basis (c, base);
  if (flag_expensive_optimizations && kind == CAND_REF)
    {
      tree alt_base = get_alternative_base (base);
      if (alt_base)
	record_potential_basis (c, alt_base);
    }

  return c;
}
692
/* Determine the target cost of statement GS when compiling according
   to SPEED.  GS must be an assignment; the cost returned is that of
   its right-hand-side operation in the mode of its LHS.  Reaching the
   end of the function (an unhandled rhs code) is a bug.  */

static int
stmt_cost (gimple *gs, bool speed)
{
  tree lhs, rhs1, rhs2;
  machine_mode lhs_mode;

  gcc_assert (is_gimple_assign (gs));
  lhs = gimple_assign_lhs (gs);
  rhs1 = gimple_assign_rhs1 (gs);
  lhs_mode = TYPE_MODE (TREE_TYPE (lhs));

  switch (gimple_assign_rhs_code (gs))
    {
    case MULT_EXPR:
      rhs2 = gimple_assign_rhs2 (gs);

      /* A multiply by a small constant may be cheaper than a general
	 multiply on the target.  */
      if (tree_fits_shwi_p (rhs2))
	return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);

      /* A constant operand is expected to appear only as rhs2.  */
      gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
      return mul_cost (speed, lhs_mode);

    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
    case MINUS_EXPR:
      return add_cost (speed, lhs_mode);

    case NEGATE_EXPR:
      return neg_cost (speed, lhs_mode);

    CASE_CONVERT:
      return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);

    /* Note that we don't assign costs to copies that in most cases
       will go away.  */
    case SSA_NAME:
      return 0;

    default:
      ;
    }

  gcc_unreachable ();
  return 0;
}
741
742 /* Look up the defining statement for BASE_IN and return a pointer
743 to its candidate in the candidate table, if any; otherwise NULL.
744 Only CAND_ADD and CAND_MULT candidates are returned. */
745
746 static slsr_cand_t
747 base_cand_from_table (tree base_in)
748 {
749 slsr_cand_t *result;
750
751 gimple *def = SSA_NAME_DEF_STMT (base_in);
752 if (!def)
753 return (slsr_cand_t) NULL;
754
755 result = stmt_cand_map->get (def);
756
757 if (result && (*result)->kind != CAND_REF)
758 return *result;
759
760 return (slsr_cand_t) NULL;
761 }
762
/* Add an entry to the statement-to-candidate mapping.  The assert
   confirms no entry already existed for GS (hash_map::put returns
   true when it overwrites an existing entry).  */

static void
add_cand_for_stmt (gimple *gs, slsr_cand_t c)
{
  gcc_assert (!stmt_cand_map->put (gs, c));
}
770 \f
/* Given PHI which contains a phi statement, determine whether it
   satisfies all the requirements of a phi candidate.  If so, create
   a candidate.  Note that a CAND_PHI never has a basis itself, but
   is used to help find a basis for subsequent candidates.  SPEED
   selects speed vs. size costs when accumulating dead-code savings.  */

static void
slsr_process_phi (gphi *phi, bool speed)
{
  unsigned i;
  tree arg0_base = NULL_TREE, base_type;
  slsr_cand_t c;
  struct loop *cand_loop = gimple_bb (phi)->loop_father;
  unsigned savings = 0;

  /* A CAND_PHI requires each of its arguments to have the same
     derived base name.  (See the module header commentary for a
     definition of derived base names.)  Furthermore, all feeding
     definitions must be in the same position in the loop hierarchy
     as PHI.  */

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      slsr_cand_t arg_cand;
      tree arg = gimple_phi_arg_def (phi, i);
      tree derived_base_name = NULL_TREE;
      gimple *arg_stmt = NULL;
      basic_block arg_bb = NULL;

      if (TREE_CODE (arg) != SSA_NAME)
	return;

      arg_cand = base_cand_from_table (arg);

      if (arg_cand)
	{
	  /* Search the argument's interpretations for a CAND_ADD or
	     CAND_PHI; give up if there is none.  */
	  while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
	    {
	      if (!arg_cand->next_interp)
		return;

	      arg_cand = lookup_cand (arg_cand->next_interp);
	    }

	  /* Only a stride of 1 yields a derived base name.  */
	  if (!integer_onep (arg_cand->stride))
	    return;

	  derived_base_name = arg_cand->base_expr;
	  arg_stmt = arg_cand->cand_stmt;
	  arg_bb = gimple_bb (arg_stmt);

	  /* Gather potential dead code savings if the phi statement
	     can be removed later on.  */
	  if (uses_consumed_by_stmt (arg, phi))
	    {
	      if (gimple_code (arg_stmt) == GIMPLE_PHI)
		savings += arg_cand->dead_savings;
	      else
		savings += stmt_cost (arg_stmt, speed);
	    }
	}
      else if (SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  /* A default definition is its own derived base name and is
	     treated as belonging to the function's entry successor.  */
	  derived_base_name = arg;
	  arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
	}

      /* The feeding definition must sit at the same position in the
	 loop hierarchy as the phi itself.  */
      if (!arg_bb || arg_bb->loop_father != cand_loop)
	return;

      if (i == 0)
	arg0_base = derived_base_name;
      else if (!operand_equal_p (derived_base_name, arg0_base, 0))
	return;
    }

  /* Create the candidate.  "alloc_cand_and_find_basis" is named
     misleadingly for this case, as no basis will be sought for a
     CAND_PHI.  */
  base_type = TREE_TYPE (arg0_base);

  c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
				 0, integer_one_node, base_type,
				 sizetype, savings);

  /* Add the candidate to the statement-candidate mapping.  */
  add_cand_for_stmt (phi, c);
}
858
/* Given PBASE which is a pointer to tree, look up the defining
   statement for it and check whether the candidate is in the
   form of:

     X = B + (1 * S), S is integer constant
     X = B + (i * S), S is integer one

   If so, set PBASE to the candidate's base_expr and return the
   widest_int (i * S).
   Otherwise, just return a zero widest_int.  */

static widest_int
backtrace_base_for_ref (tree *pbase)
{
  tree base_in = *pbase;
  slsr_cand_t base_cand;

  STRIP_NOPS (base_in);

  /* Strip off widening conversion(s) to handle cases where
     e.g. 'B' is widened from an 'int' in order to calculate
     a 64-bit address.  */
  if (CONVERT_EXPR_P (base_in)
      && legal_cast_p_1 (TREE_TYPE (base_in),
			 TREE_TYPE (TREE_OPERAND (base_in, 0))))
    base_in = get_unwidened (base_in, NULL_TREE);

  if (TREE_CODE (base_in) != SSA_NAME)
    return 0;

  base_cand = base_cand_from_table (base_in);

  /* Walk all interpretations of the defining statement, looking for
     one of the two CAND_ADD forms above.  */
  while (base_cand && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_ADD
	  && base_cand->index == 1
	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* X = B + (1 * S), S is integer constant.  */
	  *pbase = base_cand->base_expr;
	  return wi::to_widest (base_cand->stride);
	}
      else if (base_cand->kind == CAND_ADD
	       && TREE_CODE (base_cand->stride) == INTEGER_CST
	       && integer_onep (base_cand->stride))
	{
	  /* X = B + (i * S), S is integer one.  */
	  *pbase = base_cand->base_expr;
	  return base_cand->index;
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  return 0;
}
918
919 /* Look for the following pattern:
920
921 *PBASE: MEM_REF (T1, C1)
922
923 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
924 or
925 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
926 or
927 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
928
929 *PINDEX: C4 * BITS_PER_UNIT
930
931 If not present, leave the input values unchanged and return FALSE.
932 Otherwise, modify the input values as follows and return TRUE:
933
934 *PBASE: T1
935 *POFFSET: MULT_EXPR (T2, C3)
936 *PINDEX: C1 + (C2 * C3) + C4
937
938 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
939 will be further restructured to:
940
941 *PBASE: T1
942 *POFFSET: MULT_EXPR (T2', C3)
943 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
944
945 static bool
946 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
947 tree *ptype)
948 {
949 tree base = *pbase, offset = *poffset;
950 widest_int index = *pindex;
951 tree mult_op0, t1, t2, type;
952 widest_int c1, c2, c3, c4, c5;
953
954 if (!base
955 || !offset
956 || TREE_CODE (base) != MEM_REF
957 || TREE_CODE (offset) != MULT_EXPR
958 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
959 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
960 return false;
961
962 t1 = TREE_OPERAND (base, 0);
963 c1 = widest_int::from (mem_ref_offset (base), SIGNED);
964 type = TREE_TYPE (TREE_OPERAND (base, 1));
965
966 mult_op0 = TREE_OPERAND (offset, 0);
967 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
968
969 if (TREE_CODE (mult_op0) == PLUS_EXPR)
970
971 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
972 {
973 t2 = TREE_OPERAND (mult_op0, 0);
974 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
975 }
976 else
977 return false;
978
979 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
980
981 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
982 {
983 t2 = TREE_OPERAND (mult_op0, 0);
984 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
985 }
986 else
987 return false;
988
989 else
990 {
991 t2 = mult_op0;
992 c2 = 0;
993 }
994
995 c4 = index >> LOG2_BITS_PER_UNIT;
996 c5 = backtrace_base_for_ref (&t2);
997
998 *pbase = t1;
999 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
1000 wide_int_to_tree (sizetype, c3));
1001 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
1002 *ptype = type;
1003
1004 return true;
1005 }
1006
1007 /* Given GS which contains a data reference, create a CAND_REF entry in
1008 the candidate table and attempt to find a basis. */
1009
static void
slsr_process_ref (gimple *gs)
{
  tree ref_expr, base, offset, type;
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  slsr_cand_t c;

  /* A statement with a virtual definition is a store, so the data
     reference is on the LHS; otherwise it is a load on the RHS.  */
  if (gimple_vdef (gs))
    ref_expr = gimple_assign_lhs (gs);
  else
    ref_expr = gimple_assign_rhs1 (gs);

  /* Only handled component references are candidates; bit-field
     accesses are excluded.  */
  if (!handled_component_p (ref_expr)
      || TREE_CODE (ref_expr) == BIT_FIELD_REF
      || (TREE_CODE (ref_expr) == COMPONENT_REF
          && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
    return;

  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
                              &unsignedp, &reversep, &volatilep);
  /* Reverse storage order references are not handled.  */
  if (reversep)
    return;
  widest_int index = bitpos;

  /* Canonicalize base/offset/index into CAND_REF form; give up if the
     reference does not match the expected shape.  */
  if (!restructure_reference (&base, &offset, &index, &type))
    return;

  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
                                 type, sizetype, 0);

  /* Add the candidate to the statement-candidate mapping.  */
  add_cand_for_stmt (gs, c);
}
1045
1046 /* Create a candidate entry for a statement GS, where GS multiplies
1047 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1048 about the two SSA names into the new candidate. Return the new
1049 candidate. */
1050
static slsr_cand_t
create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  tree stype = NULL_TREE;
  widest_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  The loop
     stops as soon as one interpretation supplies a base.  */
  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {

      if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
        {
          /* Y = (B + i') * 1
             X = Y * Z
             ================
             X = (B + i') * Z */
          base = base_cand->base_expr;
          index = base_cand->index;
          stride = stride_in;
          ctype = base_cand->cand_type;
          stype = TREE_TYPE (stride_in);
          /* When the base SSA name has no other uses, its defining
             statement's cost is credited as a potential saving.  */
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }
      else if (base_cand->kind == CAND_ADD
               && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* Y = B + (i' * S), S constant
             X = Y * Z
             ============================
             X = B + ((i' * S) * Z) */
          base = base_cand->base_expr;
          index = base_cand->index * wi::to_widest (base_cand->stride);
          stride = stride_in;
          ctype = base_cand->cand_type;
          stype = TREE_TYPE (stride_in);
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }

      /* Advance to the next interpretation of the base, if any.  */
      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = (Y + 0) * Z.  */
      base = base_in;
      index = 0;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
      stype = TREE_TYPE (stride_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
                                 ctype, stype, savings);
  return c;
}
1119
1120 /* Create a candidate entry for a statement GS, where GS multiplies
1121 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1122 information about BASE_IN into the new candidate. Return the new
1123 candidate. */
1124
static slsr_cand_t
create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  widest_int index, temp;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  */
  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_MULT
          && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* Y = (B + i') * S, S constant
             X = Y * c
             ============================
             X = (B + i') * (S * c) */
          temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
          /* The combined stride S * c must still be representable in
             the stride's type, else this interpretation is unusable.  */
          if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
            {
              base = base_cand->base_expr;
              index = base_cand->index;
              stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
              ctype = base_cand->cand_type;
              /* A single-use base's defining statement may become
                 dead, so record its cost as a saving.  */
              if (has_single_use (base_in))
                savings = (base_cand->dead_savings
                           + stmt_cost (base_cand->cand_stmt, speed));
            }
        }
      else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
        {
          /* Y = B + (i' * 1)
             X = Y * c
             ===========================
             X = (B + i') * c */
          base = base_cand->base_expr;
          index = base_cand->index;
          stride = stride_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }
      else if (base_cand->kind == CAND_ADD
               && base_cand->index == 1
               && TREE_CODE (base_cand->stride) == INTEGER_CST)
        {
          /* Y = B + (1 * S), S constant
             X = Y * c
             ===========================
             X = (B + S) * c */
          base = base_cand->base_expr;
          index = wi::to_widest (base_cand->stride);
          stride = stride_in;
          ctype = base_cand->cand_type;
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }

      /* Advance to the next interpretation of the base, if any.  */
      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = (Y + 0) * c.  */
      base = base_in;
      index = 0;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
                                 ctype, sizetype, savings);
  return c;
}
1208
1209 /* Given GS which is a multiply of scalar integers, make an appropriate
1210 entry in the candidate table. If this is a multiply of two SSA names,
1211 create two CAND_MULT interpretations and attempt to find a basis for
1212 each of them. Otherwise, create a single CAND_MULT and attempt to
1213 find a basis. */
1214
1215 static void
1216 slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
1217 {
1218 slsr_cand_t c, c2;
1219
1220 /* If this is a multiply of an SSA name with itself, it is highly
1221 unlikely that we will get a strength reduction opportunity, so
1222 don't record it as a candidate. This simplifies the logic for
1223 finding a basis, so if this is removed that must be considered. */
1224 if (rhs1 == rhs2)
1225 return;
1226
1227 if (TREE_CODE (rhs2) == SSA_NAME)
1228 {
1229 /* Record an interpretation of this statement in the candidate table
1230 assuming RHS1 is the base expression and RHS2 is the stride. */
1231 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1232
1233 /* Add the first interpretation to the statement-candidate mapping. */
1234 add_cand_for_stmt (gs, c);
1235
1236 /* Record another interpretation of this statement assuming RHS1
1237 is the stride and RHS2 is the base expression. */
1238 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1239 c->next_interp = c2->cand_num;
1240 }
1241 else
1242 {
1243 /* Record an interpretation for the multiply-immediate. */
1244 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1245
1246 /* Add the interpretation to the statement-candidate mapping. */
1247 add_cand_for_stmt (gs, c);
1248 }
1249 }
1250
1251 /* Create a candidate entry for a statement GS, where GS adds two
1252 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1253 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1254 information about the two SSA names into the new candidate.
1255 Return the new candidate. */
1256
static slsr_cand_t
create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
                     bool subtract_p, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  tree stype = NULL_TREE;
  widest_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);
  slsr_cand_t addend_cand = base_cand_from_table (addend_in);

  /* The most useful transformation is a multiply-immediate feeding
     an add or subtract.  Look for that first.  */
  while (addend_cand && !base && addend_cand->kind != CAND_PHI)
    {
      if (addend_cand->kind == CAND_MULT
          && addend_cand->index == 0
          && TREE_CODE (addend_cand->stride) == INTEGER_CST)
        {
          /* Z = (B + 0) * S, S constant
             X = Y +/- Z
             ===========================
             X = Y + ((+/-1 * S) * B) */
          base = base_in;
          index = wi::to_widest (addend_cand->stride);
          /* The constant stride is negated for a subtract.  */
          if (subtract_p)
            index = -index;
          stride = addend_cand->base_expr;
          ctype = TREE_TYPE (base_in);
          stype = addend_cand->cand_type;
          /* Credit the addend's defining statement as a saving when
             it has no other uses.  */
          if (has_single_use (addend_in))
            savings = (addend_cand->dead_savings
                       + stmt_cost (addend_cand->cand_stmt, speed));
        }

      if (addend_cand->next_interp)
        addend_cand = lookup_cand (addend_cand->next_interp);
      else
        addend_cand = NULL;
    }

  /* Failing that, examine interpretations of the base operand.  */
  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_ADD
          && (base_cand->index == 0
              || operand_equal_p (base_cand->stride,
                                  integer_zero_node, 0)))
        {
          /* Y = B + (i' * S), i' * S = 0
             X = Y +/- Z
             ============================
             X = B + (+/-1 * Z) */
          base = base_cand->base_expr;
          index = subtract_p ? -1 : 1;
          stride = addend_in;
          ctype = base_cand->cand_type;
          /* A constant addend takes sizetype as its stride type.  */
          stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
                   : TREE_TYPE (addend_in));
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }
      else if (subtract_p)
        {
          /* For a subtract, also re-examine interpretations of the
             subtrahend as a multiply-immediate.  */
          slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);

          while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
            {
              if (subtrahend_cand->kind == CAND_MULT
                  && subtrahend_cand->index == 0
                  && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
                {
                  /* Z = (B + 0) * S, S constant
                     X = Y - Z
                     ===========================
                     Value: X = Y + ((-1 * S) * B) */
                  base = base_in;
                  index = wi::to_widest (subtrahend_cand->stride);
                  index = -index;
                  stride = subtrahend_cand->base_expr;
                  ctype = TREE_TYPE (base_in);
                  stype = subtrahend_cand->cand_type;
                  if (has_single_use (addend_in))
                    savings = (subtrahend_cand->dead_savings
                               + stmt_cost (subtrahend_cand->cand_stmt, speed));
                }

              if (subtrahend_cand->next_interp)
                subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
              else
                subtrahend_cand = NULL;
            }
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = Y + (1 * Z).  */
      base = base_in;
      index = subtract_p ? -1 : 1;
      stride = addend_in;
      ctype = TREE_TYPE (base_in);
      stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
               : TREE_TYPE (addend_in));
    }

  c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
                                 ctype, stype, savings);
  return c;
}
1374
1375 /* Create a candidate entry for a statement GS, where GS adds SSA
1376 name BASE_IN to constant INDEX_IN. Propagate any known information
1377 about BASE_IN into the new candidate. Return the new candidate. */
1378
static slsr_cand_t
create_add_imm_cand (gimple *gs, tree base_in, const widest_int &index_in,
                     bool speed)
{
  enum cand_kind kind = CAND_ADD;
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  tree stype = NULL_TREE;
  widest_int index, multiple;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate for one whose
     constant stride evenly divides the added constant.  */
  while (base_cand && !base && base_cand->kind != CAND_PHI)
    {
      signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));

      if (TREE_CODE (base_cand->stride) == INTEGER_CST
          && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
                                sign, &multiple))
        {
          /* Y = (B + i') * S, S constant, c = kS for some integer k
             X = Y + c
             ============================
             X = (B + (i'+ k)) * S
             OR
             Y = B + (i' * S), S constant, c = kS for some integer k
             X = Y + c
             ============================
             X = (B + (i'+ k)) * S */
          /* The new candidate keeps the base candidate's kind; only
             the index is bumped by the quotient k.  */
          kind = base_cand->kind;
          base = base_cand->base_expr;
          index = base_cand->index + multiple;
          stride = base_cand->stride;
          ctype = base_cand->cand_type;
          stype = base_cand->stride_type;
          /* Credit the base's defining statement as a saving when it
             has no other uses.  */
          if (has_single_use (base_in))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));
        }

      if (base_cand->next_interp)
        base_cand = lookup_cand (base_cand->next_interp);
      else
        base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
         produce X = Y + (c * 1).  */
      kind = CAND_ADD;
      base = base_in;
      index = index_in;
      stride = integer_one_node;
      ctype = TREE_TYPE (base_in);
      stype = sizetype;
    }

  c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
                                 ctype, stype, savings);
  return c;
}
1441
1442 /* Given GS which is an add or subtract of scalar integers or pointers,
1443 make at least one appropriate entry in the candidate table. */
1444
1445 static void
1446 slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
1447 {
1448 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1449 slsr_cand_t c = NULL, c2;
1450
1451 if (TREE_CODE (rhs2) == SSA_NAME)
1452 {
1453 /* First record an interpretation assuming RHS1 is the base expression
1454 and RHS2 is the stride. But it doesn't make sense for the
1455 stride to be a pointer, so don't record a candidate in that case. */
1456 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1457 {
1458 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1459
1460 /* Add the first interpretation to the statement-candidate
1461 mapping. */
1462 add_cand_for_stmt (gs, c);
1463 }
1464
1465 /* If the two RHS operands are identical, or this is a subtract,
1466 we're done. */
1467 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1468 return;
1469
1470 /* Otherwise, record another interpretation assuming RHS2 is the
1471 base expression and RHS1 is the stride, again provided that the
1472 stride is not a pointer. */
1473 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1474 {
1475 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1476 if (c)
1477 c->next_interp = c2->cand_num;
1478 else
1479 add_cand_for_stmt (gs, c2);
1480 }
1481 }
1482 else
1483 {
1484 /* Record an interpretation for the add-immediate. */
1485 widest_int index = wi::to_widest (rhs2);
1486 if (subtract_p)
1487 index = -index;
1488
1489 c = create_add_imm_cand (gs, rhs1, index, speed);
1490
1491 /* Add the interpretation to the statement-candidate mapping. */
1492 add_cand_for_stmt (gs, c);
1493 }
1494 }
1495
1496 /* Given GS which is a negate of a scalar integer, make an appropriate
1497 entry in the candidate table. A negate is equivalent to a multiply
1498 by -1. */
1499
1500 static void
1501 slsr_process_neg (gimple *gs, tree rhs1, bool speed)
1502 {
1503 /* Record a CAND_MULT interpretation for the multiply by -1. */
1504 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1505
1506 /* Add the interpretation to the statement-candidate mapping. */
1507 add_cand_for_stmt (gs, c);
1508 }
1509
1510 /* Help function for legal_cast_p, operating on two trees. Checks
1511 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1512 for more details. */
1513
1514 static bool
1515 legal_cast_p_1 (tree lhs_type, tree rhs_type)
1516 {
1517 unsigned lhs_size, rhs_size;
1518 bool lhs_wraps, rhs_wraps;
1519
1520 lhs_size = TYPE_PRECISION (lhs_type);
1521 rhs_size = TYPE_PRECISION (rhs_type);
1522 lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1523 rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1524
1525 if (lhs_size < rhs_size
1526 || (rhs_wraps && !lhs_wraps)
1527 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1528 return false;
1529
1530 return true;
1531 }
1532
1533 /* Return TRUE if GS is a statement that defines an SSA name from
1534 a conversion and is legal for us to combine with an add and multiply
1535 in the candidate table. For example, suppose we have:
1536
1537 A = B + i;
1538 C = (type) A;
1539 D = C * S;
1540
1541 Without the type-cast, we would create a CAND_MULT for D with base B,
1542 index i, and stride S. We want to record this candidate only if it
1543 is equivalent to apply the type cast following the multiply:
1544
1545 A = B + i;
1546 E = A * S;
1547 D = (type) E;
1548
1549 We will record the type with the candidate for D. This allows us
1550 to use a similar previous candidate as a basis. If we have earlier seen
1551
1552 A' = B + i';
1553 C' = (type) A';
1554 D' = C' * S;
1555
1556 we can replace D with
1557
1558 D = D' + (i - i') * S;
1559
1560 But if moving the type-cast would change semantics, we mustn't do this.
1561
1562 This is legitimate for casts from a non-wrapping integral type to
1563 any integral type of the same or larger size. It is not legitimate
1564 to convert a wrapping type to a non-wrapping type, or to a wrapping
1565 type of a different size. I.e., with a wrapping type, we must
1566 assume that the addition B + i could wrap, in which case performing
1567 the multiply before or after one of the "illegal" type casts will
1568 have different semantics. */
1569
1570 static bool
1571 legal_cast_p (gimple *gs, tree rhs)
1572 {
1573 if (!is_gimple_assign (gs)
1574 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1575 return false;
1576
1577 return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
1578 }
1579
1580 /* Given GS which is a cast to a scalar integer type, determine whether
1581 the cast is legal for strength reduction. If so, make at least one
1582 appropriate entry in the candidate table. */
1583
static void
slsr_process_cast (gimple *gs, tree rhs1, bool speed)
{
  tree lhs, ctype;
  slsr_cand_t base_cand, c = NULL, c2;
  unsigned savings = 0;

  /* Only casts that can be safely commuted past an add/multiply are
     candidates (see legal_cast_p above).  */
  if (!legal_cast_p (gs, rhs1))
    return;

  lhs = gimple_assign_lhs (gs);
  base_cand = base_cand_from_table (rhs1);
  ctype = TREE_TYPE (lhs);

  if (base_cand && base_cand->kind != CAND_PHI)
    {
      /* Create one new candidate per interpretation of the RHS.  */
      while (base_cand)
        {
          /* Propagate all data from the base candidate except the type,
             which comes from the cast, and the base candidate's cast,
             which is no longer applicable.  */
          if (has_single_use (rhs1))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));

          c = alloc_cand_and_find_basis (base_cand->kind, gs,
                                         base_cand->base_expr,
                                         base_cand->index, base_cand->stride,
                                         ctype, base_cand->stride_type,
                                         savings);
          if (base_cand->next_interp)
            base_cand = lookup_cand (base_cand->next_interp);
          else
            base_cand = NULL;
        }
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
         CAND_MULT interpretations:

         X = Y + (0 * 1)
         X = (Y + 0) * 1

         The first of these is somewhat arbitrary, but the choice of
         1 for the stride simplifies the logic for propagating casts
         into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
                                     integer_one_node, ctype, sizetype, 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
                                      integer_one_node, ctype, sizetype, 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}
1642
1643 /* Given GS which is a copy of a scalar integer type, make at least one
1644 appropriate entry in the candidate table.
1645
1646 This interface is included for completeness, but is unnecessary
1647 if this pass immediately follows a pass that performs copy
1648 propagation, such as DOM. */
1649
static void
slsr_process_copy (gimple *gs, tree rhs1, bool speed)
{
  slsr_cand_t base_cand, c = NULL, c2;
  unsigned savings = 0;

  base_cand = base_cand_from_table (rhs1);

  if (base_cand && base_cand->kind != CAND_PHI)
    {
      /* Create one new candidate per interpretation of the RHS.  */
      while (base_cand)
        {
          /* Propagate all data from the base candidate.  */
          if (has_single_use (rhs1))
            savings = (base_cand->dead_savings
                       + stmt_cost (base_cand->cand_stmt, speed));

          c = alloc_cand_and_find_basis (base_cand->kind, gs,
                                         base_cand->base_expr,
                                         base_cand->index, base_cand->stride,
                                         base_cand->cand_type,
                                         base_cand->stride_type, savings);
          if (base_cand->next_interp)
            base_cand = lookup_cand (base_cand->next_interp);
          else
            base_cand = NULL;
        }
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
         CAND_MULT interpretations:

         X = Y + (0 * 1)
         X = (Y + 0) * 1

         The first of these is somewhat arbitrary, but the choice of
         1 for the stride simplifies the logic for propagating casts
         into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
                                     integer_one_node, TREE_TYPE (rhs1),
                                     sizetype, 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
                                      integer_one_node, TREE_TYPE (rhs1),
                                      sizetype, 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}
1702 \f
/* Dominator walker that visits each basic block and records any
   strength-reduction candidates found in its PHIs and statements.  */

class find_candidates_dom_walker : public dom_walker
{
public:
  find_candidates_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}
  virtual edge before_dom_children (basic_block);
};
1710
1711 /* Find strength-reduction candidates in block BB. */
1712
edge
find_candidates_dom_walker::before_dom_children (basic_block bb)
{
  bool speed = optimize_bb_for_speed_p (bb);

  /* PHI nodes in this block may themselves be candidates.  */
  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    slsr_process_phi (gsi.phi (), speed);

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *gs = gsi_stmt (gsi);

      /* A single assignment carrying a virtual use is a data
         reference and is handled as a CAND_REF.  */
      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
        slsr_process_ref (gs);

      else if (is_gimple_assign (gs)
               && SCALAR_INT_MODE_P
                    (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
        {
          tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;

          /* First switch: extract the operands for the codes we
             handle, skipping the statement (continue) when an
             operand has an unexpected shape.  Note the deliberate
             fallthrough from the two-operand cases into the
             one-operand RHS1 validation.  */
          switch (gimple_assign_rhs_code (gs))
            {
            case MULT_EXPR:
            case PLUS_EXPR:
              rhs1 = gimple_assign_rhs1 (gs);
              rhs2 = gimple_assign_rhs2 (gs);
              /* Should never happen, but currently some buggy situations
                 in earlier phases put constants in rhs1.  */
              if (TREE_CODE (rhs1) != SSA_NAME)
                continue;
              break;

            /* Possible future opportunity: rhs1 of a ptr+ can be
               an ADDR_EXPR.  */
            case POINTER_PLUS_EXPR:
            case MINUS_EXPR:
              rhs2 = gimple_assign_rhs2 (gs);
              gcc_fallthrough ();

            CASE_CONVERT:
            case SSA_NAME:
            case NEGATE_EXPR:
              rhs1 = gimple_assign_rhs1 (gs);
              if (TREE_CODE (rhs1) != SSA_NAME)
                continue;
              break;

            default:
              ;
            }

          /* Second switch: dispatch to the handler for each
             recognized code; unhandled codes fall through with no
             candidate recorded.  */
          switch (gimple_assign_rhs_code (gs))
            {
            case MULT_EXPR:
              slsr_process_mul (gs, rhs1, rhs2, speed);
              break;

            case PLUS_EXPR:
            case POINTER_PLUS_EXPR:
            case MINUS_EXPR:
              slsr_process_add (gs, rhs1, rhs2, speed);
              break;

            case NEGATE_EXPR:
              slsr_process_neg (gs, rhs1, speed);
              break;

            CASE_CONVERT:
              slsr_process_cast (gs, rhs1, speed);
              break;

            case SSA_NAME:
              slsr_process_copy (gs, rhs1, speed);
              break;

            default:
              ;
            }
        }
    }
  return NULL;
}
1798 \f
1799 /* Dump a candidate for debug. */
1800
static void
dump_candidate (slsr_cand_t c)
{
  fprintf (dump_file, "%3d [%d] ", c->cand_num,
           gimple_bb (c->cand_stmt)->index);
  print_gimple_stmt (dump_file, c->cand_stmt, 0);
  /* Each kind renders its base/index/stride in its canonical form.  */
  switch (c->kind)
    {
    case CAND_MULT:
      fputs (" MULT : (", dump_file);
      print_generic_expr (dump_file, c->base_expr);
      fputs (" + ", dump_file);
      print_decs (c->index, dump_file);
      fputs (") * ", dump_file);
      /* Show the stride type as a cast when it differs from the type
         of a non-constant stride.  */
      if (TREE_CODE (c->stride) != INTEGER_CST
          && c->stride_type != TREE_TYPE (c->stride))
        {
          fputs ("(", dump_file);
          print_generic_expr (dump_file, c->stride_type);
          fputs (")", dump_file);
        }
      print_generic_expr (dump_file, c->stride);
      fputs (" : ", dump_file);
      break;
    case CAND_ADD:
      fputs (" ADD : ", dump_file);
      print_generic_expr (dump_file, c->base_expr);
      fputs (" + (", dump_file);
      print_decs (c->index, dump_file);
      fputs (" * ", dump_file);
      if (TREE_CODE (c->stride) != INTEGER_CST
          && c->stride_type != TREE_TYPE (c->stride))
        {
          fputs ("(", dump_file);
          print_generic_expr (dump_file, c->stride_type);
          fputs (")", dump_file);
        }
      print_generic_expr (dump_file, c->stride);
      fputs (") : ", dump_file);
      break;
    case CAND_REF:
      fputs (" REF : ", dump_file);
      print_generic_expr (dump_file, c->base_expr);
      fputs (" + (", dump_file);
      print_generic_expr (dump_file, c->stride);
      fputs (") + ", dump_file);
      print_decs (c->index, dump_file);
      fputs (" : ", dump_file);
      break;
    case CAND_PHI:
      fputs (" PHI : ", dump_file);
      print_generic_expr (dump_file, c->base_expr);
      fputs (" + (unknown * ", dump_file);
      print_generic_expr (dump_file, c->stride);
      fputs (") : ", dump_file);
      break;
    default:
      gcc_unreachable ();
    }
  /* Trailer common to all kinds: candidate type and linkage info.  */
  print_generic_expr (dump_file, c->cand_type);
  fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
           c->basis, c->dependent, c->sibling);
  fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
           c->next_interp, c->dead_savings);
  if (c->def_phi)
    fprintf (dump_file, " phi: %d\n", c->def_phi);
  fputs ("\n", dump_file);
}
1869
1870 /* Dump the candidate vector for debug. */
1871
1872 static void
1873 dump_cand_vec (void)
1874 {
1875 unsigned i;
1876 slsr_cand_t c;
1877
1878 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1879
1880 FOR_EACH_VEC_ELT (cand_vec, i, c)
1881 dump_candidate (c);
1882 }
1883
1884 /* Callback used to dump the candidate chains hash table. */
1885
1886 int
1887 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1888 {
1889 const_cand_chain_t chain = *slot;
1890 cand_chain_t p;
1891
1892 print_generic_expr (dump_file, chain->base_expr);
1893 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1894
1895 for (p = chain->next; p; p = p->next)
1896 fprintf (dump_file, " -> %d", p->cand->cand_num);
1897
1898 fputs ("\n", dump_file);
1899 return 1;
1900 }
1901
1902 /* Dump the candidate chains. */
1903
1904 static void
1905 dump_cand_chains (void)
1906 {
1907 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1908 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1909 (NULL);
1910 fputs ("\n", dump_file);
1911 }
1912
1913 /* Dump the increment vector for debug. */
1914
static void
dump_incr_vec (void)
{
  /* Only emitted for detailed dumps.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "\nIncrement vector:\n\n");

      /* One record per distinct increment, showing its usage count,
         cost, and any cached initializer.  */
      for (i = 0; i < incr_vec_len; i++)
        {
          fprintf (dump_file, "%3d increment: ", i);
          print_decs (incr_vec[i].incr, dump_file);
          fprintf (dump_file, "\n count: %d", incr_vec[i].count);
          fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
          fputs ("\n initializer: ", dump_file);
          print_generic_expr (dump_file, incr_vec[i].initializer);
          fputs ("\n\n", dump_file);
        }
    }
}
1936 \f
1937 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1938 data reference. */
1939
static void
replace_ref (tree *expr, slsr_cand_t c)
{
  tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
  unsigned HOST_WIDE_INT misalign;
  unsigned align;

  /* Ensure the memory reference carries the minimum alignment
     requirement for the data type.  See PR58041.  */
  get_object_alignment_1 (*expr, &align, &misalign);
  if (misalign != 0)
    align = least_bit_hwi (misalign);
  if (align < TYPE_ALIGN (acc_type))
    acc_type = build_aligned_type (acc_type, align);

  /* Build the replacement: a MEM_REF of (base + stride) with the
     candidate's constant index as the MEM_REF offset.  */
  add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
                          c->base_expr, c->stride);
  mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
                         wide_int_to_tree (c->cand_type, c->index));

  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
  TREE_OPERAND (mem_ref, 0)
    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
                                /*simple_p=*/true, NULL,
                                /*before=*/true, GSI_SAME_STMT);
  /* Preserve points-to and related info, then splice the new
     reference into the candidate statement.  */
  copy_ref_info (mem_ref, *expr);
  *expr = mem_ref;
  update_stmt (c->cand_stmt);
}
1970
1971 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1972 dependent of candidate C with an equivalent strength-reduced data
1973 reference. */
1974
1975 static void
1976 replace_refs (slsr_cand_t c)
1977 {
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1979 {
1980 fputs ("Replacing reference: ", dump_file);
1981 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1982 }
1983
1984 if (gimple_vdef (c->cand_stmt))
1985 {
1986 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1987 replace_ref (lhs, c);
1988 }
1989 else
1990 {
1991 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1992 replace_ref (rhs, c);
1993 }
1994
1995 if (dump_file && (dump_flags & TDF_DETAILS))
1996 {
1997 fputs ("With: ", dump_file);
1998 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1999 fputs ("\n", dump_file);
2000 }
2001
2002 if (c->sibling)
2003 replace_refs (lookup_cand (c->sibling));
2004
2005 if (c->dependent)
2006 replace_refs (lookup_cand (c->dependent));
2007 }
2008
2009 /* Return TRUE if candidate C is dependent upon a PHI. */
2010
2011 static bool
2012 phi_dependent_cand_p (slsr_cand_t c)
2013 {
2014 /* A candidate is not necessarily dependent upon a PHI just because
2015 it has a phi definition for its base name. It may have a basis
2016 that relies upon the same phi definition, in which case the PHI
2017 is irrelevant to this candidate. */
2018 return (c->def_phi
2019 && c->basis
2020 && lookup_cand (c->basis)->def_phi != c->def_phi);
2021 }
2022
2023 /* Calculate the increment required for candidate C relative to
2024 its basis. */
2025
2026 static widest_int
2027 cand_increment (slsr_cand_t c)
2028 {
2029 slsr_cand_t basis;
2030
2031 /* If the candidate doesn't have a basis, just return its own
2032 index. This is useful in record_increments to help us find
2033 an existing initializer. Also, if the candidate's basis is
2034 hidden by a phi, then its own index will be the increment
2035 from the newly introduced phi basis. */
2036 if (!c->basis || phi_dependent_cand_p (c))
2037 return c->index;
2038
2039 basis = lookup_cand (c->basis);
2040 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
2041 return c->index - basis->index;
2042 }
2043
2044 /* Calculate the increment required for candidate C relative to
2045 its basis. If we aren't going to generate pointer arithmetic
2046 for this candidate, return the absolute value of that increment
2047 instead. */
2048
2049 static inline widest_int
2050 cand_abs_increment (slsr_cand_t c)
2051 {
2052 widest_int increment = cand_increment (c);
2053
2054 if (!address_arithmetic_p && wi::neg_p (increment))
2055 increment = -increment;
2056
2057 return increment;
2058 }
2059
2060 /* Return TRUE iff candidate C has already been replaced under
2061 another interpretation. */
2062
2063 static inline bool
2064 cand_already_replaced (slsr_cand_t c)
2065 {
2066 return (gimple_bb (c->cand_stmt) == 0);
2067 }
2068
2069 /* Common logic used by replace_unconditional_candidate and
2070 replace_conditional_candidate. */
2071
2072 static void
2073 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2074 {
2075 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2076 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2077
2078 /* It is highly unlikely, but possible, that the resulting
2079 bump doesn't fit in a HWI. Abandon the replacement
2080 in this case. This does not affect siblings or dependents
2081 of C. Restriction to signed HWI is conservative for unsigned
2082 types but allows for safe negation without twisted logic. */
2083 if (wi::fits_shwi_p (bump)
2084 && bump.to_shwi () != HOST_WIDE_INT_MIN
2085 /* It is not useful to replace casts, copies, negates, or adds of
2086 an SSA name and a constant. */
2087 && cand_code != SSA_NAME
2088 && !CONVERT_EXPR_CODE_P (cand_code)
2089 && cand_code != PLUS_EXPR
2090 && cand_code != POINTER_PLUS_EXPR
2091 && cand_code != MINUS_EXPR
2092 && cand_code != NEGATE_EXPR)
2093 {
2094 enum tree_code code = PLUS_EXPR;
2095 tree bump_tree;
2096 gimple *stmt_to_print = NULL;
2097
2098 /* If the basis name and the candidate's LHS have incompatible
2099 types, introduce a cast. */
2100 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2101 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2102 if (wi::neg_p (bump))
2103 {
2104 code = MINUS_EXPR;
2105 bump = -bump;
2106 }
2107
2108 bump_tree = wide_int_to_tree (target_type, bump);
2109
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 {
2112 fputs ("Replacing: ", dump_file);
2113 print_gimple_stmt (dump_file, c->cand_stmt, 0);
2114 }
2115
2116 if (bump == 0)
2117 {
2118 tree lhs = gimple_assign_lhs (c->cand_stmt);
2119 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2120 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2121 slsr_cand_t cc = c;
2122 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2123 gsi_replace (&gsi, copy_stmt, false);
2124 c->cand_stmt = copy_stmt;
2125 while (cc->next_interp)
2126 {
2127 cc = lookup_cand (cc->next_interp);
2128 cc->cand_stmt = copy_stmt;
2129 }
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 stmt_to_print = copy_stmt;
2132 }
2133 else
2134 {
2135 tree rhs1, rhs2;
2136 if (cand_code != NEGATE_EXPR) {
2137 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2138 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2139 }
2140 if (cand_code != NEGATE_EXPR
2141 && ((operand_equal_p (rhs1, basis_name, 0)
2142 && operand_equal_p (rhs2, bump_tree, 0))
2143 || (operand_equal_p (rhs1, bump_tree, 0)
2144 && operand_equal_p (rhs2, basis_name, 0))))
2145 {
2146 if (dump_file && (dump_flags & TDF_DETAILS))
2147 {
2148 fputs ("(duplicate, not actually replacing)", dump_file);
2149 stmt_to_print = c->cand_stmt;
2150 }
2151 }
2152 else
2153 {
2154 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2155 slsr_cand_t cc = c;
2156 gimple_assign_set_rhs_with_ops (&gsi, code,
2157 basis_name, bump_tree);
2158 update_stmt (gsi_stmt (gsi));
2159 c->cand_stmt = gsi_stmt (gsi);
2160 while (cc->next_interp)
2161 {
2162 cc = lookup_cand (cc->next_interp);
2163 cc->cand_stmt = gsi_stmt (gsi);
2164 }
2165 if (dump_file && (dump_flags & TDF_DETAILS))
2166 stmt_to_print = gsi_stmt (gsi);
2167 }
2168 }
2169
2170 if (dump_file && (dump_flags & TDF_DETAILS))
2171 {
2172 fputs ("With: ", dump_file);
2173 print_gimple_stmt (dump_file, stmt_to_print, 0);
2174 fputs ("\n", dump_file);
2175 }
2176 }
2177 }
2178
2179 /* Replace candidate C with an add or subtract. Note that we only
2180 operate on CAND_MULTs with known strides, so we will never generate
2181 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2182 X = Y + ((i - i') * S), as described in the module commentary. The
2183 folded value ((i - i') * S) is referred to here as the "bump." */
2184
2185 static void
2186 replace_unconditional_candidate (slsr_cand_t c)
2187 {
2188 slsr_cand_t basis;
2189
2190 if (cand_already_replaced (c))
2191 return;
2192
2193 basis = lookup_cand (c->basis);
2194 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2195
2196 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2197 }
2198 \f
2199 /* Return the index in the increment vector of the given INCREMENT,
2200 or -1 if not found. The latter can occur if more than
2201 MAX_INCR_VEC_LEN increments have been found. */
2202
2203 static inline int
2204 incr_vec_index (const widest_int &increment)
2205 {
2206 unsigned i;
2207
2208 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2209 ;
2210
2211 if (i < incr_vec_len)
2212 return i;
2213 else
2214 return -1;
2215 }
2216
/* Create a new statement along edge E to add BASIS_NAME to the product
   of INCREMENT and the stride of candidate C.  Create and return a new
   SSA name from *VAR to be used as the LHS of the new statement.
   KNOWN_STRIDE is true iff C's stride is a constant.  */

static tree
create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
			     widest_int increment, edge e, location_t loc,
			     bool known_stride)
{
  basic_block insert_bb;
  gimple_stmt_iterator gsi;
  tree lhs, basis_type;
  gassign *new_stmt, *cast_stmt = NULL;

  /* If the add candidate along this incoming edge has the same
     index as C's hidden basis, the hidden basis represents this
     edge correctly.  */
  if (increment == 0)
    return basis_name;

  basis_type = TREE_TYPE (basis_name);
  lhs = make_temp_ssa_name (basis_type, NULL, "slsr");

  /* Occasionally people convert integers to pointers without a 
     cast, leading us into trouble if we aren't careful.  */
  enum tree_code plus_code
    = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;

  if (known_stride)
    {
      /* Constant stride: fold the bump to a constant and emit a
	 single add (or subtract, when the bump is negative and we
	 aren't doing pointer arithmetic).  */
      tree bump_tree;
      enum tree_code code = plus_code;
      widest_int bump = increment * wi::to_widest (c->stride);
      if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
	{
	  code = MINUS_EXPR;
	  bump = -bump;
	}

      /* POINTER_PLUS_EXPR requires its offset operand in sizetype.  */
      tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
      bump_tree = wide_int_to_tree (stride_type, bump);
      new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
    }
  else
    {
      int i;
      /* Increments were recorded by absolute value when not doing
	 pointer arithmetic; look the increment up the same way.  */
      bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
      i = incr_vec_index (negate_incr ? -increment : increment);
      gcc_assert (i >= 0);

      if (incr_vec[i].initializer)
	{
	  /* A dominating initializer T_0 = stride * increment already
	     exists; just add or subtract it.  */
	  enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
	  new_stmt = gimple_build_assign (lhs, code, basis_name,
					  incr_vec[i].initializer);
	}
      else {
	/* No initializer: only increments of 1 or -1 can reach here,
	   since any other increment would have required one.  */
	tree stride;

	/* Cast the stride to the type used during analysis if they
	   differ.  */
	if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
	  {
	    tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
						   "slsr");
	    cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
					     c->stride);
	    stride = cast_stride;
	  }
	else
	  stride = c->stride;

	if (increment == 1)
	  new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
	else if (increment == -1)
	  new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
	else
	  gcc_unreachable ();
      }
    }

  /* Insert into E's source block when it has a single successor;
     otherwise split the edge to get a block we can insert into.  */
  insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
  gsi = gsi_last_bb (insert_bb);

  /* Place the new statements before a control-flow statement ending
     the block, otherwise append them; the cast (if any) must precede
     NEW_STMT in either case.  */
  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
    {
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      if (cast_stmt)
	{
	  gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
	  gimple_set_location (cast_stmt, loc);
	}
    }
  else
    {
      if (cast_stmt)
	{
	  gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
	  gimple_set_location (cast_stmt, loc);
	}
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  gimple_set_location (new_stmt, loc);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (cast_stmt)
	{
	  fprintf (dump_file, "Inserting cast in block %d: ",
		   insert_bb->index);
	  print_gimple_stmt (dump_file, cast_stmt, 0);
	}
      fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
      print_gimple_stmt (dump_file, new_stmt, 0);
    }

  return lhs;
}
2335
/* Given a candidate C with BASIS_NAME being the LHS of C's basis which
   is hidden by the phi node FROM_PHI, create a new phi node in the same
   block as FROM_PHI.  The new phi is suitable for use as a basis by C,
   with its phi arguments representing conditional adjustments to the
   hidden basis along conditional incoming paths.  Those adjustments are
   made by creating add statements (and sometimes recursively creating
   phis) along those incoming paths.  LOC is the location to attach to
   the introduced statements.  KNOWN_STRIDE is true iff C's stride is a
   constant.  */

static tree
create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
		  location_t loc, bool known_stride)
{
  int i;
  tree name, phi_arg;
  gphi *phi;
  slsr_cand_t basis = lookup_cand (c->basis);
  int nargs = gimple_phi_num_args (from_phi);
  basic_block phi_bb = gimple_bb (from_phi);
  slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
  auto_vec<tree> phi_args (nargs);

  /* Process each argument of the existing phi that represents
     conditionally-executed add candidates.  */
  for (i = 0; i < nargs; i++)
    {
      edge e = (*phi_bb->preds)[i];
      tree arg = gimple_phi_arg_def (from_phi, i);
      tree feeding_def;

      /* If the phi argument is the base name of the CAND_PHI, then
	 this incoming arc should use the hidden basis.  */
      if (operand_equal_p (arg, phi_cand->base_expr, 0))
	/* An adjustment is still needed when the hidden basis has a
	   nonzero index; its negation brings us back to the base.  */
	if (basis->index == 0)
	  feeding_def = gimple_assign_lhs (basis->cand_stmt);
	else
	  {
	    widest_int incr = -basis->index;
	    feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
						       e, loc, known_stride);
	  }
      else
	{
	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);

	  /* If there is another phi along this incoming edge, we must
	     process it in the same fashion to ensure that all basis
	     adjustments are made along its incoming edges.  */
	  if (gimple_code (arg_def) == GIMPLE_PHI)
	    feeding_def = create_phi_basis (c, arg_def, basis_name,
					    loc, known_stride);
	  else
	    {
	      /* Otherwise adjust by the difference between this arm's
		 index and the basis's index.  */
	      slsr_cand_t arg_cand = base_cand_from_table (arg);
	      widest_int diff = arg_cand->index - basis->index;
	      feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
							 e, loc, known_stride);
	    }
	}

      /* Because of recursion, we need to save the arguments in a vector
	 so we can create the PHI statement all at once.  Otherwise the
	 storage for the half-created PHI can be reclaimed.  */
      phi_args.safe_push (feeding_def);
    }

  /* Create the new phi basis.  */
  name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
  phi = create_phi_node (name, phi_bb);
  SSA_NAME_DEF_STMT (name) = phi;

  /* Attach the collected feeding definitions; PHI_ARGS was pushed in
     predecessor-edge order, so indices line up with PHI_BB->preds.  */
  FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
    {
      edge e = (*phi_bb->preds)[i];
      add_phi_arg (phi, phi_arg, e, loc);
    }

  update_stmt (phi);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Introducing new phi basis: ", dump_file);
      print_gimple_stmt (dump_file, phi, 0);
    }

  return name;
}
2424
/* Given a candidate C whose basis is hidden by at least one intervening
   phi, introduce a matching number of new phis to represent its basis
   adjusted by conditional increments along possible incoming paths.  Then
   replace C as though it were an unconditional candidate, using the new
   basis.  */

static void
replace_conditional_candidate (slsr_cand_t c)
{
  tree basis_name, name;
  slsr_cand_t basis;
  location_t loc;

  /* Look up the LHS SSA name from C's basis.  This will be the
     RHS1 of the adds we will introduce to create new phi arguments.  */
  basis = lookup_cand (c->basis);
  basis_name = gimple_assign_lhs (basis->cand_stmt);

  /* Create a new phi statement which will represent C's true basis
     after the transformation is complete.  */
  loc = gimple_location (c->cand_stmt);
  name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
			   basis_name, loc, KNOWN_STRIDE);
  /* Replace C with an add of the new basis phi and a constant.  The
     new phi has index zero with respect to C, so the bump is simply
     C's own index scaled by the stride.  */
  widest_int bump = c->index * wi::to_widest (c->stride);

  replace_mult_candidate (c, name, bump);
}
2453
/* Compute the expected costs of inserting basis adjustments for
   candidate C with phi-definition PHI.  The cost of inserting 
   one adjustment is given by ONE_ADD_COST.  If PHI has arguments
   which are themselves phi results, recursively calculate costs
   for those phis as well.  */

static int
phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
{
  unsigned i;
  int cost = 0;
  slsr_cand_t phi_cand = *stmt_cand_map->get (phi);

  /* If we work our way back to a phi that isn't dominated by the hidden
     basis, this isn't a candidate for replacement.  Indicate this by
     returning an unreasonably high cost.  It's not easy to detect
     these situations when determining the basis, so we defer the
     decision until now.  */
  basic_block phi_bb = gimple_bb (phi);
  slsr_cand_t basis = lookup_cand (c->basis);
  basic_block basis_bb = gimple_bb (basis->cand_stmt);

  if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
    return COST_INFINITE;

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg_def (phi, i);

      /* Arguments equal to the phi's base name need no adjustment and
	 hence contribute no cost.  */
      if (arg != phi_cand->base_expr)
	{
	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);

	  if (gimple_code (arg_def) == GIMPLE_PHI)
	    cost += phi_add_costs (arg_def, c, one_add_cost);
	  else
	    {
	      slsr_cand_t arg_cand = base_cand_from_table (arg);

	      /* An arm whose index matches C's needs no adjusting add.  */
	      if (arg_cand->index != c->index)
		cost += one_add_cost;
	    }
	}
    }

  return cost;
}
2501
/* For candidate C, each sibling of candidate C, and each dependent of
   candidate C, determine whether the candidate is dependent upon a 
   phi that hides its basis.  If not, replace the candidate unconditionally.
   Otherwise, determine whether the cost of introducing compensation code
   for the candidate is offset by the gains from strength reduction.  If
   so, replace the candidate and introduce the compensation code.  */

static void
replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
{
  if (phi_dependent_cand_p (c))
    {
      /* A multiply candidate with a stride of 1 is just an artifice
	 of a copy or cast; there is no value in replacing it.  */
      if (c->kind == CAND_MULT && wi::to_widest (c->stride) != 1)
	{
	  /* A candidate dependent upon a phi will replace a multiply by 
	     a constant with an add, and will insert at most one add for
	     each phi argument.  Add these costs with the potential dead-code
	     savings to determine profitability.  */
	  bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
	  int mult_savings = stmt_cost (c->cand_stmt, speed);
	  gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
	  tree phi_result = gimple_phi_result (phi);
	  int one_add_cost = add_cost (speed, 
				       TYPE_MODE (TREE_TYPE (phi_result)));
	  int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
	  int cost = add_costs - mult_savings - c->dead_savings;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  Conditional candidate %d:\n", c->cand_num);
	      fprintf (dump_file, "    add_costs = %d\n", add_costs);
	      fprintf (dump_file, "    mult_savings = %d\n", mult_savings);
	      fprintf (dump_file, "    dead_savings = %d\n", c->dead_savings);
	      fprintf (dump_file, "    cost = %d\n", cost);
	      if (cost <= COST_NEUTRAL)
		fputs ("  Replacing...\n", dump_file);
	      else
		fputs ("  Not replaced.\n", dump_file);
	    }

	  /* Replace only when at worst cost-neutral, since dead-code
	     savings above are merely potential.  */
	  if (cost <= COST_NEUTRAL)
	    replace_conditional_candidate (c);
	}
    }
  else
    replace_unconditional_candidate (c);

  /* Recurse over the rest of the candidate tree.  */
  if (c->sibling)
    replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));

  if (c->dependent)
    replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
}
2557 \f
2558 /* Count the number of candidates in the tree rooted at C that have
2559 not already been replaced under other interpretations. */
2560
2561 static int
2562 count_candidates (slsr_cand_t c)
2563 {
2564 unsigned count = cand_already_replaced (c) ? 0 : 1;
2565
2566 if (c->sibling)
2567 count += count_candidates (lookup_cand (c->sibling));
2568
2569 if (c->dependent)
2570 count += count_candidates (lookup_cand (c->dependent));
2571
2572 return count;
2573 }
2574
/* Increase the count of INCREMENT by one in the increment vector.
   INCREMENT is associated with candidate C.  If INCREMENT is to be
   conditionally executed as part of a conditional candidate replacement,
   IS_PHI_ADJUST is true, otherwise false.  If an initializer
   T_0 = stride * I is provided by a candidate that dominates all
   candidates with the same increment, also record T_0 for subsequent use.  */

static void
record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
{
  bool found = false;
  unsigned i;

  /* Treat increments that differ only in sign as identical so as to
     share initializers, unless we are generating pointer arithmetic.  */
  if (!address_arithmetic_p && wi::neg_p (increment))
    increment = -increment;

  for (i = 0; i < incr_vec_len; i++)
    {
      if (incr_vec[i].incr == increment)
	{
	  incr_vec[i].count++;
	  found = true;

	  /* If we previously recorded an initializer that doesn't
	     dominate this candidate, it's not going to be useful to
	     us after all.  */
	  if (incr_vec[i].initializer
	      && !dominated_by_p (CDI_DOMINATORS,
				  gimple_bb (c->cand_stmt),
				  incr_vec[i].init_bb))
	    {
	      incr_vec[i].initializer = NULL_TREE;
	      incr_vec[i].init_bb = NULL;
	    }

	  break;
	}
    }

  /* NOTE(review): the test against MAX_INCR_VEC_LEN - 1 leaves the
     final slot unused — presumably intentional headroom; verify.  */
  if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
    {
      /* The first time we see an increment, create the entry for it.
	 If this is the root candidate which doesn't have a basis, set
	 the count to zero.  We're only processing it so it can possibly
	 provide an initializer for other candidates.  */
      incr_vec[incr_vec_len].incr = increment;
      incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
      incr_vec[incr_vec_len].cost = COST_INFINITE;
      
      /* Optimistically record the first occurrence of this increment
	 as providing an initializer (if it does); we will revise this
	 opinion later if it doesn't dominate all other occurrences.
	 Exception:  increments of 0, 1 never need initializers;
	 and phi adjustments don't ever provide initializers.  */
      if (c->kind == CAND_ADD
	  && !is_phi_adjust
	  && c->index == increment
	  && (increment > 1 || increment < 0)
	  && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
	      || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
	{
	  /* The operand of the add that isn't the base expression is
	     the candidate initializer T_0.  */
	  tree t0 = NULL_TREE;
	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
	  if (operand_equal_p (rhs1, c->base_expr, 0))
	    t0 = rhs2;
	  else if (operand_equal_p (rhs2, c->base_expr, 0))
	    t0 = rhs1;
	  if (t0
	      && SSA_NAME_DEF_STMT (t0)
	      && gimple_bb (SSA_NAME_DEF_STMT (t0)))
	    {
	      incr_vec[incr_vec_len].initializer = t0;
	      incr_vec[incr_vec_len++].init_bb
		= gimple_bb (SSA_NAME_DEF_STMT (t0));
	    }
	  else
	    {
	      incr_vec[incr_vec_len].initializer = NULL_TREE;
	      incr_vec[incr_vec_len++].init_bb = NULL;
	    }
	}
      else
	{
	  incr_vec[incr_vec_len].initializer = NULL_TREE;
	  incr_vec[incr_vec_len++].init_bb = NULL;
	}
    }
}
2666
2667 /* Given phi statement PHI that hides a candidate from its BASIS, find
2668 the increments along each incoming arc (recursively handling additional
2669 phis that may be present) and record them. These increments are the
2670 difference in index between the index-adjusting statements and the
2671 index of the basis. */
2672
2673 static void
2674 record_phi_increments (slsr_cand_t basis, gimple *phi)
2675 {
2676 unsigned i;
2677 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2678
2679 for (i = 0; i < gimple_phi_num_args (phi); i++)
2680 {
2681 tree arg = gimple_phi_arg_def (phi, i);
2682
2683 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2684 {
2685 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2686
2687 if (gimple_code (arg_def) == GIMPLE_PHI)
2688 record_phi_increments (basis, arg_def);
2689 else
2690 {
2691 slsr_cand_t arg_cand = base_cand_from_table (arg);
2692 widest_int diff = arg_cand->index - basis->index;
2693 record_increment (arg_cand, diff, PHI_ADJUST);
2694 }
2695 }
2696 }
2697 }
2698
2699 /* Determine how many times each unique increment occurs in the set
2700 of candidates rooted at C's parent, recording the data in the
2701 increment vector. For each unique increment I, if an initializer
2702 T_0 = stride * I is provided by a candidate that dominates all
2703 candidates with the same increment, also record T_0 for subsequent
2704 use. */
2705
2706 static void
2707 record_increments (slsr_cand_t c)
2708 {
2709 if (!cand_already_replaced (c))
2710 {
2711 if (!phi_dependent_cand_p (c))
2712 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2713 else
2714 {
2715 /* A candidate with a basis hidden by a phi will have one
2716 increment for its relationship to the index represented by
2717 the phi, and potentially additional increments along each
2718 incoming edge. For the root of the dependency tree (which
2719 has no basis), process just the initial index in case it has
2720 an initializer that can be used by subsequent candidates. */
2721 record_increment (c, c->index, NOT_PHI_ADJUST);
2722
2723 if (c->basis)
2724 record_phi_increments (lookup_cand (c->basis),
2725 lookup_cand (c->def_phi)->cand_stmt);
2726 }
2727 }
2728
2729 if (c->sibling)
2730 record_increments (lookup_cand (c->sibling));
2731
2732 if (c->dependent)
2733 record_increments (lookup_cand (c->dependent));
2734 }
2735
/* Add up and return the costs of introducing add statements that
   require the increment INCR on behalf of candidate C and phi
   statement PHI.  Accumulate into *SAVINGS the potential savings
   from removing existing statements that feed PHI and have no other
   uses.  */

static int
phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple *phi,
	       int *savings)
{
  unsigned i;
  int cost = 0;
  slsr_cand_t basis = lookup_cand (c->basis);
  slsr_cand_t phi_cand = *stmt_cand_map->get (phi);

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg_def (phi, i);

      /* Arguments equal to the phi's base name need no compensating
	 add, so they contribute neither cost nor savings.  */
      if (!operand_equal_p (arg, phi_cand->base_expr, 0))
	{
	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);
      
	  if (gimple_code (arg_def) == GIMPLE_PHI)
	    {
	      /* Recurse into feeding phis; their savings only count
		 when this phi consumes all uses of their result.  */
	      int feeding_savings = 0;
	      tree feeding_var = gimple_phi_result (arg_def);
	      cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
	      if (uses_consumed_by_stmt (feeding_var, phi))
		*savings += feeding_savings;
	    }
	  else
	    {
	      slsr_cand_t arg_cand = base_cand_from_table (arg);
	      widest_int diff = arg_cand->index - basis->index;

	      /* Only arms whose adjustment equals INCR are charged to
		 this increment.  */
	      if (incr == diff)
		{
		  tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
		  tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
		  cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
		  if (uses_consumed_by_stmt (lhs, phi))
		    *savings += stmt_cost (arg_cand->cand_stmt, true);
		}
	    }
	}
    }

  return cost;
}
2786
2787 /* Return the first candidate in the tree rooted at C that has not
2788 already been replaced, favoring siblings over dependents. */
2789
2790 static slsr_cand_t
2791 unreplaced_cand_in_tree (slsr_cand_t c)
2792 {
2793 if (!cand_already_replaced (c))
2794 return c;
2795
2796 if (c->sibling)
2797 {
2798 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2799 if (sib)
2800 return sib;
2801 }
2802
2803 if (c->dependent)
2804 {
2805 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2806 if (dep)
2807 return dep;
2808 }
2809
2810 return NULL;
2811 }
2812
2813 /* Return TRUE if the candidates in the tree rooted at C should be
2814 optimized for speed, else FALSE. We estimate this based on the block
2815 containing the most dominant candidate in the tree that has not yet
2816 been replaced. */
2817
2818 static bool
2819 optimize_cands_for_speed_p (slsr_cand_t c)
2820 {
2821 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2822 gcc_assert (c2);
2823 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2824 }
2825
/* Add COST_IN to the lowest cost of any dependent path starting at
   candidate C or any of its siblings, counting only candidates along
   such paths with increment INCR.  Assume that replacing a candidate
   reduces cost by REPL_SAVINGS.  Also account for savings from any
   statements that would go dead.  If COUNT_PHIS is true, include
   costs of introducing feeding statements for conditional candidates.  */

static int
lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
		  const widest_int &incr, bool count_phis)
{
  int local_cost, sib_cost, savings = 0;
  widest_int cand_incr = cand_abs_increment (c);

  /* Only candidates whose (absolute) increment matches INCR earn the
     replacement savings; dead-code savings apply regardless.  */
  if (cand_already_replaced (c))
    local_cost = cost_in;
  else if (incr == cand_incr)
    local_cost = cost_in - repl_savings - c->dead_savings;
  else
    local_cost = cost_in - c->dead_savings;

  if (count_phis
      && phi_dependent_cand_p (c)
      && !cand_already_replaced (c))
    {
      /* Conditional candidates also pay for the adds fed into their
	 hidden-basis phi.  */
      gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
      local_cost += phi_incr_cost (c, incr, phi, &savings);

      if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
	local_cost -= savings;
    }

  /* Dependents extend the current path; siblings start alternative
     paths, of which we keep the cheapest.  */
  if (c->dependent)
    local_cost = lowest_cost_path (local_cost, repl_savings, 
				   lookup_cand (c->dependent), incr,
				   count_phis);

  if (c->sibling)
    {
      sib_cost = lowest_cost_path (cost_in, repl_savings,
				   lookup_cand (c->sibling), incr,
				   count_phis);
      local_cost = MIN (local_cost, sib_cost);
    }

  return local_cost;
}
2873
/* Compute the total savings that would accrue from all replacements
   in the candidate tree rooted at C, counting only candidates with
   increment INCR.  Assume that replacing a candidate reduces cost
   by REPL_SAVINGS.  Also account for savings from statements that
   would go dead.  */

static int
total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
	       bool count_phis)
{
  int savings = 0;
  widest_int cand_incr = cand_abs_increment (c);

  /* Only candidates whose (absolute) increment matches INCR are
     counted.  */
  if (incr == cand_incr && !cand_already_replaced (c))
    savings += repl_savings + c->dead_savings;

  if (count_phis
      && phi_dependent_cand_p (c)
      && !cand_already_replaced (c))
    {
      /* Conditional candidates pay for the adds fed into their
	 hidden-basis phi, offset by phi feed statements going dead.  */
      int phi_savings = 0;
      gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
      savings -= phi_incr_cost (c, incr, phi, &phi_savings);

      if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
	savings += phi_savings;
    }

  /* Sum over the whole tree.  */
  if (c->dependent)
    savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
			      count_phis);

  if (c->sibling)
    savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
			      count_phis);

  return savings;
}
2912
/* Use target-specific costs to determine and record which increments
   in the current candidate tree are profitable to replace, assuming
   MODE and SPEED.  FIRST_DEP is the first dependent of the root of
   the candidate tree.

   One slight limitation here is that we don't account for the possible
   introduction of casts in some cases.  See replace_one_candidate for
   the cases where these are introduced.  This should probably be cleaned
   up sometime.  */

static void
analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
{
  unsigned i;

  for (i = 0; i < incr_vec_len; i++)
    {
      /* NOTE(review): INCR is extracted before the fits_shwi_p guard
	 below; it is only used on branches where the guard held.  */
      HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();

      /* If somehow this increment is bigger than a HWI, we won't
	 be optimizing candidates that use it.  And if the increment
	 has a count of zero, nothing will be done with it.  */
      if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
	incr_vec[i].cost = COST_INFINITE;

      /* Increments of 0, 1, and -1 are always profitable to replace,
	 because they always replace a multiply or add with an add or
	 copy, and may cause one or more existing instructions to go
	 dead.  Exception:  -1 can't be assumed to be profitable for
	 pointer addition.  */
      else if (incr == 0
	       || incr == 1
	       || (incr == -1
		   && !POINTER_TYPE_P (first_dep->cand_type)))
	incr_vec[i].cost = COST_NEUTRAL;

      /* If we need to add an initializer, give up if a cast from the
	 candidate's type to its stride's type can lose precision.
	 Note that this already takes into account that the stride may
	 have been cast to a wider type, in which case this test won't
	 fire.  Example:

           short int _1;
	   _2 = (int) _1;
	   _3 = _2 * 10;
	   _4 = x + _3;    ADD: x + (10 * (int)_1) : int
	   _5 = _2 * 15;
	   _6 = x + _5;    ADD: x + (15 * (int)_1) : int

	 Although the stride was a short int initially, the stride
	 used in the analysis has been widened to an int, and such
	 widening will be done in the initializer as well.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && !legal_cast_p_1 (first_dep->stride_type,
				   TREE_TYPE (gimple_assign_lhs
					      (first_dep->cand_stmt))))
	incr_vec[i].cost = COST_INFINITE;

      /* If we need to add an initializer, make sure we don't introduce
	 a multiply by a pointer type, which can happen in certain cast
	 scenarios.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && POINTER_TYPE_P (first_dep->stride_type))
	incr_vec[i].cost = COST_INFINITE;

      /* For any other increment, if this is a multiply candidate, we
	 must introduce a temporary T and initialize it with
	 T_0 = stride * increment.  When optimizing for speed, walk the
	 candidate tree to calculate the best cost reduction along any
	 path; if it offsets the fixed cost of inserting the initializer,
	 replacing the increment is profitable.  When optimizing for
         size, instead calculate the total cost reduction from replacing
	 all candidates with this increment.  */
      else if (first_dep->kind == CAND_MULT)
	{
	  /* The initializer costs a multiply; each replacement trades
	     a multiply for an add.  */
	  int cost = mult_by_coeff_cost (incr, mode, speed);
	  int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
	  if (speed)
	    cost = lowest_cost_path (cost, repl_savings, first_dep,
				     incr_vec[i].incr, COUNT_PHIS);
	  else
	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
				   COUNT_PHIS);

	  incr_vec[i].cost = cost;
	}

      /* If this is an add candidate, the initializer may already
	 exist, so only calculate the cost of the initializer if it
	 doesn't.  We are replacing one add with another here, so the
	 known replacement savings is zero.  We will account for removal
	 of dead instructions in lowest_cost_path or total_savings.  */
      else
	{
	  int cost = 0;
	  if (!incr_vec[i].initializer)
	    cost = mult_by_coeff_cost (incr, mode, speed);

	  if (speed)
	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
				     DONT_COUNT_PHIS);
	  else
	    cost -= total_savings (0, first_dep, incr_vec[i].incr,
				   DONT_COUNT_PHIS);

	  incr_vec[i].cost = cost;
	}
    }
}
3024
3025 /* Return the nearest common dominator of BB1 and BB2. If the blocks
3026 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
3027 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
3028 return C2 in *WHERE; and if the NCD matches neither, return NULL in
3029 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
3030
3031 static basic_block
3032 ncd_for_two_cands (basic_block bb1, basic_block bb2,
3033 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
3034 {
3035 basic_block ncd;
3036
3037 if (!bb1)
3038 {
3039 *where = c2;
3040 return bb2;
3041 }
3042
3043 if (!bb2)
3044 {
3045 *where = c1;
3046 return bb1;
3047 }
3048
3049 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
3050
3051 /* If both candidates are in the same block, the earlier
3052 candidate wins. */
3053 if (bb1 == ncd && bb2 == ncd)
3054 {
3055 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
3056 *where = c2;
3057 else
3058 *where = c1;
3059 }
3060
3061 /* Otherwise, if one of them produced a candidate in the
3062 dominator, that one wins. */
3063 else if (bb1 == ncd)
3064 *where = c1;
3065
3066 else if (bb2 == ncd)
3067 *where = c2;
3068
3069 /* If neither matches the dominator, neither wins. */
3070 else
3071 *where = NULL;
3072
3073 return ncd;
3074 }
3075
/* Consider all candidates that feed PHI.  Find the nearest common
   dominator of those candidates requiring the given increment INCR.
   Further find and return the nearest common dominator of this result
   with block NCD.  If the returned block contains one or more of the
   candidates, return the earliest candidate in the block in *WHERE.  */

static basic_block
ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
	      basic_block ncd, slsr_cand_t *where)
{
  unsigned i;
  slsr_cand_t basis = lookup_cand (c->basis);
  slsr_cand_t phi_cand = *stmt_cand_map->get (phi);

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg_def (phi, i);

      /* Arguments equal to the phi candidate's base expression carry
	 no increment of their own and are skipped.  */
      if (!operand_equal_p (arg, phi_cand->base_expr, 0))
	{
	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);

	  /* A feeding PHI is folded in recursively.  */
	  if (gimple_code (arg_def) == GIMPLE_PHI)
	    ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd,
				where);
	  else
	    {
	      slsr_cand_t arg_cand = base_cand_from_table (arg);
	      widest_int diff = arg_cand->index - basis->index;
	      basic_block pred = gimple_phi_arg_edge (phi, i)->src;

	      /* When the feeding candidate requires INCR (or -INCR,
		 since increments are canonicalized to absolute values
		 for non-address arithmetic), fold the incoming edge's
		 source block into the running NCD.  */
	      if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
		ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
	    }
	}
    }

  return ncd;
}
3115
3116 /* Consider the candidate C together with any candidates that feed
3117 C's phi dependence (if any). Find and return the nearest common
3118 dominator of those candidates requiring the given increment INCR.
3119 If the returned block contains one or more of the candidates,
3120 return the earliest candidate in the block in *WHERE. */
3121
3122 static basic_block
3123 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3124 {
3125 basic_block ncd = NULL;
3126
3127 if (cand_abs_increment (c) == incr)
3128 {
3129 ncd = gimple_bb (c->cand_stmt);
3130 *where = c;
3131 }
3132
3133 if (phi_dependent_cand_p (c))
3134 ncd = ncd_with_phi (c, incr,
3135 as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3136 ncd, where);
3137
3138 return ncd;
3139 }
3140
3141 /* Consider all candidates in the tree rooted at C for which INCR
3142 represents the required increment of C relative to its basis.
3143 Find and return the basic block that most nearly dominates all
3144 such candidates. If the returned block contains one or more of
3145 the candidates, return the earliest candidate in the block in
3146 *WHERE. */
3147
3148 static basic_block
3149 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3150 slsr_cand_t *where)
3151 {
3152 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3153 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3154
3155 /* First find the NCD of all siblings and dependents. */
3156 if (c->sibling)
3157 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3158 incr, &sib_where);
3159 if (c->dependent)
3160 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3161 incr, &dep_where);
3162 if (!sib_ncd && !dep_ncd)
3163 {
3164 new_where = NULL;
3165 ncd = NULL;
3166 }
3167 else if (sib_ncd && !dep_ncd)
3168 {
3169 new_where = sib_where;
3170 ncd = sib_ncd;
3171 }
3172 else if (dep_ncd && !sib_ncd)
3173 {
3174 new_where = dep_where;
3175 ncd = dep_ncd;
3176 }
3177 else
3178 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3179 dep_where, &new_where);
3180
3181 /* If the candidate's increment doesn't match the one we're interested
3182 in (and nor do any increments for feeding defs of a phi-dependence),
3183 then the result depends only on siblings and dependents. */
3184 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3185
3186 if (!this_ncd || cand_already_replaced (c))
3187 {
3188 *where = new_where;
3189 return ncd;
3190 }
3191
3192 /* Otherwise, compare this candidate with the result from all siblings
3193 and dependents. */
3194 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3195
3196 return ncd;
3197 }
3198
3199 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3200
3201 static inline bool
3202 profitable_increment_p (unsigned index)
3203 {
3204 return (incr_vec[index].cost <= COST_NEUTRAL);
3205 }
3206
/* For each profitable increment in the increment vector not equal to
   0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
   dominator of all statements in the candidate chain rooted at C
   that require that increment, and insert an initializer
   T_0 = stride * increment at that location.  Record T_0 with the
   increment record.  */

static void
insert_initializers (slsr_cand_t c)
{
  unsigned i;

  for (i = 0; i < incr_vec_len; i++)
    {
      basic_block bb;
      slsr_cand_t where = NULL;
      gassign *init_stmt;
      gassign *cast_stmt = NULL;
      tree new_name, incr_tree, init_stride;
      widest_int incr = incr_vec[i].incr;

      /* Unprofitable increments need no initializer; nor do 0, 1,
	 and (for non-pointer arithmetic) -1, which are replaced with
	 a simple copy, add, or subtract of the stride.  */
      if (!profitable_increment_p (i)
	  || incr == 1
	  || (incr == -1
	      && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
	  || incr == 0)
	continue;

      /* We may have already identified an existing initializer that
	 will suffice.  */
      if (incr_vec[i].initializer)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("Using existing initializer: ", dump_file);
	      print_gimple_stmt (dump_file,
				 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
				 0, 0);
	    }
	  continue;
	}

      /* Find the block that most closely dominates all candidates
	 with this increment.  If there is at least one candidate in
	 that block, the earliest one will be returned in WHERE.  */
      bb = nearest_common_dominator_for_cands (c, incr, &where);

      /* If the nominal stride has a different type than the recorded
	 stride type, build a cast from the nominal stride to that type.  */
      if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
	{
	  init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
	  cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
	}
      else
	init_stride = c->stride;

      /* Create a new SSA name to hold the initializer's value.  */
      new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
      incr_vec[i].initializer = new_name;

      /* Create the initializer and insert it in the latest possible
	 dominating position.  */
      incr_tree = wide_int_to_tree (c->stride_type, incr);
      init_stmt = gimple_build_assign (new_name, MULT_EXPR,
				       init_stride, incr_tree);
      if (where)
	{
	  /* A candidate in the dominating block gives an exact
	     insertion point: immediately ahead of that candidate.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
	  location_t loc = gimple_location (where->cand_stmt);

	  if (cast_stmt)
	    {
	      gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
	      gimple_set_location (cast_stmt, loc);
	    }

	  gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  gimple_set_location (init_stmt, loc);
	}
      else
	{
	  /* Otherwise, insert at the end of the dominating block,
	     but ahead of any statement that ends the block.  */
	  gimple_stmt_iterator gsi = gsi_last_bb (bb);
	  gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
	  location_t loc = gimple_location (basis_stmt);

	  if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
	    {
	      if (cast_stmt)
		{
		  gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
		  gimple_set_location (cast_stmt, loc);
		}
	      gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	    }
	  else
	    {
	      if (cast_stmt)
		{
		  /* GSI_NEW_STMT advances the iterator so that the
		     initializer lands after the cast it consumes.  */
		  gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
		  gimple_set_location (cast_stmt, loc);
		}
	      gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
	    }

	  gimple_set_location (init_stmt, gimple_location (basis_stmt));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (cast_stmt)
	    {
	      fputs ("Inserting stride cast: ", dump_file);
	      print_gimple_stmt (dump_file, cast_stmt, 0);
	    }
	  fputs ("Inserting initializer: ", dump_file);
	  print_gimple_stmt (dump_file, init_stmt, 0);
	}
    }
}
3327
/* Return TRUE iff all required increments for candidates feeding PHI
   are profitable (and legal!) to replace on behalf of candidate C.  */

static bool
all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
{
  unsigned i;
  slsr_cand_t basis = lookup_cand (c->basis);
  slsr_cand_t phi_cand = *stmt_cand_map->get (phi);

  /* If the basis doesn't dominate the PHI (including when the PHI is
     in the same block as the basis), we won't be able to create a PHI
     using the basis here.  */
  basic_block basis_bb = gimple_bb (basis->cand_stmt);
  basic_block phi_bb = gimple_bb (phi);

  if (phi_bb == basis_bb
      || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
    return false;

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      /* If the PHI arg resides in a block not dominated by the basis,
	 we won't be able to create a PHI using the basis here.  */
      basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;

      if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
	return false;

      tree arg = gimple_phi_arg_def (phi, i);

      /* Arguments equal to the phi candidate's base expression need no
	 compensation code, so they are always acceptable.  */
      if (!operand_equal_p (arg, phi_cand->base_expr, 0))
	{
	  gimple *arg_def = SSA_NAME_DEF_STMT (arg);

	  if (gimple_code (arg_def) == GIMPLE_PHI)
	    {
	      /* Feeding PHIs must themselves be fully profitable.  */
	      if (!all_phi_incrs_profitable (c, as_a <gphi *> (arg_def)))
		return false;
	    }
	  else
	    {
	      int j;
	      slsr_cand_t arg_cand = base_cand_from_table (arg);
	      widest_int increment = arg_cand->index - basis->index;

	      /* Increments are canonicalized to their absolute value
		 except when doing pointer arithmetic.  */
	      if (!address_arithmetic_p && wi::neg_p (increment))
		increment = -increment;

	      j = incr_vec_index (increment);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  Conditional candidate %d, phi: ",
			   c->cand_num);
		  print_gimple_stmt (dump_file, phi, 0);
		  fputs ("    increment: ", dump_file);
		  print_decs (increment, dump_file);
		  if (j < 0)
		    fprintf (dump_file,
			     "\n  Not replaced; incr_vec overflow.\n");
		  else {
		    fprintf (dump_file, "\n  cost: %d\n", incr_vec[j].cost);
		    if (profitable_increment_p (j))
		      fputs ("  Replacing...\n", dump_file);
		    else
		      fputs ("  Not replaced.\n", dump_file);
		  }
		}

	      /* A missing (incr_vec overflow) or unprofitable increment
		 for any feeding candidate disqualifies the whole PHI.  */
	      if (j < 0 || !profitable_increment_p (j))
		return false;
	    }
	}
    }

  return true;
}
3406
3407 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3408 type TO_TYPE, and insert it in front of the statement represented
3409 by candidate C. Use *NEW_VAR to create the new SSA name. Return
3410 the new SSA name. */
3411
3412 static tree
3413 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3414 {
3415 tree cast_lhs;
3416 gassign *cast_stmt;
3417 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3418
3419 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3420 cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3421 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3422 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3423
3424 if (dump_file && (dump_flags & TDF_DETAILS))
3425 {
3426 fputs (" Inserting: ", dump_file);
3427 print_gimple_stmt (dump_file, cast_stmt, 0);
3428 }
3429
3430 return cast_lhs;
3431 }
3432
3433 /* Replace the RHS of the statement represented by candidate C with
3434 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3435 leave C unchanged or just interchange its operands. The original
3436 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3437 If the replacement was made and we are doing a details dump,
3438 return the revised statement, else NULL. */
3439
3440 static gimple *
3441 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3442 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3443 slsr_cand_t c)
3444 {
3445 if (new_code != old_code
3446 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3447 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3448 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3449 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3450 {
3451 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3452 slsr_cand_t cc = c;
3453 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3454 update_stmt (gsi_stmt (gsi));
3455 c->cand_stmt = gsi_stmt (gsi);
3456 while (cc->next_interp)
3457 {
3458 cc = lookup_cand (cc->next_interp);
3459 cc->cand_stmt = gsi_stmt (gsi);
3460 }
3461
3462 if (dump_file && (dump_flags & TDF_DETAILS))
3463 return gsi_stmt (gsi);
3464 }
3465
3466 else if (dump_file && (dump_flags & TDF_DETAILS))
3467 fputs (" (duplicate, not actually replacing)\n", dump_file);
3468
3469 return NULL;
3470 }
3471
/* Strength-reduce the statement represented by candidate C by replacing
   it with an equivalent addition or subtraction.  I is the index into
   the increment vector identifying C's increment.  BASIS_NAME is the
   rhs1 to use in creating the add/subtract.  A cast may be inserted
   ahead of the statement when the types involved differ.  */

static void
replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
{
  gimple *stmt_to_print = NULL;
  tree orig_rhs1, orig_rhs2;
  tree rhs2;
  enum tree_code orig_code, repl_code;
  widest_int cand_incr;

  orig_code = gimple_assign_rhs_code (c->cand_stmt);
  orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
  orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
  cand_incr = cand_increment (c);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0);
      stmt_to_print = c->cand_stmt;
    }

  /* Pointer candidates are replaced with pointer arithmetic.  */
  if (address_arithmetic_p)
    repl_code = POINTER_PLUS_EXPR;
  else
    repl_code = PLUS_EXPR;

  /* If the increment has an initializer T_0, replace the candidate
     statement with an add of the basis name and the initializer.  */
  if (incr_vec[i].initializer)
    {
      tree init_type = TREE_TYPE (incr_vec[i].initializer);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, init_type))
	rhs2 = incr_vec[i].initializer;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type,
					   incr_vec[i].initializer);

      /* When the recorded increment differs from the actual one,
	 the initializer was built for the negated value, so an add
	 becomes a subtract.  */
      if (incr_vec[i].incr != cand_incr)
	{
	  gcc_assert (repl_code == PLUS_EXPR);
	  repl_code = MINUS_EXPR;
	}

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  /* Otherwise, the increment is one of -1, 0, and 1.  Replace
     with a subtract of the stride from the basis name, a copy
     from the basis name, or an add of the stride to the basis
     name, respectively.  It may be necessary to introduce a
     cast (or reuse an existing cast).  */
  else if (cand_incr == 1)
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  else if (cand_incr == -1)
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);
      /* A subtract is generated here, which is invalid for pointer
	 arithmetic; the assert checks we aren't in that mode.  */
      gcc_assert (repl_code != POINTER_PLUS_EXPR);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);

      /* Replace unless the result would duplicate the existing
	 subtract statement.  */
      if (orig_code != MINUS_EXPR
	  || !operand_equal_p (basis_name, orig_rhs1, 0)
	  || !operand_equal_p (rhs2, orig_rhs2, 0))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  slsr_cand_t cc = c;
	  gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
	  update_stmt (gsi_stmt (gsi));
	  c->cand_stmt = gsi_stmt (gsi);
	  /* All interpretations of this statement must track the
	     replacement statement.  */
	  while (cc->next_interp)
	    {
	      cc = lookup_cand (cc->next_interp);
	      cc->cand_stmt = gsi_stmt (gsi);
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fputs ("  (duplicate, not actually replacing)\n", dump_file);
    }

  else if (cand_incr == 0)
    {
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      tree lhs_type = TREE_TYPE (lhs);
      tree basis_type = TREE_TYPE (basis_name);

      /* A zero increment means the candidate equals its basis;
	 replace with a copy, or with a cast when the types differ.  */
      if (types_compatible_p (lhs_type, basis_type))
	{
	  gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  slsr_cand_t cc = c;
	  gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, copy_stmt, false);
	  c->cand_stmt = copy_stmt;
	  while (cc->next_interp)
	    {
	      cc = lookup_cand (cc->next_interp);
	      cc->cand_stmt = copy_stmt;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = copy_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
	  slsr_cand_t cc = c;
	  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, cast_stmt, false);
	  c->cand_stmt = cast_stmt;
	  while (cc->next_interp)
	    {
	      cc = lookup_cand (cc->next_interp);
	      cc->cand_stmt = cast_stmt;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = cast_stmt;
	}
    }
  else
    gcc_unreachable ();

  if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0);
      fputs ("\n", dump_file);
    }
}
3632
3633 /* For each candidate in the tree rooted at C, replace it with
3634 an increment if such has been shown to be profitable. */
3635
3636 static void
3637 replace_profitable_candidates (slsr_cand_t c)
3638 {
3639 if (!cand_already_replaced (c))
3640 {
3641 widest_int increment = cand_abs_increment (c);
3642 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3643 int i;
3644
3645 i = incr_vec_index (increment);
3646
3647 /* Only process profitable increments. Nothing useful can be done
3648 to a cast or copy. */
3649 if (i >= 0
3650 && profitable_increment_p (i)
3651 && orig_code != SSA_NAME
3652 && !CONVERT_EXPR_CODE_P (orig_code))
3653 {
3654 if (phi_dependent_cand_p (c))
3655 {
3656 gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
3657
3658 if (all_phi_incrs_profitable (c, phi))
3659 {
3660 /* Look up the LHS SSA name from C's basis. This will be
3661 the RHS1 of the adds we will introduce to create new
3662 phi arguments. */
3663 slsr_cand_t basis = lookup_cand (c->basis);
3664 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3665
3666 /* Create a new phi statement that will represent C's true
3667 basis after the transformation is complete. */
3668 location_t loc = gimple_location (c->cand_stmt);
3669 tree name = create_phi_basis (c, phi, basis_name,
3670 loc, UNKNOWN_STRIDE);
3671
3672 /* Replace C with an add of the new basis phi and the
3673 increment. */
3674 replace_one_candidate (c, i, name);
3675 }
3676 }
3677 else
3678 {
3679 slsr_cand_t basis = lookup_cand (c->basis);
3680 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3681 replace_one_candidate (c, i, basis_name);
3682 }
3683 }
3684 }
3685
3686 if (c->sibling)
3687 replace_profitable_candidates (lookup_cand (c->sibling));
3688
3689 if (c->dependent)
3690 replace_profitable_candidates (lookup_cand (c->dependent));
3691 }
3692 \f
/* Analyze costs of related candidates in the candidate vector,
   and make beneficial replacements.  */

static void
analyze_candidates_and_replace (void)
{
  unsigned i;
  slsr_cand_t c;

  /* Each candidate that has a null basis and a non-null
     dependent is the root of a tree of related statements.
     Analyze each tree to determine a subset of those
     statements that can be replaced with maximum benefit.  */
  FOR_EACH_VEC_ELT (cand_vec, i, c)
    {
      slsr_cand_t first_dep;

      /* Only roots of dependency trees are processed here.  */
      if (c->basis != 0 || c->dependent == 0)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
		 c->cand_num);

      first_dep = lookup_cand (c->dependent);

      /* If this is a chain of CAND_REFs, unconditionally replace
	 each of them with a strength-reduced data reference.  */
      if (c->kind == CAND_REF)
	replace_refs (c);

      /* If the common stride of all related candidates is a known
	 constant, each candidate without a phi-dependence can be
	 profitably replaced.  Each replaces a multiply by a single
	 add, with the possibility that a feeding add also goes dead.
	 A candidate with a phi-dependence is replaced only if the
	 compensation code it requires is offset by the strength
	 reduction savings.  */
      else if (TREE_CODE (c->stride) == INTEGER_CST)
	replace_uncond_cands_and_profitable_phis (first_dep);

      /* When the stride is an SSA name, it may still be profitable
	 to replace some or all of the dependent candidates, depending
	 on whether the introduced increments can be reused, or are
	 less expensive to calculate than the replaced statements.  */
      else
	{
	  machine_mode mode;
	  bool speed;

	  /* Determine whether we'll be generating pointer arithmetic
	     when replacing candidates.  */
	  address_arithmetic_p = (c->kind == CAND_ADD
				  && POINTER_TYPE_P (c->cand_type));

	  /* If all candidates have already been replaced under other
	     interpretations, nothing remains to be done.  */
	  if (!count_candidates (c))
	    continue;

	  /* Construct an array of increments for this candidate chain.  */
	  incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
	  incr_vec_len = 0;
	  record_increments (c);

	  /* Determine which increments are profitable to replace.  */
	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
	  speed = optimize_cands_for_speed_p (c);
	  analyze_increments (first_dep, mode, speed);

	  /* Insert initializers of the form T_0 = stride * increment
	     for use in profitable replacements.  */
	  insert_initializers (first_dep);
	  dump_incr_vec ();

	  /* Perform the replacements.  */
	  replace_profitable_candidates (first_dep);
	  free (incr_vec);
	}
    }
}
3774
3775 namespace {
3776
/* Static metadata describing the "slsr" pass to the pass manager.
   The pass requires a CFG and SSA form and provides/destroys no
   properties of its own.  */

const pass_data pass_data_strength_reduction =
{
  GIMPLE_PASS, /* type */
  "slsr", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_GIMPLE_SLSR, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3789
/* Pass wrapper exposing straight-line strength reduction to the
   GIMPLE pass manager.  */

class pass_strength_reduction : public gimple_opt_pass
{
public:
  pass_strength_reduction (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strength_reduction, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when strength reduction is enabled (-ftree-slsr).  */
  virtual bool gate (function *) { return flag_tree_slsr; }
  virtual unsigned int execute (function *);

}; // class pass_strength_reduction
3802
/* Main entry point for the pass: set up per-invocation state, find
   candidates in FUN, analyze and replace them, then release all
   allocated state.  */

unsigned
pass_strength_reduction::execute (function *fun)
{
  /* Create the obstack where candidates will reside.  */
  gcc_obstack_init (&cand_obstack);

  /* Allocate the candidate vector.  */
  cand_vec.create (128);

  /* Allocate the mapping from statements to candidate indices.  */
  stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;

  /* Create the obstack where candidate chains will reside.  */
  gcc_obstack_init (&chain_obstack);

  /* Allocate the mapping from base expressions to candidate chains.  */
  base_cand_map = new hash_table<cand_chain_hasher> (500);

  /* Allocate the mapping from bases to alternative bases.  */
  alt_base_map = new hash_map<tree, tree>;

  /* Initialize the loop optimizer.  We need to detect flow across
     back edges, and this gives us dominator information as well.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Walk the CFG in predominator order looking for strength reduction
     candidates.  */
  find_candidates_dom_walker (CDI_DOMINATORS)
    .walk (fun->cfg->x_entry_block_ptr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_cand_vec ();
      dump_cand_chains ();
    }

  /* The alternative-base mapping is only needed during candidate
     discovery, so release it before the analysis phase.  */
  delete alt_base_map;
  free_affine_expand_cache (&name_expansions);

  /* Analyze costs and make appropriate replacements.  */
  analyze_candidates_and_replace ();

  /* Tear down all remaining per-invocation state.  */
  loop_optimizer_finalize ();
  delete base_cand_map;
  base_cand_map = NULL;
  obstack_free (&chain_obstack, NULL);
  delete stmt_cand_map;
  cand_vec.release ();
  obstack_free (&cand_obstack, NULL);

  return 0;
}
3855
3856 } // anon namespace
3857
/* Factory function used by the pass manager to instantiate the
   strength-reduction pass in context CTXT.  */

gimple_opt_pass *
make_pass_strength_reduction (gcc::context *ctxt)
{
  return new pass_strength_reduction (ctxt);
}