tree-ssa.h: Don't include gimple-low.h...
[gcc.git] / gcc / gimple-ssa-strength-reduction.c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
26
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
31
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
35
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "gimple.h"
41 #include "basic-block.h"
42 #include "tree-pass.h"
43 #include "cfgloop.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-ssa.h"
46 #include "domwalk.h"
47 #include "pointer-set.h"
48 #include "expmed.h"
49 #include "params.h"
50 #include "hash-table.h"
51 #include "tree-ssa-address.h"
52 \f
53 /* Information about a strength reduction candidate. Each statement
54 in the candidate table represents an expression of one of the
55 following forms (the special case of CAND_REF will be described
56 later):
57
58 (CAND_MULT) S1: X = (B + i) * S
59 (CAND_ADD) S1: X = B + (i * S)
60
61 Here X and B are SSA names, i is an integer constant, and S is
62 either an SSA name or a constant. We call B the "base," i the
63 "index", and S the "stride."
64
65 Any statement S0 that dominates S1 and is of the form:
66
67 (CAND_MULT) S0: Y = (B + i') * S
68 (CAND_ADD) S0: Y = B + (i' * S)
69
70 is called a "basis" for S1. In both cases, S1 may be replaced by
71
72 S1': X = Y + (i - i') * S,
73
74 where (i - i') * S is folded to the extent possible.
75
76 All gimple statements are visited in dominator order, and each
77 statement that may contribute to one of the forms of S1 above is
78 given at least one entry in the candidate table. Such statements
79 include addition, pointer addition, subtraction, multiplication,
80 negation, copies, and nontrivial type casts. If a statement may
81 represent more than one expression of the forms of S1 above,
82 multiple "interpretations" are stored in the table and chained
83 together. Examples:
84
85 * An add of two SSA names may treat either operand as the base.
86 * A multiply of two SSA names, likewise.
87 * A copy or cast may be thought of as either a CAND_MULT with
88 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
89
90 Candidate records are allocated from an obstack. They are addressed
91 both from a hash table keyed on S1, and from a vector of candidate
92 pointers arranged in predominator order.
93
94 Opportunity note
95 ----------------
96 Currently we don't recognize:
97
98 S0: Y = (S * i') - B
99 S1: X = (S * i) - B
100
101 as a strength reduction opportunity, even though this S1 would
102 also be replaceable by the S1' above. This can be added if it
103 comes up in practice.
104
105 Strength reduction in addressing
106 --------------------------------
107 There is another kind of candidate known as CAND_REF. A CAND_REF
108 describes a statement containing a memory reference having
109 complex addressing that might benefit from strength reduction.
110 Specifically, we are interested in references for which
111 get_inner_reference returns a base address, offset, and bitpos as
112 follows:
113
114 base: MEM_REF (T1, C1)
115 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
116 bitpos: C4 * BITS_PER_UNIT
117
118 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
119 arbitrary integer constants. Note that C2 may be zero, in which
120 case the offset will be MULT_EXPR (T2, C3).
121
122 When this pattern is recognized, the original memory reference
123 can be replaced with:
124
125 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
126 C1 + (C2 * C3) + C4)
127
128 which distributes the multiply to allow constant folding. When
129 two or more addressing expressions can be represented by MEM_REFs
130 of this form, differing only in the constants C1, C2, and C4,
131 making this substitution produces more efficient addressing during
132 the RTL phases. When there are not at least two expressions with
133 the same values of T1, T2, and C3, there is nothing to be gained
134 by the replacement.
135
136 Strength reduction of CAND_REFs uses the same infrastructure as
137 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
138 field, MULT_EXPR (T2, C3) in the stride (S) field, and
139 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
140 is thus another CAND_REF with the same B and S values. When at
141 least two CAND_REFs are chained together using the basis relation,
142 each of them is replaced as above, resulting in improved code
143 generation for addressing.
144
145 Conditional candidates
146 ======================
147
148 Conditional candidates are best illustrated with an example.
149 Consider the code sequence:
150
151 (1) x_0 = ...;
152 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
153 if (...)
154 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
155 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
156 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
157 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
158
159 Here strength reduction is complicated by the uncertain value of x_2.
160 A legitimate transformation is:
161
162 (1) x_0 = ...;
163 (2) a_0 = x_0 * 5;
164 if (...)
165 {
166 (3) [x_1 = x_0 + 1;]
167 (3a) t_1 = a_0 + 5;
168 }
169 (4) [x_2 = PHI <x_0, x_1>;]
170 (4a) t_2 = PHI <a_0, t_1>;
171 (5) [x_3 = x_2 + 1;]
172 (6r) a_1 = t_2 + 5;
173
174 where the bracketed instructions may go dead.
175
176 To recognize this opportunity, we have to observe that statement (6)
177 has a "hidden basis" (2). The hidden basis is unlike a normal basis
178 in that the statement and the hidden basis have different base SSA
179 names (x_2 and x_0, respectively). The relationship is established
180 when a statement's base name (x_2) is defined by a phi statement (4),
181 each argument of which (x_0, x_1) has an identical "derived base name."
182 If the argument is defined by a candidate (as x_1 is by (3)) that is a
183 CAND_ADD having a stride of 1, the derived base name of the argument is
184 the base name of the candidate (x_0). Otherwise, the argument itself
185 is its derived base name (as is the case with argument x_0).
186
187 The hidden basis for statement (6) is the nearest dominating candidate
188 whose base name is the derived base name (x_0) of the feeding phi (4),
189 and whose stride is identical to that of the statement. We can then
190 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
191 allowing the final replacement of (6) by the strength-reduced (6r).
192
193 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
194 A CAND_PHI is not a candidate for replacement, but is maintained in the
195 candidate table to ease discovery of hidden bases. Any phi statement
196 whose arguments share a common derived base name is entered into the
197 table with the derived base name, an (arbitrary) index of zero, and a
198 stride of 1. A statement with a hidden basis can then be detected by
199 simply looking up its feeding phi definition in the candidate table,
200 extracting the derived base name, and searching for a basis in the
201 usual manner after substituting the derived base name.
202
203 Note that the transformation is only valid when the original phi and
204 the statements that define the phi's arguments are all at the same
205 position in the loop hierarchy. */
206
207
208 /* Index into the candidate vector, offset by 1. VECs are zero-based,
209 while cand_idx's are one-based, with zero indicating null. */
210 typedef unsigned cand_idx;
211
212 /* The kind of candidate. */
213 enum cand_kind
214 {
215 CAND_MULT,
216 CAND_ADD,
217 CAND_REF,
218 CAND_PHI
219 };
220
221 struct slsr_cand_d
222 {
223 /* The candidate statement S1. */
224 gimple cand_stmt;
225
226 /* The base expression B: often an SSA name, but not always. */
227 tree base_expr;
228
229 /* The stride S. */
230 tree stride;
231
232 /* The index constant i. */
233 double_int index;
234
235 /* The type of the candidate. This is normally the type of base_expr,
236 but casts may have occurred when combining feeding instructions.
237 A candidate can only be a basis for candidates of the same final type.
238 (For CAND_REFs, this is the type to be used for operand 1 of the
239 replacement MEM_REF.) */
240 tree cand_type;
241
242 /* The kind of candidate (CAND_MULT, etc.). */
243 enum cand_kind kind;
244
245 /* Index of this candidate in the candidate vector. */
246 cand_idx cand_num;
247
248 /* Index of the next candidate record for the same statement.
249 A statement may be useful in more than one way (e.g., due to
250 commutativity). So we can have multiple "interpretations"
251 of a statement. */
252 cand_idx next_interp;
253
254 /* Index of the basis statement S0, if any, in the candidate vector. */
255 cand_idx basis;
256
257 /* First candidate for which this candidate is a basis, if one exists. */
258 cand_idx dependent;
259
260 /* Next candidate having the same basis as this one. */
261 cand_idx sibling;
262
263 /* If this is a conditional candidate, the CAND_PHI candidate
264 that defines the base SSA name B. */
265 cand_idx def_phi;
266
267 /* Savings that can be expected from eliminating dead code if this
268 candidate is replaced. */
269 int dead_savings;
270 };
271
272 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
273 typedef const struct slsr_cand_d *const_slsr_cand_t;
274
275 /* Pointers to candidates are chained together as part of a mapping
276 from base expressions to the candidates that use them. */
277
278 struct cand_chain_d
279 {
280 /* Base expression for the chain of candidates: often, but not
281 always, an SSA name. */
282 tree base_expr;
283
284 /* Pointer to a candidate. */
285 slsr_cand_t cand;
286
287 /* Chain pointer. */
288 struct cand_chain_d *next;
289
290 };
291
292 typedef struct cand_chain_d cand_chain, *cand_chain_t;
293 typedef const struct cand_chain_d *const_cand_chain_t;
294
295 /* Information about a unique "increment" associated with candidates
296 having an SSA name for a stride. An increment is the difference
297 between the index of the candidate and the index of its basis,
298 i.e., (i - i') as discussed in the module commentary.
299
300 When we are not going to generate address arithmetic we treat
301 increments that differ only in sign as the same, allowing sharing
302 of the cost of initializers. The absolute value of the increment
303 is stored in the incr_info. */
304
305 struct incr_info_d
306 {
307 /* The increment that relates a candidate to its basis. */
308 double_int incr;
309
310 /* How many times the increment occurs in the candidate tree. */
311 unsigned count;
312
313 /* Cost of replacing candidates using this increment. Negative and
314 zero costs indicate replacement should be performed. */
315 int cost;
316
317 /* If this increment is profitable but is not -1, 0, or 1, it requires
318 an initializer T_0 = stride * incr to be found or introduced in the
319 nearest common dominator of all candidates. This field holds T_0
320 for subsequent use. */
321 tree initializer;
322
323 /* If the initializer was found to already exist, this is the block
324 where it was found. */
325 basic_block init_bb;
326 };
327
328 typedef struct incr_info_d incr_info, *incr_info_t;
329
330 /* Candidates are maintained in a vector. If candidate X dominates
331 candidate Y, then X appears before Y in the vector; but the
332 converse does not necessarily hold. */
333 static vec<slsr_cand_t> cand_vec;
334
335 enum cost_consts
336 {
337 COST_NEUTRAL = 0,
338 COST_INFINITE = 1000
339 };
340
341 enum stride_status
342 {
343 UNKNOWN_STRIDE = 0,
344 KNOWN_STRIDE = 1
345 };
346
347 enum phi_adjust_status
348 {
349 NOT_PHI_ADJUST = 0,
350 PHI_ADJUST = 1
351 };
352
353 enum count_phis_status
354 {
355 DONT_COUNT_PHIS = 0,
356 COUNT_PHIS = 1
357 };
358
359 /* Pointer map embodying a mapping from statements to candidates. */
360 static struct pointer_map_t *stmt_cand_map;
361
362 /* Obstack for candidates. */
363 static struct obstack cand_obstack;
364
365 /* Obstack for candidate chains. */
366 static struct obstack chain_obstack;
367
368 /* An array INCR_VEC of incr_infos is used during analysis of related
369 candidates having an SSA name for a stride. INCR_VEC_LEN describes
370 its current length. MAX_INCR_VEC_LEN is used to avoid costly
371 pathological cases. */
372 static incr_info_t incr_vec;
373 static unsigned incr_vec_len;
374 const int MAX_INCR_VEC_LEN = 16;
375
376 /* For a chain of candidates with unknown stride, indicates whether or not
377 we must generate pointer arithmetic when replacing statements. */
378 static bool address_arithmetic_p;
379
380 /* Forward function declarations. */
381 static slsr_cand_t base_cand_from_table (tree);
382 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
383 static bool legal_cast_p_1 (tree, tree);
384 \f
385 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
386
387 static slsr_cand_t
388 lookup_cand (cand_idx idx)
389 {
390 return cand_vec[idx - 1];
391 }
392
393 /* Helper for hashing a candidate chain header. */
394
395 struct cand_chain_hasher : typed_noop_remove <cand_chain>
396 {
397 typedef cand_chain value_type;
398 typedef cand_chain compare_type;
399 static inline hashval_t hash (const value_type *);
400 static inline bool equal (const value_type *, const compare_type *);
401 };
402
403 inline hashval_t
404 cand_chain_hasher::hash (const value_type *p)
405 {
406 tree base_expr = p->base_expr;
407 return iterative_hash_expr (base_expr, 0);
408 }
409
410 inline bool
411 cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
412 {
413 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
414 }
415
416 /* Hash table embodying a mapping from base exprs to chains of candidates. */
417 static hash_table <cand_chain_hasher> base_cand_map;
418 \f
419 /* Look in the candidate table for a CAND_PHI that defines BASE and
420 return it if found; otherwise return NULL. */
421
422 static cand_idx
423 find_phi_def (tree base)
424 {
425 slsr_cand_t c;
426
427 if (TREE_CODE (base) != SSA_NAME)
428 return 0;
429
430 c = base_cand_from_table (base);
431
432 if (!c || c->kind != CAND_PHI)
433 return 0;
434
435 return c->cand_num;
436 }
437
438 /* Helper routine for find_basis_for_candidate. May be called twice:
439 once for the candidate's base expr, and optionally again for the
440 candidate's phi definition. */
441
442 static slsr_cand_t
443 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
444 {
445 cand_chain mapping_key;
446 cand_chain_t chain;
447 slsr_cand_t basis = NULL;
448
449 // Limit potential of N^2 behavior for long candidate chains.
450 int iters = 0;
451 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
452
453 mapping_key.base_expr = base_expr;
454 chain = base_cand_map.find (&mapping_key);
455
456 for (; chain && iters < max_iters; chain = chain->next, ++iters)
457 {
458 slsr_cand_t one_basis = chain->cand;
459
460 if (one_basis->kind != c->kind
461 || one_basis->cand_stmt == c->cand_stmt
462 || !operand_equal_p (one_basis->stride, c->stride, 0)
463 || !types_compatible_p (one_basis->cand_type, c->cand_type)
464 || !dominated_by_p (CDI_DOMINATORS,
465 gimple_bb (c->cand_stmt),
466 gimple_bb (one_basis->cand_stmt)))
467 continue;
468
469 if (!basis || basis->cand_num < one_basis->cand_num)
470 basis = one_basis;
471 }
472
473 return basis;
474 }
475
476 /* Use the base expr from candidate C to look for possible candidates
477 that can serve as a basis for C. Each potential basis must also
478 appear in a block that dominates the candidate statement and have
479 the same stride and type. If more than one possible basis exists,
480 the one with highest index in the vector is chosen; this will be
481 the most immediately dominating basis. */
482
483 static int
484 find_basis_for_candidate (slsr_cand_t c)
485 {
486 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
487
488 /* If a candidate doesn't have a basis using its base expression,
489 it may have a basis hidden by one or more intervening phis. */
490 if (!basis && c->def_phi)
491 {
492 basic_block basis_bb, phi_bb;
493 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
494 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
495
496 if (basis)
497 {
498 /* A hidden basis must dominate the phi-definition of the
499 candidate's base name. */
500 phi_bb = gimple_bb (phi_cand->cand_stmt);
501 basis_bb = gimple_bb (basis->cand_stmt);
502
503 if (phi_bb == basis_bb
504 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
505 {
506 basis = NULL;
507 c->basis = 0;
508 }
509
510 /* If we found a hidden basis, estimate additional dead-code
511 savings if the phi and its feeding statements can be removed. */
512 if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
513 c->dead_savings += phi_cand->dead_savings;
514 }
515 }
516
517 if (basis)
518 {
519 c->sibling = basis->dependent;
520 basis->dependent = c->cand_num;
521 return basis->cand_num;
522 }
523
524 return 0;
525 }
526
527 /* Record a mapping from the base expression of C to C itself, indicating that
528 C may potentially serve as a basis using that base expression. */
529
530 static void
531 record_potential_basis (slsr_cand_t c)
532 {
533 cand_chain_t node;
534 cand_chain **slot;
535
536 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
537 node->base_expr = c->base_expr;
538 node->cand = c;
539 node->next = NULL;
540 slot = base_cand_map.find_slot (node, INSERT);
541
542 if (*slot)
543 {
544 cand_chain_t head = (cand_chain_t) (*slot);
545 node->next = head->next;
546 head->next = node;
547 }
548 else
549 *slot = node;
550 }
551
552 /* Allocate storage for a new candidate and initialize its fields.
553 Attempt to find a basis for the candidate. */
554
555 static slsr_cand_t
556 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
557 double_int index, tree stride, tree ctype,
558 unsigned savings)
559 {
560 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
561 sizeof (slsr_cand));
562 c->cand_stmt = gs;
563 c->base_expr = base;
564 c->stride = stride;
565 c->index = index;
566 c->cand_type = ctype;
567 c->kind = kind;
568 c->cand_num = cand_vec.length () + 1;
569 c->next_interp = 0;
570 c->dependent = 0;
571 c->sibling = 0;
572 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
573 c->dead_savings = savings;
574
575 cand_vec.safe_push (c);
576
577 if (kind == CAND_PHI)
578 c->basis = 0;
579 else
580 c->basis = find_basis_for_candidate (c);
581
582 record_potential_basis (c);
583
584 return c;
585 }
586
587 /* Determine the target cost of statement GS when compiling according
588 to SPEED. */
589
590 static int
591 stmt_cost (gimple gs, bool speed)
592 {
593 tree lhs, rhs1, rhs2;
594 enum machine_mode lhs_mode;
595
596 gcc_assert (is_gimple_assign (gs));
597 lhs = gimple_assign_lhs (gs);
598 rhs1 = gimple_assign_rhs1 (gs);
599 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
600
601 switch (gimple_assign_rhs_code (gs))
602 {
603 case MULT_EXPR:
604 rhs2 = gimple_assign_rhs2 (gs);
605
606 if (host_integerp (rhs2, 0))
607 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
608
609 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
610 return mul_cost (speed, lhs_mode);
611
612 case PLUS_EXPR:
613 case POINTER_PLUS_EXPR:
614 case MINUS_EXPR:
615 return add_cost (speed, lhs_mode);
616
617 case NEGATE_EXPR:
618 return neg_cost (speed, lhs_mode);
619
620 case NOP_EXPR:
621 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
622
623 /* Note that we don't assign costs to copies that in most cases
624 will go away. */
625 default:
626 ;
627 }
628
629 gcc_unreachable ();
630 return 0;
631 }
632
633 /* Look up the defining statement for BASE_IN and return a pointer
634 to its candidate in the candidate table, if any; otherwise NULL.
635 Only CAND_ADD and CAND_MULT candidates are returned. */
636
637 static slsr_cand_t
638 base_cand_from_table (tree base_in)
639 {
640 slsr_cand_t *result;
641
642 gimple def = SSA_NAME_DEF_STMT (base_in);
643 if (!def)
644 return (slsr_cand_t) NULL;
645
646 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
647
648 if (result && (*result)->kind != CAND_REF)
649 return *result;
650
651 return (slsr_cand_t) NULL;
652 }
653
654 /* Add an entry to the statement-to-candidate mapping. */
655
656 static void
657 add_cand_for_stmt (gimple gs, slsr_cand_t c)
658 {
659 void **slot = pointer_map_insert (stmt_cand_map, gs);
660 gcc_assert (!*slot);
661 *slot = c;
662 }
663 \f
664 /* Given PHI which contains a phi statement, determine whether it
665 satisfies all the requirements of a phi candidate. If so, create
666 a candidate. Note that a CAND_PHI never has a basis itself, but
667 is used to help find a basis for subsequent candidates. */
668
669 static void
670 slsr_process_phi (gimple phi, bool speed)
671 {
672 unsigned i;
673 tree arg0_base = NULL_TREE, base_type;
674 slsr_cand_t c;
675 struct loop *cand_loop = gimple_bb (phi)->loop_father;
676 unsigned savings = 0;
677
678 /* A CAND_PHI requires each of its arguments to have the same
679 derived base name. (See the module header commentary for a
680 definition of derived base names.) Furthermore, all feeding
681 definitions must be in the same position in the loop hierarchy
682 as PHI. */
683
684 for (i = 0; i < gimple_phi_num_args (phi); i++)
685 {
686 slsr_cand_t arg_cand;
687 tree arg = gimple_phi_arg_def (phi, i);
688 tree derived_base_name = NULL_TREE;
689 gimple arg_stmt = NULL;
690 basic_block arg_bb = NULL;
691
692 if (TREE_CODE (arg) != SSA_NAME)
693 return;
694
695 arg_cand = base_cand_from_table (arg);
696
697 if (arg_cand)
698 {
699 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
700 {
701 if (!arg_cand->next_interp)
702 return;
703
704 arg_cand = lookup_cand (arg_cand->next_interp);
705 }
706
707 if (!integer_onep (arg_cand->stride))
708 return;
709
710 derived_base_name = arg_cand->base_expr;
711 arg_stmt = arg_cand->cand_stmt;
712 arg_bb = gimple_bb (arg_stmt);
713
714 /* Gather potential dead code savings if the phi statement
715 can be removed later on. */
716 if (has_single_use (arg))
717 {
718 if (gimple_code (arg_stmt) == GIMPLE_PHI)
719 savings += arg_cand->dead_savings;
720 else
721 savings += stmt_cost (arg_stmt, speed);
722 }
723 }
724 else
725 {
726 derived_base_name = arg;
727
728 if (SSA_NAME_IS_DEFAULT_DEF (arg))
729 arg_bb = single_succ (ENTRY_BLOCK_PTR);
730 else
731 gimple_bb (SSA_NAME_DEF_STMT (arg));
732 }
733
734 if (!arg_bb || arg_bb->loop_father != cand_loop)
735 return;
736
737 if (i == 0)
738 arg0_base = derived_base_name;
739 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
740 return;
741 }
742
743 /* Create the candidate. "alloc_cand_and_find_basis" is named
744 misleadingly for this case, as no basis will be sought for a
745 CAND_PHI. */
746 base_type = TREE_TYPE (arg0_base);
747
748 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base, double_int_zero,
749 integer_one_node, base_type, savings);
750
751 /* Add the candidate to the statement-candidate mapping. */
752 add_cand_for_stmt (phi, c);
753 }
754
755 /* Given PBASE which is a pointer to tree, look up the defining
756 statement for it and check whether the candidate is in the
757 form of:
758
759 X = B + (1 * S), S is integer constant
760 X = B + (i * S), S is integer one
761
762 If so, set PBASE to the candidate's base_expr and return double
763 int (i * S).
764 Otherwise, just return double int zero. */
765
766 static double_int
767 backtrace_base_for_ref (tree *pbase)
768 {
769 tree base_in = *pbase;
770 slsr_cand_t base_cand;
771
772 STRIP_NOPS (base_in);
773
774 /* Strip off widening conversion(s) to handle cases where
775 e.g. 'B' is widened from an 'int' in order to calculate
776 a 64-bit address. */
777 if (CONVERT_EXPR_P (base_in)
778 && legal_cast_p_1 (base_in, TREE_OPERAND (base_in, 0)))
779 base_in = get_unwidened (base_in, NULL_TREE);
780
781 if (TREE_CODE (base_in) != SSA_NAME)
782 return tree_to_double_int (integer_zero_node);
783
784 base_cand = base_cand_from_table (base_in);
785
786 while (base_cand && base_cand->kind != CAND_PHI)
787 {
788 if (base_cand->kind == CAND_ADD
789 && base_cand->index.is_one ()
790 && TREE_CODE (base_cand->stride) == INTEGER_CST)
791 {
792 /* X = B + (1 * S), S is integer constant. */
793 *pbase = base_cand->base_expr;
794 return tree_to_double_int (base_cand->stride);
795 }
796 else if (base_cand->kind == CAND_ADD
797 && TREE_CODE (base_cand->stride) == INTEGER_CST
798 && integer_onep (base_cand->stride))
799 {
800 /* X = B + (i * S), S is integer one. */
801 *pbase = base_cand->base_expr;
802 return base_cand->index;
803 }
804
805 if (base_cand->next_interp)
806 base_cand = lookup_cand (base_cand->next_interp);
807 else
808 base_cand = NULL;
809 }
810
811 return tree_to_double_int (integer_zero_node);
812 }
813
814 /* Look for the following pattern:
815
816 *PBASE: MEM_REF (T1, C1)
817
818 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
819 or
820 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
821 or
822 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
823
824 *PINDEX: C4 * BITS_PER_UNIT
825
826 If not present, leave the input values unchanged and return FALSE.
827 Otherwise, modify the input values as follows and return TRUE:
828
829 *PBASE: T1
830 *POFFSET: MULT_EXPR (T2, C3)
831 *PINDEX: C1 + (C2 * C3) + C4
832
833 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
834 will be further restructured to:
835
836 *PBASE: T1
837 *POFFSET: MULT_EXPR (T2', C3)
838 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
839
840 static bool
841 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
842 tree *ptype)
843 {
844 tree base = *pbase, offset = *poffset;
845 double_int index = *pindex;
846 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
847 tree mult_op0, mult_op1, t1, t2, type;
848 double_int c1, c2, c3, c4, c5;
849
850 if (!base
851 || !offset
852 || TREE_CODE (base) != MEM_REF
853 || TREE_CODE (offset) != MULT_EXPR
854 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
855 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
856 return false;
857
858 t1 = TREE_OPERAND (base, 0);
859 c1 = mem_ref_offset (base);
860 type = TREE_TYPE (TREE_OPERAND (base, 1));
861
862 mult_op0 = TREE_OPERAND (offset, 0);
863 mult_op1 = TREE_OPERAND (offset, 1);
864
865 c3 = tree_to_double_int (mult_op1);
866
867 if (TREE_CODE (mult_op0) == PLUS_EXPR)
868
869 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
870 {
871 t2 = TREE_OPERAND (mult_op0, 0);
872 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
873 }
874 else
875 return false;
876
877 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
878
879 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
880 {
881 t2 = TREE_OPERAND (mult_op0, 0);
882 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
883 }
884 else
885 return false;
886
887 else
888 {
889 t2 = mult_op0;
890 c2 = double_int_zero;
891 }
892
893 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
894 c5 = backtrace_base_for_ref (&t2);
895
896 *pbase = t1;
897 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
898 double_int_to_tree (sizetype, c3));
899 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
900 *ptype = type;
901
902 return true;
903 }
904
905 /* Given GS which contains a data reference, create a CAND_REF entry in
906 the candidate table and attempt to find a basis. */
907
908 static void
909 slsr_process_ref (gimple gs)
910 {
911 tree ref_expr, base, offset, type;
912 HOST_WIDE_INT bitsize, bitpos;
913 enum machine_mode mode;
914 int unsignedp, volatilep;
915 double_int index;
916 slsr_cand_t c;
917
918 if (gimple_vdef (gs))
919 ref_expr = gimple_assign_lhs (gs);
920 else
921 ref_expr = gimple_assign_rhs1 (gs);
922
923 if (!handled_component_p (ref_expr)
924 || TREE_CODE (ref_expr) == BIT_FIELD_REF
925 || (TREE_CODE (ref_expr) == COMPONENT_REF
926 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
927 return;
928
929 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
930 &unsignedp, &volatilep, false);
931 index = double_int::from_uhwi (bitpos);
932
933 if (!restructure_reference (&base, &offset, &index, &type))
934 return;
935
936 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
937 type, 0);
938
939 /* Add the candidate to the statement-candidate mapping. */
940 add_cand_for_stmt (gs, c);
941 }
942
943 /* Create a candidate entry for a statement GS, where GS multiplies
944 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
945 about the two SSA names into the new candidate. Return the new
946 candidate. */
947
948 static slsr_cand_t
949 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
950 {
951 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
952 double_int index;
953 unsigned savings = 0;
954 slsr_cand_t c;
955 slsr_cand_t base_cand = base_cand_from_table (base_in);
956
957 /* Look at all interpretations of the base candidate, if necessary,
958 to find information to propagate into this candidate. */
959 while (base_cand && !base && base_cand->kind != CAND_PHI)
960 {
961
962 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
963 {
964 /* Y = (B + i') * 1
965 X = Y * Z
966 ================
967 X = (B + i') * Z */
968 base = base_cand->base_expr;
969 index = base_cand->index;
970 stride = stride_in;
971 ctype = base_cand->cand_type;
972 if (has_single_use (base_in))
973 savings = (base_cand->dead_savings
974 + stmt_cost (base_cand->cand_stmt, speed));
975 }
976 else if (base_cand->kind == CAND_ADD
977 && TREE_CODE (base_cand->stride) == INTEGER_CST)
978 {
979 /* Y = B + (i' * S), S constant
980 X = Y * Z
981 ============================
982 X = B + ((i' * S) * Z) */
983 base = base_cand->base_expr;
984 index = base_cand->index * tree_to_double_int (base_cand->stride);
985 stride = stride_in;
986 ctype = base_cand->cand_type;
987 if (has_single_use (base_in))
988 savings = (base_cand->dead_savings
989 + stmt_cost (base_cand->cand_stmt, speed));
990 }
991
992 if (base_cand->next_interp)
993 base_cand = lookup_cand (base_cand->next_interp);
994 else
995 base_cand = NULL;
996 }
997
998 if (!base)
999 {
1000 /* No interpretations had anything useful to propagate, so
1001 produce X = (Y + 0) * Z. */
1002 base = base_in;
1003 index = double_int_zero;
1004 stride = stride_in;
1005 ctype = TREE_TYPE (base_in);
1006 }
1007
1008 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1009 ctype, savings);
1010 return c;
1011 }
1012
1013 /* Create a candidate entry for a statement GS, where GS multiplies
1014 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1015 information about BASE_IN into the new candidate. Return the new
1016 candidate. */
1017
1018 static slsr_cand_t
1019 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1020 {
1021 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1022 double_int index, temp;
1023 unsigned savings = 0;
1024 slsr_cand_t c;
1025 slsr_cand_t base_cand = base_cand_from_table (base_in);
1026
1027 /* Look at all interpretations of the base candidate, if necessary,
1028 to find information to propagate into this candidate. */
1029 while (base_cand && !base && base_cand->kind != CAND_PHI)
1030 {
1031 if (base_cand->kind == CAND_MULT
1032 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1033 {
1034 /* Y = (B + i') * S, S constant
1035 X = Y * c
1036 ============================
1037 X = (B + i') * (S * c) */
1038 base = base_cand->base_expr;
1039 index = base_cand->index;
1040 temp = tree_to_double_int (base_cand->stride)
1041 * tree_to_double_int (stride_in);
1042 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
1043 ctype = base_cand->cand_type;
1044 if (has_single_use (base_in))
1045 savings = (base_cand->dead_savings
1046 + stmt_cost (base_cand->cand_stmt, speed));
1047 }
1048 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1049 {
1050 /* Y = B + (i' * 1)
1051 X = Y * c
1052 ===========================
1053 X = (B + i') * c */
1054 base = base_cand->base_expr;
1055 index = base_cand->index;
1056 stride = stride_in;
1057 ctype = base_cand->cand_type;
1058 if (has_single_use (base_in))
1059 savings = (base_cand->dead_savings
1060 + stmt_cost (base_cand->cand_stmt, speed));
1061 }
1062 else if (base_cand->kind == CAND_ADD
1063 && base_cand->index.is_one ()
1064 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1065 {
1066 /* Y = B + (1 * S), S constant
1067 X = Y * c
1068 ===========================
1069 X = (B + S) * c */
1070 base = base_cand->base_expr;
1071 index = tree_to_double_int (base_cand->stride);
1072 stride = stride_in;
1073 ctype = base_cand->cand_type;
1074 if (has_single_use (base_in))
1075 savings = (base_cand->dead_savings
1076 + stmt_cost (base_cand->cand_stmt, speed));
1077 }
1078
1079 if (base_cand->next_interp)
1080 base_cand = lookup_cand (base_cand->next_interp);
1081 else
1082 base_cand = NULL;
1083 }
1084
1085 if (!base)
1086 {
1087 /* No interpretations had anything useful to propagate, so
1088 produce X = (Y + 0) * c. */
1089 base = base_in;
1090 index = double_int_zero;
1091 stride = stride_in;
1092 ctype = TREE_TYPE (base_in);
1093 }
1094
1095 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1096 ctype, savings);
1097 return c;
1098 }
1099
1100 /* Given GS which is a multiply of scalar integers, make an appropriate
1101 entry in the candidate table. If this is a multiply of two SSA names,
1102 create two CAND_MULT interpretations and attempt to find a basis for
1103 each of them. Otherwise, create a single CAND_MULT and attempt to
1104 find a basis. */
1105
1106 static void
1107 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1108 {
1109 slsr_cand_t c, c2;
1110
1111 /* If this is a multiply of an SSA name with itself, it is highly
1112 unlikely that we will get a strength reduction opportunity, so
1113 don't record it as a candidate. This simplifies the logic for
1114 finding a basis, so if this is removed that must be considered. */
1115 if (rhs1 == rhs2)
1116 return;
1117
1118 if (TREE_CODE (rhs2) == SSA_NAME)
1119 {
1120 /* Record an interpretation of this statement in the candidate table
1121 assuming RHS1 is the base expression and RHS2 is the stride. */
1122 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1123
1124 /* Add the first interpretation to the statement-candidate mapping. */
1125 add_cand_for_stmt (gs, c);
1126
1127 /* Record another interpretation of this statement assuming RHS1
1128 is the stride and RHS2 is the base expression. */
1129 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1130 c->next_interp = c2->cand_num;
1131 }
1132 else
1133 {
1134 /* Record an interpretation for the multiply-immediate. */
1135 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1136
1137 /* Add the interpretation to the statement-candidate mapping. */
1138 add_cand_for_stmt (gs, c);
1139 }
1140 }
1141
1142 /* Create a candidate entry for a statement GS, where GS adds two
1143 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1144 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1145 information about the two SSA names into the new candidate.
1146 Return the new candidate. */
1147
1148 static slsr_cand_t
1149 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1150 bool subtract_p, bool speed)
1151 {
1152 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1153 double_int index;
1154 unsigned savings = 0;
1155 slsr_cand_t c;
1156 slsr_cand_t base_cand = base_cand_from_table (base_in);
1157 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1158
1159 /* The most useful transformation is a multiply-immediate feeding
1160 an add or subtract. Look for that first. */
1161 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1162 {
1163 if (addend_cand->kind == CAND_MULT
1164 && addend_cand->index.is_zero ()
1165 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1166 {
1167 /* Z = (B + 0) * S, S constant
1168 X = Y +/- Z
1169 ===========================
1170 X = Y + ((+/-1 * S) * B) */
1171 base = base_in;
1172 index = tree_to_double_int (addend_cand->stride);
1173 if (subtract_p)
1174 index = -index;
1175 stride = addend_cand->base_expr;
1176 ctype = TREE_TYPE (base_in);
1177 if (has_single_use (addend_in))
1178 savings = (addend_cand->dead_savings
1179 + stmt_cost (addend_cand->cand_stmt, speed));
1180 }
1181
1182 if (addend_cand->next_interp)
1183 addend_cand = lookup_cand (addend_cand->next_interp);
1184 else
1185 addend_cand = NULL;
1186 }
1187
1188 while (base_cand && !base && base_cand->kind != CAND_PHI)
1189 {
1190 if (base_cand->kind == CAND_ADD
1191 && (base_cand->index.is_zero ()
1192 || operand_equal_p (base_cand->stride,
1193 integer_zero_node, 0)))
1194 {
1195 /* Y = B + (i' * S), i' * S = 0
1196 X = Y +/- Z
1197 ============================
1198 X = B + (+/-1 * Z) */
1199 base = base_cand->base_expr;
1200 index = subtract_p ? double_int_minus_one : double_int_one;
1201 stride = addend_in;
1202 ctype = base_cand->cand_type;
1203 if (has_single_use (base_in))
1204 savings = (base_cand->dead_savings
1205 + stmt_cost (base_cand->cand_stmt, speed));
1206 }
1207 else if (subtract_p)
1208 {
1209 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1210
1211 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1212 {
1213 if (subtrahend_cand->kind == CAND_MULT
1214 && subtrahend_cand->index.is_zero ()
1215 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1216 {
1217 /* Z = (B + 0) * S, S constant
1218 X = Y - Z
1219 ===========================
1220 Value: X = Y + ((-1 * S) * B) */
1221 base = base_in;
1222 index = tree_to_double_int (subtrahend_cand->stride);
1223 index = -index;
1224 stride = subtrahend_cand->base_expr;
1225 ctype = TREE_TYPE (base_in);
1226 if (has_single_use (addend_in))
1227 savings = (subtrahend_cand->dead_savings
1228 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1229 }
1230
1231 if (subtrahend_cand->next_interp)
1232 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1233 else
1234 subtrahend_cand = NULL;
1235 }
1236 }
1237
1238 if (base_cand->next_interp)
1239 base_cand = lookup_cand (base_cand->next_interp);
1240 else
1241 base_cand = NULL;
1242 }
1243
1244 if (!base)
1245 {
1246 /* No interpretations had anything useful to propagate, so
1247 produce X = Y + (1 * Z). */
1248 base = base_in;
1249 index = subtract_p ? double_int_minus_one : double_int_one;
1250 stride = addend_in;
1251 ctype = TREE_TYPE (base_in);
1252 }
1253
1254 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1255 ctype, savings);
1256 return c;
1257 }
1258
1259 /* Create a candidate entry for a statement GS, where GS adds SSA
1260 name BASE_IN to constant INDEX_IN. Propagate any known information
1261 about BASE_IN into the new candidate. Return the new candidate. */
1262
1263 static slsr_cand_t
1264 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
1265 {
1266 enum cand_kind kind = CAND_ADD;
1267 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1268 double_int index, multiple;
1269 unsigned savings = 0;
1270 slsr_cand_t c;
1271 slsr_cand_t base_cand = base_cand_from_table (base_in);
1272
1273 while (base_cand && !base && base_cand->kind != CAND_PHI)
1274 {
1275 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
1276
1277 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1278 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
1279 unsigned_p, &multiple))
1280 {
1281 /* Y = (B + i') * S, S constant, c = kS for some integer k
1282 X = Y + c
1283 ============================
1284 X = (B + (i'+ k)) * S
1285 OR
1286 Y = B + (i' * S), S constant, c = kS for some integer k
1287 X = Y + c
1288 ============================
1289 X = (B + (i'+ k)) * S */
1290 kind = base_cand->kind;
1291 base = base_cand->base_expr;
1292 index = base_cand->index + multiple;
1293 stride = base_cand->stride;
1294 ctype = base_cand->cand_type;
1295 if (has_single_use (base_in))
1296 savings = (base_cand->dead_savings
1297 + stmt_cost (base_cand->cand_stmt, speed));
1298 }
1299
1300 if (base_cand->next_interp)
1301 base_cand = lookup_cand (base_cand->next_interp);
1302 else
1303 base_cand = NULL;
1304 }
1305
1306 if (!base)
1307 {
1308 /* No interpretations had anything useful to propagate, so
1309 produce X = Y + (c * 1). */
1310 kind = CAND_ADD;
1311 base = base_in;
1312 index = index_in;
1313 stride = integer_one_node;
1314 ctype = TREE_TYPE (base_in);
1315 }
1316
1317 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1318 ctype, savings);
1319 return c;
1320 }
1321
1322 /* Given GS which is an add or subtract of scalar integers or pointers,
1323 make at least one appropriate entry in the candidate table. */
1324
1325 static void
1326 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1327 {
1328 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1329 slsr_cand_t c = NULL, c2;
1330
1331 if (TREE_CODE (rhs2) == SSA_NAME)
1332 {
1333 /* First record an interpretation assuming RHS1 is the base expression
1334 and RHS2 is the stride. But it doesn't make sense for the
1335 stride to be a pointer, so don't record a candidate in that case. */
1336 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1337 {
1338 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1339
1340 /* Add the first interpretation to the statement-candidate
1341 mapping. */
1342 add_cand_for_stmt (gs, c);
1343 }
1344
1345 /* If the two RHS operands are identical, or this is a subtract,
1346 we're done. */
1347 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1348 return;
1349
1350 /* Otherwise, record another interpretation assuming RHS2 is the
1351 base expression and RHS1 is the stride, again provided that the
1352 stride is not a pointer. */
1353 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1354 {
1355 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1356 if (c)
1357 c->next_interp = c2->cand_num;
1358 else
1359 add_cand_for_stmt (gs, c2);
1360 }
1361 }
1362 else
1363 {
1364 double_int index;
1365
1366 /* Record an interpretation for the add-immediate. */
1367 index = tree_to_double_int (rhs2);
1368 if (subtract_p)
1369 index = -index;
1370
1371 c = create_add_imm_cand (gs, rhs1, index, speed);
1372
1373 /* Add the interpretation to the statement-candidate mapping. */
1374 add_cand_for_stmt (gs, c);
1375 }
1376 }
1377
1378 /* Given GS which is a negate of a scalar integer, make an appropriate
1379 entry in the candidate table. A negate is equivalent to a multiply
1380 by -1. */
1381
1382 static void
1383 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1384 {
1385 /* Record a CAND_MULT interpretation for the multiply by -1. */
1386 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1387
1388 /* Add the interpretation to the statement-candidate mapping. */
1389 add_cand_for_stmt (gs, c);
1390 }
1391
1392 /* Help function for legal_cast_p, operating on two trees. Checks
1393 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1394 for more details. */
1395
1396 static bool
1397 legal_cast_p_1 (tree lhs, tree rhs)
1398 {
1399 tree lhs_type, rhs_type;
1400 unsigned lhs_size, rhs_size;
1401 bool lhs_wraps, rhs_wraps;
1402
1403 lhs_type = TREE_TYPE (lhs);
1404 rhs_type = TREE_TYPE (rhs);
1405 lhs_size = TYPE_PRECISION (lhs_type);
1406 rhs_size = TYPE_PRECISION (rhs_type);
1407 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1408 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1409
1410 if (lhs_size < rhs_size
1411 || (rhs_wraps && !lhs_wraps)
1412 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1413 return false;
1414
1415 return true;
1416 }
1417
1418 /* Return TRUE if GS is a statement that defines an SSA name from
1419 a conversion and is legal for us to combine with an add and multiply
1420 in the candidate table. For example, suppose we have:
1421
1422 A = B + i;
1423 C = (type) A;
1424 D = C * S;
1425
1426 Without the type-cast, we would create a CAND_MULT for D with base B,
1427 index i, and stride S. We want to record this candidate only if it
1428 is equivalent to apply the type cast following the multiply:
1429
1430 A = B + i;
1431 E = A * S;
1432 D = (type) E;
1433
1434 We will record the type with the candidate for D. This allows us
1435 to use a similar previous candidate as a basis. If we have earlier seen
1436
1437 A' = B + i';
1438 C' = (type) A';
1439 D' = C' * S;
1440
1441 we can replace D with
1442
1443 D = D' + (i - i') * S;
1444
1445 But if moving the type-cast would change semantics, we mustn't do this.
1446
1447 This is legitimate for casts from a non-wrapping integral type to
1448 any integral type of the same or larger size. It is not legitimate
1449 to convert a wrapping type to a non-wrapping type, or to a wrapping
1450 type of a different size. I.e., with a wrapping type, we must
1451 assume that the addition B + i could wrap, in which case performing
1452 the multiply before or after one of the "illegal" type casts will
1453 have different semantics. */
1454
1455 static bool
1456 legal_cast_p (gimple gs, tree rhs)
1457 {
1458 if (!is_gimple_assign (gs)
1459 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1460 return false;
1461
1462 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1463 }
1464
1465 /* Given GS which is a cast to a scalar integer type, determine whether
1466 the cast is legal for strength reduction. If so, make at least one
1467 appropriate entry in the candidate table. */
1468
1469 static void
1470 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1471 {
1472 tree lhs, ctype;
1473 slsr_cand_t base_cand, c, c2;
1474 unsigned savings = 0;
1475
1476 if (!legal_cast_p (gs, rhs1))
1477 return;
1478
1479 lhs = gimple_assign_lhs (gs);
1480 base_cand = base_cand_from_table (rhs1);
1481 ctype = TREE_TYPE (lhs);
1482
1483 if (base_cand && base_cand->kind != CAND_PHI)
1484 {
1485 while (base_cand)
1486 {
1487 /* Propagate all data from the base candidate except the type,
1488 which comes from the cast, and the base candidate's cast,
1489 which is no longer applicable. */
1490 if (has_single_use (rhs1))
1491 savings = (base_cand->dead_savings
1492 + stmt_cost (base_cand->cand_stmt, speed));
1493
1494 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1495 base_cand->base_expr,
1496 base_cand->index, base_cand->stride,
1497 ctype, savings);
1498 if (base_cand->next_interp)
1499 base_cand = lookup_cand (base_cand->next_interp);
1500 else
1501 base_cand = NULL;
1502 }
1503 }
1504 else
1505 {
1506 /* If nothing is known about the RHS, create fresh CAND_ADD and
1507 CAND_MULT interpretations:
1508
1509 X = Y + (0 * 1)
1510 X = (Y + 0) * 1
1511
1512 The first of these is somewhat arbitrary, but the choice of
1513 1 for the stride simplifies the logic for propagating casts
1514 into their uses. */
1515 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1516 integer_one_node, ctype, 0);
1517 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1518 integer_one_node, ctype, 0);
1519 c->next_interp = c2->cand_num;
1520 }
1521
1522 /* Add the first (or only) interpretation to the statement-candidate
1523 mapping. */
1524 add_cand_for_stmt (gs, c);
1525 }
1526
1527 /* Given GS which is a copy of a scalar integer type, make at least one
1528 appropriate entry in the candidate table.
1529
1530 This interface is included for completeness, but is unnecessary
1531 if this pass immediately follows a pass that performs copy
1532 propagation, such as DOM. */
1533
1534 static void
1535 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1536 {
1537 slsr_cand_t base_cand, c, c2;
1538 unsigned savings = 0;
1539
1540 base_cand = base_cand_from_table (rhs1);
1541
1542 if (base_cand && base_cand->kind != CAND_PHI)
1543 {
1544 while (base_cand)
1545 {
1546 /* Propagate all data from the base candidate. */
1547 if (has_single_use (rhs1))
1548 savings = (base_cand->dead_savings
1549 + stmt_cost (base_cand->cand_stmt, speed));
1550
1551 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1552 base_cand->base_expr,
1553 base_cand->index, base_cand->stride,
1554 base_cand->cand_type, savings);
1555 if (base_cand->next_interp)
1556 base_cand = lookup_cand (base_cand->next_interp);
1557 else
1558 base_cand = NULL;
1559 }
1560 }
1561 else
1562 {
1563 /* If nothing is known about the RHS, create fresh CAND_ADD and
1564 CAND_MULT interpretations:
1565
1566 X = Y + (0 * 1)
1567 X = (Y + 0) * 1
1568
1569 The first of these is somewhat arbitrary, but the choice of
1570 1 for the stride simplifies the logic for propagating casts
1571 into their uses. */
1572 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1573 integer_one_node, TREE_TYPE (rhs1), 0);
1574 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1575 integer_one_node, TREE_TYPE (rhs1), 0);
1576 c->next_interp = c2->cand_num;
1577 }
1578
1579 /* Add the first (or only) interpretation to the statement-candidate
1580 mapping. */
1581 add_cand_for_stmt (gs, c);
1582 }
1583 \f
1584 class find_candidates_dom_walker : public dom_walker
1585 {
1586 public:
1587 find_candidates_dom_walker (cdi_direction direction)
1588 : dom_walker (direction) {}
1589 virtual void before_dom_children (basic_block);
1590 };
1591
1592 /* Find strength-reduction candidates in block BB. */
1593
1594 void
1595 find_candidates_dom_walker::before_dom_children (basic_block bb)
1596 {
1597 bool speed = optimize_bb_for_speed_p (bb);
1598 gimple_stmt_iterator gsi;
1599
1600 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1601 slsr_process_phi (gsi_stmt (gsi), speed);
1602
1603 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1604 {
1605 gimple gs = gsi_stmt (gsi);
1606
1607 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1608 slsr_process_ref (gs);
1609
1610 else if (is_gimple_assign (gs)
1611 && SCALAR_INT_MODE_P
1612 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1613 {
1614 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1615
1616 switch (gimple_assign_rhs_code (gs))
1617 {
1618 case MULT_EXPR:
1619 case PLUS_EXPR:
1620 rhs1 = gimple_assign_rhs1 (gs);
1621 rhs2 = gimple_assign_rhs2 (gs);
1622 /* Should never happen, but currently some buggy situations
1623 in earlier phases put constants in rhs1. */
1624 if (TREE_CODE (rhs1) != SSA_NAME)
1625 continue;
1626 break;
1627
1628 /* Possible future opportunity: rhs1 of a ptr+ can be
1629 an ADDR_EXPR. */
1630 case POINTER_PLUS_EXPR:
1631 case MINUS_EXPR:
1632 rhs2 = gimple_assign_rhs2 (gs);
1633 /* Fall-through. */
1634
1635 case NOP_EXPR:
1636 case MODIFY_EXPR:
1637 case NEGATE_EXPR:
1638 rhs1 = gimple_assign_rhs1 (gs);
1639 if (TREE_CODE (rhs1) != SSA_NAME)
1640 continue;
1641 break;
1642
1643 default:
1644 ;
1645 }
1646
1647 switch (gimple_assign_rhs_code (gs))
1648 {
1649 case MULT_EXPR:
1650 slsr_process_mul (gs, rhs1, rhs2, speed);
1651 break;
1652
1653 case PLUS_EXPR:
1654 case POINTER_PLUS_EXPR:
1655 case MINUS_EXPR:
1656 slsr_process_add (gs, rhs1, rhs2, speed);
1657 break;
1658
1659 case NEGATE_EXPR:
1660 slsr_process_neg (gs, rhs1, speed);
1661 break;
1662
1663 case NOP_EXPR:
1664 slsr_process_cast (gs, rhs1, speed);
1665 break;
1666
1667 case MODIFY_EXPR:
1668 slsr_process_copy (gs, rhs1, speed);
1669 break;
1670
1671 default:
1672 ;
1673 }
1674 }
1675 }
1676 }
1677 \f
1678 /* Dump a candidate for debug. */
1679
1680 static void
1681 dump_candidate (slsr_cand_t c)
1682 {
1683 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1684 gimple_bb (c->cand_stmt)->index);
1685 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1686 switch (c->kind)
1687 {
1688 case CAND_MULT:
1689 fputs (" MULT : (", dump_file);
1690 print_generic_expr (dump_file, c->base_expr, 0);
1691 fputs (" + ", dump_file);
1692 dump_double_int (dump_file, c->index, false);
1693 fputs (") * ", dump_file);
1694 print_generic_expr (dump_file, c->stride, 0);
1695 fputs (" : ", dump_file);
1696 break;
1697 case CAND_ADD:
1698 fputs (" ADD : ", dump_file);
1699 print_generic_expr (dump_file, c->base_expr, 0);
1700 fputs (" + (", dump_file);
1701 dump_double_int (dump_file, c->index, false);
1702 fputs (" * ", dump_file);
1703 print_generic_expr (dump_file, c->stride, 0);
1704 fputs (") : ", dump_file);
1705 break;
1706 case CAND_REF:
1707 fputs (" REF : ", dump_file);
1708 print_generic_expr (dump_file, c->base_expr, 0);
1709 fputs (" + (", dump_file);
1710 print_generic_expr (dump_file, c->stride, 0);
1711 fputs (") + ", dump_file);
1712 dump_double_int (dump_file, c->index, false);
1713 fputs (" : ", dump_file);
1714 break;
1715 case CAND_PHI:
1716 fputs (" PHI : ", dump_file);
1717 print_generic_expr (dump_file, c->base_expr, 0);
1718 fputs (" + (unknown * ", dump_file);
1719 print_generic_expr (dump_file, c->stride, 0);
1720 fputs (") : ", dump_file);
1721 break;
1722 default:
1723 gcc_unreachable ();
1724 }
1725 print_generic_expr (dump_file, c->cand_type, 0);
1726 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1727 c->basis, c->dependent, c->sibling);
1728 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1729 c->next_interp, c->dead_savings);
1730 if (c->def_phi)
1731 fprintf (dump_file, " phi: %d\n", c->def_phi);
1732 fputs ("\n", dump_file);
1733 }
1734
1735 /* Dump the candidate vector for debug. */
1736
1737 static void
1738 dump_cand_vec (void)
1739 {
1740 unsigned i;
1741 slsr_cand_t c;
1742
1743 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1744
1745 FOR_EACH_VEC_ELT (cand_vec, i, c)
1746 dump_candidate (c);
1747 }
1748
1749 /* Callback used to dump the candidate chains hash table. */
1750
1751 int
1752 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1753 {
1754 const_cand_chain_t chain = *slot;
1755 cand_chain_t p;
1756
1757 print_generic_expr (dump_file, chain->base_expr, 0);
1758 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1759
1760 for (p = chain->next; p; p = p->next)
1761 fprintf (dump_file, " -> %d", p->cand->cand_num);
1762
1763 fputs ("\n", dump_file);
1764 return 1;
1765 }
1766
1767 /* Dump the candidate chains. */
1768
1769 static void
1770 dump_cand_chains (void)
1771 {
1772 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1773 base_cand_map.traverse_noresize <void *, ssa_base_cand_dump_callback> (NULL);
1774 fputs ("\n", dump_file);
1775 }
1776
1777 /* Dump the increment vector for debug. */
1778
1779 static void
1780 dump_incr_vec (void)
1781 {
1782 if (dump_file && (dump_flags & TDF_DETAILS))
1783 {
1784 unsigned i;
1785
1786 fprintf (dump_file, "\nIncrement vector:\n\n");
1787
1788 for (i = 0; i < incr_vec_len; i++)
1789 {
1790 fprintf (dump_file, "%3d increment: ", i);
1791 dump_double_int (dump_file, incr_vec[i].incr, false);
1792 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1793 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1794 fputs ("\n initializer: ", dump_file);
1795 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1796 fputs ("\n\n", dump_file);
1797 }
1798 }
1799 }
1800 \f
1801 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1802 data reference. */
1803
1804 static void
1805 replace_ref (tree *expr, slsr_cand_t c)
1806 {
1807 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1808 unsigned HOST_WIDE_INT misalign;
1809 unsigned align;
1810
1811 /* Ensure the memory reference carries the minimum alignment
1812 requirement for the data type. See PR58041. */
1813 get_object_alignment_1 (*expr, &align, &misalign);
1814 if (misalign != 0)
1815 align = (misalign & -misalign);
1816 if (align < TYPE_ALIGN (acc_type))
1817 acc_type = build_aligned_type (acc_type, align);
1818
1819 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1820 c->base_expr, c->stride);
1821 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1822 double_int_to_tree (c->cand_type, c->index));
1823
1824 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1825 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1826 TREE_OPERAND (mem_ref, 0)
1827 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1828 /*simple_p=*/true, NULL,
1829 /*before=*/true, GSI_SAME_STMT);
1830 copy_ref_info (mem_ref, *expr);
1831 *expr = mem_ref;
1832 update_stmt (c->cand_stmt);
1833 }
1834
1835 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1836 dependent of candidate C with an equivalent strength-reduced data
1837 reference. */
1838
1839 static void
1840 replace_refs (slsr_cand_t c)
1841 {
1842 if (gimple_vdef (c->cand_stmt))
1843 {
1844 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1845 replace_ref (lhs, c);
1846 }
1847 else
1848 {
1849 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1850 replace_ref (rhs, c);
1851 }
1852
1853 if (c->sibling)
1854 replace_refs (lookup_cand (c->sibling));
1855
1856 if (c->dependent)
1857 replace_refs (lookup_cand (c->dependent));
1858 }
1859
1860 /* Return TRUE if candidate C is dependent upon a PHI. */
1861
1862 static bool
1863 phi_dependent_cand_p (slsr_cand_t c)
1864 {
1865 /* A candidate is not necessarily dependent upon a PHI just because
1866 it has a phi definition for its base name. It may have a basis
1867 that relies upon the same phi definition, in which case the PHI
1868 is irrelevant to this candidate. */
1869 return (c->def_phi
1870 && c->basis
1871 && lookup_cand (c->basis)->def_phi != c->def_phi);
1872 }
1873
1874 /* Calculate the increment required for candidate C relative to
1875 its basis. */
1876
1877 static double_int
1878 cand_increment (slsr_cand_t c)
1879 {
1880 slsr_cand_t basis;
1881
1882 /* If the candidate doesn't have a basis, just return its own
1883 index. This is useful in record_increments to help us find
1884 an existing initializer. Also, if the candidate's basis is
1885 hidden by a phi, then its own index will be the increment
1886 from the newly introduced phi basis. */
1887 if (!c->basis || phi_dependent_cand_p (c))
1888 return c->index;
1889
1890 basis = lookup_cand (c->basis);
1891 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1892 return c->index - basis->index;
1893 }
1894
1895 /* Calculate the increment required for candidate C relative to
1896 its basis. If we aren't going to generate pointer arithmetic
1897 for this candidate, return the absolute value of that increment
1898 instead. */
1899
1900 static inline double_int
1901 cand_abs_increment (slsr_cand_t c)
1902 {
1903 double_int increment = cand_increment (c);
1904
1905 if (!address_arithmetic_p && increment.is_negative ())
1906 increment = -increment;
1907
1908 return increment;
1909 }
1910
1911 /* Return TRUE iff candidate C has already been replaced under
1912 another interpretation. */
1913
1914 static inline bool
1915 cand_already_replaced (slsr_cand_t c)
1916 {
1917 return (gimple_bb (c->cand_stmt) == 0);
1918 }
1919
1920 /* Common logic used by replace_unconditional_candidate and
1921 replace_conditional_candidate. */
1922
1923 static void
1924 replace_mult_candidate (slsr_cand_t c, tree basis_name, double_int bump)
1925 {
1926 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
1927 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1928
1929 /* It is highly unlikely, but possible, that the resulting
1930 bump doesn't fit in a HWI. Abandon the replacement
1931 in this case. This does not affect siblings or dependents
1932 of C. Restriction to signed HWI is conservative for unsigned
1933 types but allows for safe negation without twisted logic. */
1934 if (bump.fits_shwi ()
1935 && bump.to_shwi () != HOST_WIDE_INT_MIN
1936 /* It is not useful to replace casts, copies, or adds of
1937 an SSA name and a constant. */
1938 && cand_code != MODIFY_EXPR
1939 && cand_code != NOP_EXPR
1940 && cand_code != PLUS_EXPR
1941 && cand_code != POINTER_PLUS_EXPR
1942 && cand_code != MINUS_EXPR)
1943 {
1944 enum tree_code code = PLUS_EXPR;
1945 tree bump_tree;
1946 gimple stmt_to_print = NULL;
1947
1948 /* If the basis name and the candidate's LHS have incompatible
1949 types, introduce a cast. */
1950 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
1951 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
1952 if (bump.is_negative ())
1953 {
1954 code = MINUS_EXPR;
1955 bump = -bump;
1956 }
1957
1958 bump_tree = double_int_to_tree (target_type, bump);
1959
1960 if (dump_file && (dump_flags & TDF_DETAILS))
1961 {
1962 fputs ("Replacing: ", dump_file);
1963 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1964 }
1965
1966 if (bump.is_zero ())
1967 {
1968 tree lhs = gimple_assign_lhs (c->cand_stmt);
1969 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
1970 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1971 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
1972 gsi_replace (&gsi, copy_stmt, false);
1973 c->cand_stmt = copy_stmt;
1974 if (dump_file && (dump_flags & TDF_DETAILS))
1975 stmt_to_print = copy_stmt;
1976 }
1977 else
1978 {
1979 tree rhs1, rhs2;
1980 if (cand_code != NEGATE_EXPR) {
1981 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1982 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1983 }
1984 if (cand_code != NEGATE_EXPR
1985 && ((operand_equal_p (rhs1, basis_name, 0)
1986 && operand_equal_p (rhs2, bump_tree, 0))
1987 || (operand_equal_p (rhs1, bump_tree, 0)
1988 && operand_equal_p (rhs2, basis_name, 0))))
1989 {
1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 {
1992 fputs ("(duplicate, not actually replacing)", dump_file);
1993 stmt_to_print = c->cand_stmt;
1994 }
1995 }
1996 else
1997 {
1998 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1999 gimple_assign_set_rhs_with_ops (&gsi, code,
2000 basis_name, bump_tree);
2001 update_stmt (gsi_stmt (gsi));
2002 c->cand_stmt = gsi_stmt (gsi);
2003 if (dump_file && (dump_flags & TDF_DETAILS))
2004 stmt_to_print = gsi_stmt (gsi);
2005 }
2006 }
2007
2008 if (dump_file && (dump_flags & TDF_DETAILS))
2009 {
2010 fputs ("With: ", dump_file);
2011 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2012 fputs ("\n", dump_file);
2013 }
2014 }
2015 }
2016
2017 /* Replace candidate C with an add or subtract. Note that we only
2018 operate on CAND_MULTs with known strides, so we will never generate
2019 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2020 X = Y + ((i - i') * S), as described in the module commentary. The
2021 folded value ((i - i') * S) is referred to here as the "bump." */
2022
2023 static void
2024 replace_unconditional_candidate (slsr_cand_t c)
2025 {
2026 slsr_cand_t basis;
2027 double_int stride, bump;
2028
2029 if (cand_already_replaced (c))
2030 return;
2031
2032 basis = lookup_cand (c->basis);
2033 stride = tree_to_double_int (c->stride);
2034 bump = cand_increment (c) * stride;
2035
2036 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2037 }
2038 \f
2039 /* Return the index in the increment vector of the given INCREMENT,
2040 or -1 if not found. The latter can occur if more than
2041 MAX_INCR_VEC_LEN increments have been found. */
2042
2043 static inline int
2044 incr_vec_index (double_int increment)
2045 {
2046 unsigned i;
2047
2048 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2049 ;
2050
2051 if (i < incr_vec_len)
2052 return i;
2053 else
2054 return -1;
2055 }
2056
2057 /* Create a new statement along edge E to add BASIS_NAME to the product
2058 of INCREMENT and the stride of candidate C. Create and return a new
2059 SSA name from *VAR to be used as the LHS of the new statement.
2060 KNOWN_STRIDE is true iff C's stride is a constant. */
2061
2062 static tree
2063 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2064 double_int increment, edge e, location_t loc,
2065 bool known_stride)
2066 {
2067 basic_block insert_bb;
2068 gimple_stmt_iterator gsi;
2069 tree lhs, basis_type;
2070 gimple new_stmt;
2071
2072 /* If the add candidate along this incoming edge has the same
2073 index as C's hidden basis, the hidden basis represents this
2074 edge correctly. */
2075 if (increment.is_zero ())
2076 return basis_name;
2077
2078 basis_type = TREE_TYPE (basis_name);
2079 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2080
2081 if (known_stride)
2082 {
2083 tree bump_tree;
2084 enum tree_code code = PLUS_EXPR;
2085 double_int bump = increment * tree_to_double_int (c->stride);
2086 if (bump.is_negative ())
2087 {
2088 code = MINUS_EXPR;
2089 bump = -bump;
2090 }
2091
2092 bump_tree = double_int_to_tree (basis_type, bump);
2093 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2094 bump_tree);
2095 }
2096 else
2097 {
2098 int i;
2099 bool negate_incr = (!address_arithmetic_p && increment.is_negative ());
2100 i = incr_vec_index (negate_incr ? -increment : increment);
2101 gcc_assert (i >= 0);
2102
2103 if (incr_vec[i].initializer)
2104 {
2105 enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
2106 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2107 incr_vec[i].initializer);
2108 }
2109 else if (increment.is_one ())
2110 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, lhs, basis_name,
2111 c->stride);
2112 else if (increment.is_minus_one ())
2113 new_stmt = gimple_build_assign_with_ops (MINUS_EXPR, lhs, basis_name,
2114 c->stride);
2115 else
2116 gcc_unreachable ();
2117 }
2118
2119 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2120 gsi = gsi_last_bb (insert_bb);
2121
2122 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2123 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2124 else
2125 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2126
2127 gimple_set_location (new_stmt, loc);
2128
2129 if (dump_file && (dump_flags & TDF_DETAILS))
2130 {
2131 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2132 print_gimple_stmt (dump_file, new_stmt, 0, 0);
2133 }
2134
2135 return lhs;
2136 }
2137
2138 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2139 is hidden by the phi node FROM_PHI, create a new phi node in the same
2140 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2141 with its phi arguments representing conditional adjustments to the
2142 hidden basis along conditional incoming paths. Those adjustments are
2143 made by creating add statements (and sometimes recursively creating
2144 phis) along those incoming paths. LOC is the location to attach to
2145 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2146 constant. */
2147
2148 static tree
2149 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2150 location_t loc, bool known_stride)
2151 {
2152 int i;
2153 tree name, phi_arg;
2154 gimple phi;
2155 vec<tree> phi_args;
2156 slsr_cand_t basis = lookup_cand (c->basis);
2157 int nargs = gimple_phi_num_args (from_phi);
2158 basic_block phi_bb = gimple_bb (from_phi);
2159 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2160 phi_args.create (nargs);
2161
2162 /* Process each argument of the existing phi that represents
2163 conditionally-executed add candidates. */
2164 for (i = 0; i < nargs; i++)
2165 {
2166 edge e = (*phi_bb->preds)[i];
2167 tree arg = gimple_phi_arg_def (from_phi, i);
2168 tree feeding_def;
2169
2170 /* If the phi argument is the base name of the CAND_PHI, then
2171 this incoming arc should use the hidden basis. */
2172 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2173 if (basis->index.is_zero ())
2174 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2175 else
2176 {
2177 double_int incr = -basis->index;
2178 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2179 e, loc, known_stride);
2180 }
2181 else
2182 {
2183 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2184
2185 /* If there is another phi along this incoming edge, we must
2186 process it in the same fashion to ensure that all basis
2187 adjustments are made along its incoming edges. */
2188 if (gimple_code (arg_def) == GIMPLE_PHI)
2189 feeding_def = create_phi_basis (c, arg_def, basis_name,
2190 loc, known_stride);
2191 else
2192 {
2193 slsr_cand_t arg_cand = base_cand_from_table (arg);
2194 double_int diff = arg_cand->index - basis->index;
2195 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2196 e, loc, known_stride);
2197 }
2198 }
2199
2200 /* Because of recursion, we need to save the arguments in a vector
2201 so we can create the PHI statement all at once. Otherwise the
2202 storage for the half-created PHI can be reclaimed. */
2203 phi_args.safe_push (feeding_def);
2204 }
2205
2206 /* Create the new phi basis. */
2207 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2208 phi = create_phi_node (name, phi_bb);
2209 SSA_NAME_DEF_STMT (name) = phi;
2210
2211 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2212 {
2213 edge e = (*phi_bb->preds)[i];
2214 add_phi_arg (phi, phi_arg, e, loc);
2215 }
2216
2217 update_stmt (phi);
2218
2219 if (dump_file && (dump_flags & TDF_DETAILS))
2220 {
2221 fputs ("Introducing new phi basis: ", dump_file);
2222 print_gimple_stmt (dump_file, phi, 0, 0);
2223 }
2224
2225 return name;
2226 }
2227
2228 /* Given a candidate C whose basis is hidden by at least one intervening
2229 phi, introduce a matching number of new phis to represent its basis
2230 adjusted by conditional increments along possible incoming paths. Then
2231 replace C as though it were an unconditional candidate, using the new
2232 basis. */
2233
2234 static void
2235 replace_conditional_candidate (slsr_cand_t c)
2236 {
2237 tree basis_name, name;
2238 slsr_cand_t basis;
2239 location_t loc;
2240 double_int stride, bump;
2241
2242 /* Look up the LHS SSA name from C's basis. This will be the
2243 RHS1 of the adds we will introduce to create new phi arguments. */
2244 basis = lookup_cand (c->basis);
2245 basis_name = gimple_assign_lhs (basis->cand_stmt);
2246
2247 /* Create a new phi statement which will represent C's true basis
2248 after the transformation is complete. */
2249 loc = gimple_location (c->cand_stmt);
2250 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2251 basis_name, loc, KNOWN_STRIDE);
2252 /* Replace C with an add of the new basis phi and a constant. */
2253 stride = tree_to_double_int (c->stride);
2254 bump = c->index * stride;
2255
2256 replace_mult_candidate (c, name, bump);
2257 }
2258
2259 /* Compute the expected costs of inserting basis adjustments for
2260 candidate C with phi-definition PHI. The cost of inserting
2261 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2262 which are themselves phi results, recursively calculate costs
2263 for those phis as well. */
2264
2265 static int
2266 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2267 {
2268 unsigned i;
2269 int cost = 0;
2270 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2271
2272 /* If we work our way back to a phi that isn't dominated by the hidden
2273 basis, this isn't a candidate for replacement. Indicate this by
2274 returning an unreasonably high cost. It's not easy to detect
2275 these situations when determining the basis, so we defer the
2276 decision until now. */
2277 basic_block phi_bb = gimple_bb (phi);
2278 slsr_cand_t basis = lookup_cand (c->basis);
2279 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2280
2281 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2282 return COST_INFINITE;
2283
2284 for (i = 0; i < gimple_phi_num_args (phi); i++)
2285 {
2286 tree arg = gimple_phi_arg_def (phi, i);
2287
2288 if (arg != phi_cand->base_expr)
2289 {
2290 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2291
2292 if (gimple_code (arg_def) == GIMPLE_PHI)
2293 cost += phi_add_costs (arg_def, c, one_add_cost);
2294 else
2295 {
2296 slsr_cand_t arg_cand = base_cand_from_table (arg);
2297
2298 if (arg_cand->index != c->index)
2299 cost += one_add_cost;
2300 }
2301 }
2302 }
2303
2304 return cost;
2305 }
2306
2307 /* For candidate C, each sibling of candidate C, and each dependent of
2308 candidate C, determine whether the candidate is dependent upon a
2309 phi that hides its basis. If not, replace the candidate unconditionally.
2310 Otherwise, determine whether the cost of introducing compensation code
2311 for the candidate is offset by the gains from strength reduction. If
2312 so, replace the candidate and introduce the compensation code. */
2313
2314 static void
2315 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2316 {
2317 if (phi_dependent_cand_p (c))
2318 {
2319 if (c->kind == CAND_MULT)
2320 {
2321 /* A candidate dependent upon a phi will replace a multiply by
2322 a constant with an add, and will insert at most one add for
2323 each phi argument. Add these costs with the potential dead-code
2324 savings to determine profitability. */
2325 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2326 int mult_savings = stmt_cost (c->cand_stmt, speed);
2327 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2328 tree phi_result = gimple_phi_result (phi);
2329 int one_add_cost = add_cost (speed,
2330 TYPE_MODE (TREE_TYPE (phi_result)));
2331 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2332 int cost = add_costs - mult_savings - c->dead_savings;
2333
2334 if (dump_file && (dump_flags & TDF_DETAILS))
2335 {
2336 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2337 fprintf (dump_file, " add_costs = %d\n", add_costs);
2338 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2339 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2340 fprintf (dump_file, " cost = %d\n", cost);
2341 if (cost <= COST_NEUTRAL)
2342 fputs (" Replacing...\n", dump_file);
2343 else
2344 fputs (" Not replaced.\n", dump_file);
2345 }
2346
2347 if (cost <= COST_NEUTRAL)
2348 replace_conditional_candidate (c);
2349 }
2350 }
2351 else
2352 replace_unconditional_candidate (c);
2353
2354 if (c->sibling)
2355 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2356
2357 if (c->dependent)
2358 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2359 }
2360 \f
2361 /* Count the number of candidates in the tree rooted at C that have
2362 not already been replaced under other interpretations. */
2363
2364 static int
2365 count_candidates (slsr_cand_t c)
2366 {
2367 unsigned count = cand_already_replaced (c) ? 0 : 1;
2368
2369 if (c->sibling)
2370 count += count_candidates (lookup_cand (c->sibling));
2371
2372 if (c->dependent)
2373 count += count_candidates (lookup_cand (c->dependent));
2374
2375 return count;
2376 }
2377
2378 /* Increase the count of INCREMENT by one in the increment vector.
2379 INCREMENT is associated with candidate C. If INCREMENT is to be
2380 conditionally executed as part of a conditional candidate replacement,
2381 IS_PHI_ADJUST is true, otherwise false. If an initializer
2382 T_0 = stride * I is provided by a candidate that dominates all
2383 candidates with the same increment, also record T_0 for subsequent use. */
2384
2385 static void
2386 record_increment (slsr_cand_t c, double_int increment, bool is_phi_adjust)
2387 {
2388 bool found = false;
2389 unsigned i;
2390
2391 /* Treat increments that differ only in sign as identical so as to
2392 share initializers, unless we are generating pointer arithmetic. */
2393 if (!address_arithmetic_p && increment.is_negative ())
2394 increment = -increment;
2395
2396 for (i = 0; i < incr_vec_len; i++)
2397 {
2398 if (incr_vec[i].incr == increment)
2399 {
2400 incr_vec[i].count++;
2401 found = true;
2402
2403 /* If we previously recorded an initializer that doesn't
2404 dominate this candidate, it's not going to be useful to
2405 us after all. */
2406 if (incr_vec[i].initializer
2407 && !dominated_by_p (CDI_DOMINATORS,
2408 gimple_bb (c->cand_stmt),
2409 incr_vec[i].init_bb))
2410 {
2411 incr_vec[i].initializer = NULL_TREE;
2412 incr_vec[i].init_bb = NULL;
2413 }
2414
2415 break;
2416 }
2417 }
2418
2419 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2420 {
2421 /* The first time we see an increment, create the entry for it.
2422 If this is the root candidate which doesn't have a basis, set
2423 the count to zero. We're only processing it so it can possibly
2424 provide an initializer for other candidates. */
2425 incr_vec[incr_vec_len].incr = increment;
2426 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2427 incr_vec[incr_vec_len].cost = COST_INFINITE;
2428
2429 /* Optimistically record the first occurrence of this increment
2430 as providing an initializer (if it does); we will revise this
2431 opinion later if it doesn't dominate all other occurrences.
2432 Exception: increments of -1, 0, 1 never need initializers;
2433 and phi adjustments don't ever provide initializers. */
2434 if (c->kind == CAND_ADD
2435 && !is_phi_adjust
2436 && c->index == increment
2437 && (increment.sgt (double_int_one)
2438 || increment.slt (double_int_minus_one))
2439 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2440 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2441 {
2442 tree t0 = NULL_TREE;
2443 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2444 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2445 if (operand_equal_p (rhs1, c->base_expr, 0))
2446 t0 = rhs2;
2447 else if (operand_equal_p (rhs2, c->base_expr, 0))
2448 t0 = rhs1;
2449 if (t0
2450 && SSA_NAME_DEF_STMT (t0)
2451 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2452 {
2453 incr_vec[incr_vec_len].initializer = t0;
2454 incr_vec[incr_vec_len++].init_bb
2455 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2456 }
2457 else
2458 {
2459 incr_vec[incr_vec_len].initializer = NULL_TREE;
2460 incr_vec[incr_vec_len++].init_bb = NULL;
2461 }
2462 }
2463 else
2464 {
2465 incr_vec[incr_vec_len].initializer = NULL_TREE;
2466 incr_vec[incr_vec_len++].init_bb = NULL;
2467 }
2468 }
2469 }
2470
2471 /* Given phi statement PHI that hides a candidate from its BASIS, find
2472 the increments along each incoming arc (recursively handling additional
2473 phis that may be present) and record them. These increments are the
2474 difference in index between the index-adjusting statements and the
2475 index of the basis. */
2476
2477 static void
2478 record_phi_increments (slsr_cand_t basis, gimple phi)
2479 {
2480 unsigned i;
2481 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2482
2483 for (i = 0; i < gimple_phi_num_args (phi); i++)
2484 {
2485 tree arg = gimple_phi_arg_def (phi, i);
2486
2487 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2488 {
2489 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2490
2491 if (gimple_code (arg_def) == GIMPLE_PHI)
2492 record_phi_increments (basis, arg_def);
2493 else
2494 {
2495 slsr_cand_t arg_cand = base_cand_from_table (arg);
2496 double_int diff = arg_cand->index - basis->index;
2497 record_increment (arg_cand, diff, PHI_ADJUST);
2498 }
2499 }
2500 }
2501 }
2502
2503 /* Determine how many times each unique increment occurs in the set
2504 of candidates rooted at C's parent, recording the data in the
2505 increment vector. For each unique increment I, if an initializer
2506 T_0 = stride * I is provided by a candidate that dominates all
2507 candidates with the same increment, also record T_0 for subsequent
2508 use. */
2509
2510 static void
2511 record_increments (slsr_cand_t c)
2512 {
2513 if (!cand_already_replaced (c))
2514 {
2515 if (!phi_dependent_cand_p (c))
2516 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2517 else
2518 {
2519 /* A candidate with a basis hidden by a phi will have one
2520 increment for its relationship to the index represented by
2521 the phi, and potentially additional increments along each
2522 incoming edge. For the root of the dependency tree (which
2523 has no basis), process just the initial index in case it has
2524 an initializer that can be used by subsequent candidates. */
2525 record_increment (c, c->index, NOT_PHI_ADJUST);
2526
2527 if (c->basis)
2528 record_phi_increments (lookup_cand (c->basis),
2529 lookup_cand (c->def_phi)->cand_stmt);
2530 }
2531 }
2532
2533 if (c->sibling)
2534 record_increments (lookup_cand (c->sibling));
2535
2536 if (c->dependent)
2537 record_increments (lookup_cand (c->dependent));
2538 }
2539
2540 /* Add up and return the costs of introducing add statements that
2541 require the increment INCR on behalf of candidate C and phi
2542 statement PHI. Accumulate into *SAVINGS the potential savings
2543 from removing existing statements that feed PHI and have no other
2544 uses. */
2545
2546 static int
2547 phi_incr_cost (slsr_cand_t c, double_int incr, gimple phi, int *savings)
2548 {
2549 unsigned i;
2550 int cost = 0;
2551 slsr_cand_t basis = lookup_cand (c->basis);
2552 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2553
2554 for (i = 0; i < gimple_phi_num_args (phi); i++)
2555 {
2556 tree arg = gimple_phi_arg_def (phi, i);
2557
2558 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2559 {
2560 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2561
2562 if (gimple_code (arg_def) == GIMPLE_PHI)
2563 {
2564 int feeding_savings = 0;
2565 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2566 if (has_single_use (gimple_phi_result (arg_def)))
2567 *savings += feeding_savings;
2568 }
2569 else
2570 {
2571 slsr_cand_t arg_cand = base_cand_from_table (arg);
2572 double_int diff = arg_cand->index - basis->index;
2573
2574 if (incr == diff)
2575 {
2576 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2577 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2578 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2579 if (has_single_use (lhs))
2580 *savings += stmt_cost (arg_cand->cand_stmt, true);
2581 }
2582 }
2583 }
2584 }
2585
2586 return cost;
2587 }
2588
2589 /* Return the first candidate in the tree rooted at C that has not
2590 already been replaced, favoring siblings over dependents. */
2591
2592 static slsr_cand_t
2593 unreplaced_cand_in_tree (slsr_cand_t c)
2594 {
2595 if (!cand_already_replaced (c))
2596 return c;
2597
2598 if (c->sibling)
2599 {
2600 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2601 if (sib)
2602 return sib;
2603 }
2604
2605 if (c->dependent)
2606 {
2607 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2608 if (dep)
2609 return dep;
2610 }
2611
2612 return NULL;
2613 }
2614
2615 /* Return TRUE if the candidates in the tree rooted at C should be
2616 optimized for speed, else FALSE. We estimate this based on the block
2617 containing the most dominant candidate in the tree that has not yet
2618 been replaced. */
2619
2620 static bool
2621 optimize_cands_for_speed_p (slsr_cand_t c)
2622 {
2623 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2624 gcc_assert (c2);
2625 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2626 }
2627
2628 /* Add COST_IN to the lowest cost of any dependent path starting at
2629 candidate C or any of its siblings, counting only candidates along
2630 such paths with increment INCR. Assume that replacing a candidate
2631 reduces cost by REPL_SAVINGS. Also account for savings from any
2632 statements that would go dead. If COUNT_PHIS is true, include
2633 costs of introducing feeding statements for conditional candidates. */
2634
2635 static int
2636 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2637 double_int incr, bool count_phis)
2638 {
2639 int local_cost, sib_cost, savings = 0;
2640 double_int cand_incr = cand_abs_increment (c);
2641
2642 if (cand_already_replaced (c))
2643 local_cost = cost_in;
2644 else if (incr == cand_incr)
2645 local_cost = cost_in - repl_savings - c->dead_savings;
2646 else
2647 local_cost = cost_in - c->dead_savings;
2648
2649 if (count_phis
2650 && phi_dependent_cand_p (c)
2651 && !cand_already_replaced (c))
2652 {
2653 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2654 local_cost += phi_incr_cost (c, incr, phi, &savings);
2655
2656 if (has_single_use (gimple_phi_result (phi)))
2657 local_cost -= savings;
2658 }
2659
2660 if (c->dependent)
2661 local_cost = lowest_cost_path (local_cost, repl_savings,
2662 lookup_cand (c->dependent), incr,
2663 count_phis);
2664
2665 if (c->sibling)
2666 {
2667 sib_cost = lowest_cost_path (cost_in, repl_savings,
2668 lookup_cand (c->sibling), incr,
2669 count_phis);
2670 local_cost = MIN (local_cost, sib_cost);
2671 }
2672
2673 return local_cost;
2674 }
2675
2676 /* Compute the total savings that would accrue from all replacements
2677 in the candidate tree rooted at C, counting only candidates with
2678 increment INCR. Assume that replacing a candidate reduces cost
2679 by REPL_SAVINGS. Also account for savings from statements that
2680 would go dead. */
2681
2682 static int
2683 total_savings (int repl_savings, slsr_cand_t c, double_int incr,
2684 bool count_phis)
2685 {
2686 int savings = 0;
2687 double_int cand_incr = cand_abs_increment (c);
2688
2689 if (incr == cand_incr && !cand_already_replaced (c))
2690 savings += repl_savings + c->dead_savings;
2691
2692 if (count_phis
2693 && phi_dependent_cand_p (c)
2694 && !cand_already_replaced (c))
2695 {
2696 int phi_savings = 0;
2697 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2698 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2699
2700 if (has_single_use (gimple_phi_result (phi)))
2701 savings += phi_savings;
2702 }
2703
2704 if (c->dependent)
2705 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2706 count_phis);
2707
2708 if (c->sibling)
2709 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2710 count_phis);
2711
2712 return savings;
2713 }
2714
2715 /* Use target-specific costs to determine and record which increments
2716 in the current candidate tree are profitable to replace, assuming
2717 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2718 the candidate tree.
2719
2720 One slight limitation here is that we don't account for the possible
2721 introduction of casts in some cases. See replace_one_candidate for
2722 the cases where these are introduced. This should probably be cleaned
2723 up sometime. */
2724
2725 static void
2726 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
2727 {
2728 unsigned i;
2729
2730 for (i = 0; i < incr_vec_len; i++)
2731 {
2732 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2733
2734 /* If somehow this increment is bigger than a HWI, we won't
2735 be optimizing candidates that use it. And if the increment
2736 has a count of zero, nothing will be done with it. */
2737 if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
2738 incr_vec[i].cost = COST_INFINITE;
2739
2740 /* Increments of 0, 1, and -1 are always profitable to replace,
2741 because they always replace a multiply or add with an add or
2742 copy, and may cause one or more existing instructions to go
2743 dead. Exception: -1 can't be assumed to be profitable for
2744 pointer addition. */
2745 else if (incr == 0
2746 || incr == 1
2747 || (incr == -1
2748 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2749 != POINTER_PLUS_EXPR)))
2750 incr_vec[i].cost = COST_NEUTRAL;
2751
2752 /* FORNOW: If we need to add an initializer, give up if a cast from
2753 the candidate's type to its stride's type can lose precision.
2754 This could eventually be handled better by expressly retaining the
2755 result of a cast to a wider type in the stride. Example:
2756
2757 short int _1;
2758 _2 = (int) _1;
2759 _3 = _2 * 10;
2760 _4 = x + _3; ADD: x + (10 * _1) : int
2761 _5 = _2 * 15;
2762 _6 = x + _3; ADD: x + (15 * _1) : int
2763
2764 Right now replacing _6 would cause insertion of an initializer
2765 of the form "short int T = _1 * 5;" followed by a cast to
2766 int, which could overflow incorrectly. Had we recorded _2 or
2767 (int)_1 as the stride, this wouldn't happen. However, doing
2768 this breaks other opportunities, so this will require some
2769 care. */
2770 else if (!incr_vec[i].initializer
2771 && TREE_CODE (first_dep->stride) != INTEGER_CST
2772 && !legal_cast_p_1 (first_dep->stride,
2773 gimple_assign_lhs (first_dep->cand_stmt)))
2774
2775 incr_vec[i].cost = COST_INFINITE;
2776
2777 /* If we need to add an initializer, make sure we don't introduce
2778 a multiply by a pointer type, which can happen in certain cast
2779 scenarios. FIXME: When cleaning up these cast issues, we can
2780 afford to introduce the multiply provided we cast out to an
2781 unsigned int of appropriate size. */
2782 else if (!incr_vec[i].initializer
2783 && TREE_CODE (first_dep->stride) != INTEGER_CST
2784 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2785
2786 incr_vec[i].cost = COST_INFINITE;
2787
2788 /* For any other increment, if this is a multiply candidate, we
2789 must introduce a temporary T and initialize it with
2790 T_0 = stride * increment. When optimizing for speed, walk the
2791 candidate tree to calculate the best cost reduction along any
2792 path; if it offsets the fixed cost of inserting the initializer,
2793 replacing the increment is profitable. When optimizing for
2794 size, instead calculate the total cost reduction from replacing
2795 all candidates with this increment. */
2796 else if (first_dep->kind == CAND_MULT)
2797 {
2798 int cost = mult_by_coeff_cost (incr, mode, speed);
2799 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2800 if (speed)
2801 cost = lowest_cost_path (cost, repl_savings, first_dep,
2802 incr_vec[i].incr, COUNT_PHIS);
2803 else
2804 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2805 COUNT_PHIS);
2806
2807 incr_vec[i].cost = cost;
2808 }
2809
2810 /* If this is an add candidate, the initializer may already
2811 exist, so only calculate the cost of the initializer if it
2812 doesn't. We are replacing one add with another here, so the
2813 known replacement savings is zero. We will account for removal
2814 of dead instructions in lowest_cost_path or total_savings. */
2815 else
2816 {
2817 int cost = 0;
2818 if (!incr_vec[i].initializer)
2819 cost = mult_by_coeff_cost (incr, mode, speed);
2820
2821 if (speed)
2822 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
2823 DONT_COUNT_PHIS);
2824 else
2825 cost -= total_savings (0, first_dep, incr_vec[i].incr,
2826 DONT_COUNT_PHIS);
2827
2828 incr_vec[i].cost = cost;
2829 }
2830 }
2831 }
2832
2833 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2834 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2835 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2836 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2837 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2838
2839 static basic_block
2840 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2841 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2842 {
2843 basic_block ncd;
2844
2845 if (!bb1)
2846 {
2847 *where = c2;
2848 return bb2;
2849 }
2850
2851 if (!bb2)
2852 {
2853 *where = c1;
2854 return bb1;
2855 }
2856
2857 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2858
2859 /* If both candidates are in the same block, the earlier
2860 candidate wins. */
2861 if (bb1 == ncd && bb2 == ncd)
2862 {
2863 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2864 *where = c2;
2865 else
2866 *where = c1;
2867 }
2868
2869 /* Otherwise, if one of them produced a candidate in the
2870 dominator, that one wins. */
2871 else if (bb1 == ncd)
2872 *where = c1;
2873
2874 else if (bb2 == ncd)
2875 *where = c2;
2876
2877 /* If neither matches the dominator, neither wins. */
2878 else
2879 *where = NULL;
2880
2881 return ncd;
2882 }
2883
2884 /* Consider all candidates that feed PHI. Find the nearest common
2885 dominator of those candidates requiring the given increment INCR.
2886 Further find and return the nearest common dominator of this result
2887 with block NCD. If the returned block contains one or more of the
2888 candidates, return the earliest candidate in the block in *WHERE. */
2889
2890 static basic_block
2891 ncd_with_phi (slsr_cand_t c, double_int incr, gimple phi,
2892 basic_block ncd, slsr_cand_t *where)
2893 {
2894 unsigned i;
2895 slsr_cand_t basis = lookup_cand (c->basis);
2896 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2897
2898 for (i = 0; i < gimple_phi_num_args (phi); i++)
2899 {
2900 tree arg = gimple_phi_arg_def (phi, i);
2901
2902 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2903 {
2904 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2905
2906 if (gimple_code (arg_def) == GIMPLE_PHI)
2907 ncd = ncd_with_phi (c, incr, arg_def, ncd, where);
2908 else
2909 {
2910 slsr_cand_t arg_cand = base_cand_from_table (arg);
2911 double_int diff = arg_cand->index - basis->index;
2912
2913 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
2914 ncd = ncd_for_two_cands (ncd, gimple_bb (arg_cand->cand_stmt),
2915 *where, arg_cand, where);
2916 }
2917 }
2918 }
2919
2920 return ncd;
2921 }
2922
2923 /* Consider the candidate C together with any candidates that feed
2924 C's phi dependence (if any). Find and return the nearest common
2925 dominator of those candidates requiring the given increment INCR.
2926 If the returned block contains one or more of the candidates,
2927 return the earliest candidate in the block in *WHERE. */
2928
2929 static basic_block
2930 ncd_of_cand_and_phis (slsr_cand_t c, double_int incr, slsr_cand_t *where)
2931 {
2932 basic_block ncd = NULL;
2933
2934 if (cand_abs_increment (c) == incr)
2935 {
2936 ncd = gimple_bb (c->cand_stmt);
2937 *where = c;
2938 }
2939
2940 if (phi_dependent_cand_p (c))
2941 ncd = ncd_with_phi (c, incr, lookup_cand (c->def_phi)->cand_stmt,
2942 ncd, where);
2943
2944 return ncd;
2945 }
2946
2947 /* Consider all candidates in the tree rooted at C for which INCR
2948 represents the required increment of C relative to its basis.
2949 Find and return the basic block that most nearly dominates all
2950 such candidates. If the returned block contains one or more of
2951 the candidates, return the earliest candidate in the block in
2952 *WHERE. */
2953
2954 static basic_block
2955 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2956 slsr_cand_t *where)
2957 {
2958 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2959 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2960
2961 /* First find the NCD of all siblings and dependents. */
2962 if (c->sibling)
2963 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2964 incr, &sib_where);
2965 if (c->dependent)
2966 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2967 incr, &dep_where);
2968 if (!sib_ncd && !dep_ncd)
2969 {
2970 new_where = NULL;
2971 ncd = NULL;
2972 }
2973 else if (sib_ncd && !dep_ncd)
2974 {
2975 new_where = sib_where;
2976 ncd = sib_ncd;
2977 }
2978 else if (dep_ncd && !sib_ncd)
2979 {
2980 new_where = dep_where;
2981 ncd = dep_ncd;
2982 }
2983 else
2984 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2985 dep_where, &new_where);
2986
2987 /* If the candidate's increment doesn't match the one we're interested
2988 in (and nor do any increments for feeding defs of a phi-dependence),
2989 then the result depends only on siblings and dependents. */
2990 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
2991
2992 if (!this_ncd || cand_already_replaced (c))
2993 {
2994 *where = new_where;
2995 return ncd;
2996 }
2997
2998 /* Otherwise, compare this candidate with the result from all siblings
2999 and dependents. */
3000 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3001
3002 return ncd;
3003 }
3004
3005 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3006
3007 static inline bool
3008 profitable_increment_p (unsigned index)
3009 {
3010 return (incr_vec[index].cost <= COST_NEUTRAL);
3011 }
3012
3013 /* For each profitable increment in the increment vector not equal to
3014 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3015 dominator of all statements in the candidate chain rooted at C
3016 that require that increment, and insert an initializer
3017 T_0 = stride * increment at that location. Record T_0 with the
3018 increment record. */
3019
3020 static void
3021 insert_initializers (slsr_cand_t c)
3022 {
3023 unsigned i;
3024
3025 for (i = 0; i < incr_vec_len; i++)
3026 {
3027 basic_block bb;
3028 slsr_cand_t where = NULL;
3029 gimple init_stmt;
3030 tree stride_type, new_name, incr_tree;
3031 double_int incr = incr_vec[i].incr;
3032
3033 if (!profitable_increment_p (i)
3034 || incr.is_one ()
3035 || (incr.is_minus_one ()
3036 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
3037 || incr.is_zero ())
3038 continue;
3039
3040 /* We may have already identified an existing initializer that
3041 will suffice. */
3042 if (incr_vec[i].initializer)
3043 {
3044 if (dump_file && (dump_flags & TDF_DETAILS))
3045 {
3046 fputs ("Using existing initializer: ", dump_file);
3047 print_gimple_stmt (dump_file,
3048 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3049 0, 0);
3050 }
3051 continue;
3052 }
3053
3054 /* Find the block that most closely dominates all candidates
3055 with this increment. If there is at least one candidate in
3056 that block, the earliest one will be returned in WHERE. */
3057 bb = nearest_common_dominator_for_cands (c, incr, &where);
3058
3059 /* Create a new SSA name to hold the initializer's value. */
3060 stride_type = TREE_TYPE (c->stride);
3061 new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
3062 incr_vec[i].initializer = new_name;
3063
3064 /* Create the initializer and insert it in the latest possible
3065 dominating position. */
3066 incr_tree = double_int_to_tree (stride_type, incr);
3067 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
3068 c->stride, incr_tree);
3069 if (where)
3070 {
3071 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3072 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3073 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
3074 }
3075 else
3076 {
3077 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3078 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
3079
3080 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
3081 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3082 else
3083 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3084
3085 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3086 }
3087
3088 if (dump_file && (dump_flags & TDF_DETAILS))
3089 {
3090 fputs ("Inserting initializer: ", dump_file);
3091 print_gimple_stmt (dump_file, init_stmt, 0, 0);
3092 }
3093 }
3094 }
3095
3096 /* Return TRUE iff all required increments for candidates feeding PHI
3097 are profitable to replace on behalf of candidate C. */
3098
3099 static bool
3100 all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
3101 {
3102 unsigned i;
3103 slsr_cand_t basis = lookup_cand (c->basis);
3104 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
3105
3106 for (i = 0; i < gimple_phi_num_args (phi); i++)
3107 {
3108 tree arg = gimple_phi_arg_def (phi, i);
3109
3110 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3111 {
3112 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3113
3114 if (gimple_code (arg_def) == GIMPLE_PHI)
3115 {
3116 if (!all_phi_incrs_profitable (c, arg_def))
3117 return false;
3118 }
3119 else
3120 {
3121 int j;
3122 slsr_cand_t arg_cand = base_cand_from_table (arg);
3123 double_int increment = arg_cand->index - basis->index;
3124
3125 if (!address_arithmetic_p && increment.is_negative ())
3126 increment = -increment;
3127
3128 j = incr_vec_index (increment);
3129
3130 if (dump_file && (dump_flags & TDF_DETAILS))
3131 {
3132 fprintf (dump_file, " Conditional candidate %d, phi: ",
3133 c->cand_num);
3134 print_gimple_stmt (dump_file, phi, 0, 0);
3135 fputs (" increment: ", dump_file);
3136 dump_double_int (dump_file, increment, false);
3137 if (j < 0)
3138 fprintf (dump_file,
3139 "\n Not replaced; incr_vec overflow.\n");
3140 else {
3141 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3142 if (profitable_increment_p (j))
3143 fputs (" Replacing...\n", dump_file);
3144 else
3145 fputs (" Not replaced.\n", dump_file);
3146 }
3147 }
3148
3149 if (j < 0 || !profitable_increment_p (j))
3150 return false;
3151 }
3152 }
3153 }
3154
3155 return true;
3156 }
3157
3158 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3159 type TO_TYPE, and insert it in front of the statement represented
3160 by candidate C. Use *NEW_VAR to create the new SSA name. Return
3161 the new SSA name. */
3162
3163 static tree
3164 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3165 {
3166 tree cast_lhs;
3167 gimple cast_stmt;
3168 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3169
3170 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3171 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
3172 from_expr, NULL_TREE);
3173 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3174 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3175
3176 if (dump_file && (dump_flags & TDF_DETAILS))
3177 {
3178 fputs (" Inserting: ", dump_file);
3179 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3180 }
3181
3182 return cast_lhs;
3183 }
3184
3185 /* Replace the RHS of the statement represented by candidate C with
3186 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3187 leave C unchanged or just interchange its operands. The original
3188 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3189 If the replacement was made and we are doing a details dump,
3190 return the revised statement, else NULL. */
3191
3192 static gimple
3193 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3194 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3195 slsr_cand_t c)
3196 {
3197 if (new_code != old_code
3198 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3199 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3200 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3201 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3202 {
3203 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3204 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3205 update_stmt (gsi_stmt (gsi));
3206 c->cand_stmt = gsi_stmt (gsi);
3207
3208 if (dump_file && (dump_flags & TDF_DETAILS))
3209 return gsi_stmt (gsi);
3210 }
3211
3212 else if (dump_file && (dump_flags & TDF_DETAILS))
3213 fputs (" (duplicate, not actually replacing)\n", dump_file);
3214
3215 return NULL;
3216 }
3217
3218 /* Strength-reduce the statement represented by candidate C by replacing
3219 it with an equivalent addition or subtraction. I is the index into
3220 the increment vector identifying C's increment. NEW_VAR is used to
3221 create a new SSA name if a cast needs to be introduced. BASIS_NAME
3222 is the rhs1 to use in creating the add/subtract. */
3223
3224 static void
3225 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3226 {
3227 gimple stmt_to_print = NULL;
3228 tree orig_rhs1, orig_rhs2;
3229 tree rhs2;
3230 enum tree_code orig_code, repl_code;
3231 double_int cand_incr;
3232
3233 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3234 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3235 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3236 cand_incr = cand_increment (c);
3237
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 {
3240 fputs ("Replacing: ", dump_file);
3241 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
3242 stmt_to_print = c->cand_stmt;
3243 }
3244
3245 if (address_arithmetic_p)
3246 repl_code = POINTER_PLUS_EXPR;
3247 else
3248 repl_code = PLUS_EXPR;
3249
3250 /* If the increment has an initializer T_0, replace the candidate
3251 statement with an add of the basis name and the initializer. */
3252 if (incr_vec[i].initializer)
3253 {
3254 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3255 tree orig_type = TREE_TYPE (orig_rhs2);
3256
3257 if (types_compatible_p (orig_type, init_type))
3258 rhs2 = incr_vec[i].initializer;
3259 else
3260 rhs2 = introduce_cast_before_cand (c, orig_type,
3261 incr_vec[i].initializer);
3262
3263 if (incr_vec[i].incr != cand_incr)
3264 {
3265 gcc_assert (repl_code == PLUS_EXPR);
3266 repl_code = MINUS_EXPR;
3267 }
3268
3269 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3270 orig_code, orig_rhs1, orig_rhs2,
3271 c);
3272 }
3273
3274 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3275 with a subtract of the stride from the basis name, a copy
3276 from the basis name, or an add of the stride to the basis
3277 name, respectively. It may be necessary to introduce a
3278 cast (or reuse an existing cast). */
3279 else if (cand_incr.is_one ())
3280 {
3281 tree stride_type = TREE_TYPE (c->stride);
3282 tree orig_type = TREE_TYPE (orig_rhs2);
3283
3284 if (types_compatible_p (orig_type, stride_type))
3285 rhs2 = c->stride;
3286 else
3287 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3288
3289 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3290 orig_code, orig_rhs1, orig_rhs2,
3291 c);
3292 }
3293
3294 else if (cand_incr.is_minus_one ())
3295 {
3296 tree stride_type = TREE_TYPE (c->stride);
3297 tree orig_type = TREE_TYPE (orig_rhs2);
3298 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3299
3300 if (types_compatible_p (orig_type, stride_type))
3301 rhs2 = c->stride;
3302 else
3303 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3304
3305 if (orig_code != MINUS_EXPR
3306 || !operand_equal_p (basis_name, orig_rhs1, 0)
3307 || !operand_equal_p (rhs2, orig_rhs2, 0))
3308 {
3309 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3310 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3311 update_stmt (gsi_stmt (gsi));
3312 c->cand_stmt = gsi_stmt (gsi);
3313
3314 if (dump_file && (dump_flags & TDF_DETAILS))
3315 stmt_to_print = gsi_stmt (gsi);
3316 }
3317 else if (dump_file && (dump_flags & TDF_DETAILS))
3318 fputs (" (duplicate, not actually replacing)\n", dump_file);
3319 }
3320
3321 else if (cand_incr.is_zero ())
3322 {
3323 tree lhs = gimple_assign_lhs (c->cand_stmt);
3324 tree lhs_type = TREE_TYPE (lhs);
3325 tree basis_type = TREE_TYPE (basis_name);
3326
3327 if (types_compatible_p (lhs_type, basis_type))
3328 {
3329 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
3330 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3331 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3332 gsi_replace (&gsi, copy_stmt, false);
3333 c->cand_stmt = copy_stmt;
3334
3335 if (dump_file && (dump_flags & TDF_DETAILS))
3336 stmt_to_print = copy_stmt;
3337 }
3338 else
3339 {
3340 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3341 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
3342 basis_name,
3343 NULL_TREE);
3344 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3345 gsi_replace (&gsi, cast_stmt, false);
3346 c->cand_stmt = cast_stmt;
3347
3348 if (dump_file && (dump_flags & TDF_DETAILS))
3349 stmt_to_print = cast_stmt;
3350 }
3351 }
3352 else
3353 gcc_unreachable ();
3354
3355 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3356 {
3357 fputs ("With: ", dump_file);
3358 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
3359 fputs ("\n", dump_file);
3360 }
3361 }
3362
3363 /* For each candidate in the tree rooted at C, replace it with
3364 an increment if such has been shown to be profitable. */
3365
3366 static void
3367 replace_profitable_candidates (slsr_cand_t c)
3368 {
3369 if (!cand_already_replaced (c))
3370 {
3371 double_int increment = cand_abs_increment (c);
3372 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3373 int i;
3374
3375 i = incr_vec_index (increment);
3376
3377 /* Only process profitable increments. Nothing useful can be done
3378 to a cast or copy. */
3379 if (i >= 0
3380 && profitable_increment_p (i)
3381 && orig_code != MODIFY_EXPR
3382 && orig_code != NOP_EXPR)
3383 {
3384 if (phi_dependent_cand_p (c))
3385 {
3386 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
3387
3388 if (all_phi_incrs_profitable (c, phi))
3389 {
3390 /* Look up the LHS SSA name from C's basis. This will be
3391 the RHS1 of the adds we will introduce to create new
3392 phi arguments. */
3393 slsr_cand_t basis = lookup_cand (c->basis);
3394 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3395
3396 /* Create a new phi statement that will represent C's true
3397 basis after the transformation is complete. */
3398 location_t loc = gimple_location (c->cand_stmt);
3399 tree name = create_phi_basis (c, phi, basis_name,
3400 loc, UNKNOWN_STRIDE);
3401
3402 /* Replace C with an add of the new basis phi and the
3403 increment. */
3404 replace_one_candidate (c, i, name);
3405 }
3406 }
3407 else
3408 {
3409 slsr_cand_t basis = lookup_cand (c->basis);
3410 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3411 replace_one_candidate (c, i, basis_name);
3412 }
3413 }
3414 }
3415
3416 if (c->sibling)
3417 replace_profitable_candidates (lookup_cand (c->sibling));
3418
3419 if (c->dependent)
3420 replace_profitable_candidates (lookup_cand (c->dependent));
3421 }
3422 \f
3423 /* Analyze costs of related candidates in the candidate vector,
3424 and make beneficial replacements. */
3425
3426 static void
3427 analyze_candidates_and_replace (void)
3428 {
3429 unsigned i;
3430 slsr_cand_t c;
3431
3432 /* Each candidate that has a null basis and a non-null
3433 dependent is the root of a tree of related statements.
3434 Analyze each tree to determine a subset of those
3435 statements that can be replaced with maximum benefit. */
3436 FOR_EACH_VEC_ELT (cand_vec, i, c)
3437 {
3438 slsr_cand_t first_dep;
3439
3440 if (c->basis != 0 || c->dependent == 0)
3441 continue;
3442
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3445 c->cand_num);
3446
3447 first_dep = lookup_cand (c->dependent);
3448
3449 /* If this is a chain of CAND_REFs, unconditionally replace
3450 each of them with a strength-reduced data reference. */
3451 if (c->kind == CAND_REF)
3452 replace_refs (c);
3453
3454 /* If the common stride of all related candidates is a known
3455 constant, each candidate without a phi-dependence can be
3456 profitably replaced. Each replaces a multiply by a single
3457 add, with the possibility that a feeding add also goes dead.
3458 A candidate with a phi-dependence is replaced only if the
3459 compensation code it requires is offset by the strength
3460 reduction savings. */
3461 else if (TREE_CODE (c->stride) == INTEGER_CST)
3462 replace_uncond_cands_and_profitable_phis (first_dep);
3463
3464 /* When the stride is an SSA name, it may still be profitable
3465 to replace some or all of the dependent candidates, depending
3466 on whether the introduced increments can be reused, or are
3467 less expensive to calculate than the replaced statements. */
3468 else
3469 {
3470 enum machine_mode mode;
3471 bool speed;
3472
3473 /* Determine whether we'll be generating pointer arithmetic
3474 when replacing candidates. */
3475 address_arithmetic_p = (c->kind == CAND_ADD
3476 && POINTER_TYPE_P (c->cand_type));
3477
3478 /* If all candidates have already been replaced under other
3479 interpretations, nothing remains to be done. */
3480 if (!count_candidates (c))
3481 continue;
3482
3483 /* Construct an array of increments for this candidate chain. */
3484 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3485 incr_vec_len = 0;
3486 record_increments (c);
3487
3488 /* Determine which increments are profitable to replace. */
3489 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3490 speed = optimize_cands_for_speed_p (c);
3491 analyze_increments (first_dep, mode, speed);
3492
3493 /* Insert initializers of the form T_0 = stride * increment
3494 for use in profitable replacements. */
3495 insert_initializers (first_dep);
3496 dump_incr_vec ();
3497
3498 /* Perform the replacements. */
3499 replace_profitable_candidates (first_dep);
3500 free (incr_vec);
3501 }
3502 }
3503 }
3504
3505 static unsigned
3506 execute_strength_reduction (void)
3507 {
3508 /* Create the obstack where candidates will reside. */
3509 gcc_obstack_init (&cand_obstack);
3510
3511 /* Allocate the candidate vector. */
3512 cand_vec.create (128);
3513
3514 /* Allocate the mapping from statements to candidate indices. */
3515 stmt_cand_map = pointer_map_create ();
3516
3517 /* Create the obstack where candidate chains will reside. */
3518 gcc_obstack_init (&chain_obstack);
3519
3520 /* Allocate the mapping from base expressions to candidate chains. */
3521 base_cand_map.create (500);
3522
3523 /* Initialize the loop optimizer. We need to detect flow across
3524 back edges, and this gives us dominator information as well. */
3525 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3526
3527 /* Walk the CFG in predominator order looking for strength reduction
3528 candidates. */
3529 find_candidates_dom_walker (CDI_DOMINATORS)
3530 .walk (cfun->cfg->x_entry_block_ptr);
3531
3532 if (dump_file && (dump_flags & TDF_DETAILS))
3533 {
3534 dump_cand_vec ();
3535 dump_cand_chains ();
3536 }
3537
3538 /* Analyze costs and make appropriate replacements. */
3539 analyze_candidates_and_replace ();
3540
3541 loop_optimizer_finalize ();
3542 base_cand_map.dispose ();
3543 obstack_free (&chain_obstack, NULL);
3544 pointer_map_destroy (stmt_cand_map);
3545 cand_vec.release ();
3546 obstack_free (&cand_obstack, NULL);
3547
3548 return 0;
3549 }
3550
3551 static bool
3552 gate_strength_reduction (void)
3553 {
3554 return flag_tree_slsr;
3555 }
3556
3557 namespace {
3558
3559 const pass_data pass_data_strength_reduction =
3560 {
3561 GIMPLE_PASS, /* type */
3562 "slsr", /* name */
3563 OPTGROUP_NONE, /* optinfo_flags */
3564 true, /* has_gate */
3565 true, /* has_execute */
3566 TV_GIMPLE_SLSR, /* tv_id */
3567 ( PROP_cfg | PROP_ssa ), /* properties_required */
3568 0, /* properties_provided */
3569 0, /* properties_destroyed */
3570 0, /* todo_flags_start */
3571 TODO_verify_ssa, /* todo_flags_finish */
3572 };
3573
3574 class pass_strength_reduction : public gimple_opt_pass
3575 {
3576 public:
3577 pass_strength_reduction (gcc::context *ctxt)
3578 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
3579 {}
3580
3581 /* opt_pass methods: */
3582 bool gate () { return gate_strength_reduction (); }
3583 unsigned int execute () { return execute_strength_reduction (); }
3584
3585 }; // class pass_strength_reduction
3586
3587 } // anon namespace
3588
3589 gimple_opt_pass *
3590 make_pass_strength_reduction (gcc::context *ctxt)
3591 {
3592 return new pass_strength_reduction (ctxt);
3593 }