/* Predictive commoning.
   Copyright (C) 2005-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This file implements the predictive commoning optimization.  Predictive
   commoning can be viewed as CSE around a loop, and with some improvements,
   as generalized strength reduction -- i.e., reusing values computed in
   earlier iterations of a loop in the later ones.  So far, the pass only
   handles the most useful case, that is, reusing values of memory references.
   If you think this is all just a special case of PRE, you are sort of right;
   however, concentrating on loops is simpler, and makes it possible to
   incorporate data dependence analysis to detect the opportunities, perform
   loop unrolling to avoid copies together with renaming immediately,
   and if needed, we could also take register pressure into account.

   Let us demonstrate what is done on an example:

   for (i = 0; i < 100; i++)
     {
       a[i+2] = a[i] + a[i+1];
       b[10] = b[10] + i;
       c[i] = c[99 - i];
       d[i] = d[i + 1];
     }

   1) We find data references in the loop, and split them to mutually
      independent groups (i.e., we find components of a data dependence
      graph).  We ignore read-read dependences whose distance is not constant.
      (TODO -- we could also ignore antidependences).  In this example, we
      find the following groups:

      a[i]{read}, a[i+1]{read}, a[i+2]{write}
      b[10]{read}, b[10]{write}
      c[99 - i]{read}, c[i]{write}
      d[i + 1]{read}, d[i]{write}

   2) Inside each of the groups, we verify several conditions:
      a) all the references must differ in indices only, and the indices
         must all have the same step
      b) the references must dominate the loop latch (and thus, they must be
         ordered by dominance relation).
      c) the distance of the indices must be a small multiple of the step
      We are then able to compute the difference of the references (# of
      iterations before they point to the same place as the first of them).
      Also, in case there are writes in the loop, we split the groups into
      chains whose head is the write whose values are used by the reads in
      the same chain.  The chains are then processed independently,
      making the further transformations simpler.  Also, the shorter chains
      need the same number of registers, but may require a lower unrolling
      factor in order to get rid of the copies on the loop latch.

      In our example, we get the following chains (the chain for c is invalid).

      a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
      b[10]{read,+0}, b[10]{write,+0}
      d[i + 1]{read,+0}, d[i]{write,+1}

   3) For each read, we determine the read or write whose value it reuses,
      together with the distance of this reuse.  I.e., we take the last
      reference before it with distance 0, or the last of the references
      with the smallest positive distance to the read.  Then, we remove
      the references that are not used in any of these chains, discard the
      empty groups, and propagate all the links so that they point to the
      single root reference of the chain (adjusting their distance
      appropriately).  Some extra care needs to be taken for references with
      step 0.  In our example (the numbers indicate the distance of the
      reuse),

      a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
      b[10] --> (*) 1, b[10] (*)

   4) The chains are combined together if possible.  If the corresponding
      elements of two chains are always combined together with the same
      operator, we remember just the result of this combination, instead
      of remembering the values separately.  We may need to perform
      reassociation to enable combining, for example

      e[i] + f[i+1] + e[i+1] + f[i]

      can be reassociated as

      (e[i] + f[i]) + (e[i+1] + f[i+1])

      and we can combine the chains for e and f into one chain.

   5) For each root reference (end of the chain) R, let N be the maximum
      distance of a reference reusing its value.  Variables R0 up to RN are
      created, together with phi nodes that transfer values from R1 .. RN to
      R0 .. R(N-1).
      Initial values are loaded to R0..R(N-1) (in case the references are not
      all necessarily accessed and they may trap, we may fail here;
      TODO -- sometimes, the loads could be guarded by a check for the number
      of iterations).  Values loaded/stored in roots are also copied to
      RN.  Other reads are replaced with the appropriate variable Ri.
      Everything is put to SSA form.

      As a small improvement, if R0 is dead after the root (i.e., all uses of
      the value with the maximum distance dominate the root), we can avoid
      creating RN and use R0 instead of it.

      In our example, we get (only the parts concerning a and b are shown):
      for (i = 0; i < 100; i++)
        {
          f = phi (a[0], s);
          s = phi (a[1], f);
          x = phi (b[10], x);

          f = f + s;
          a[i+2] = f;
          x = x + i;
          b[10] = x;
        }

   6) The unroll factor F is determined as the least common multiple of
      (N + 1) over the root references (N for references for which we avoided
      creating RN).  If F and the loop are small enough, the loop is unrolled
      F times.  The stores to RN (R0) in the copies of the loop body are
      periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
      be coalesced and the copies can be eliminated.

      TODO -- copy propagation and other optimizations may change the live
      ranges of the temporary registers and prevent them from being coalesced;
      this may increase the register pressure.

      In our case, F = 2 and the (main loop of the) result is

      for (i = 0; i < ...; i += 2)
        {
          f = phi (a[0], f);
          s = phi (a[1], s);
          x = phi (b[10], x);

          f = f + s;
          a[i+2] = f;
          x = x + i;
          b[10] = x;

          s = s + f;
          a[i+3] = s;
          x = x + i;
          b[10] = x;
        }

   TODO -- stores killing other stores can be taken into account, e.g.,
   for (i = 0; i < n; i++)
     {
       a[i] = 1;
       a[i+2] = 2;
     }

   can be replaced with

   t0 = a[0];
   t1 = a[1];
   for (i = 0; i < n; i++)
     {
       a[i] = 1;
       t2 = 2;
       t0 = t1;
       t1 = t2;
     }
   a[n] = t0;
   a[n+1] = t1;

   The interesting part is that this would generalize store motion; still,
   since store motion is performed elsewhere, it does not seem that important.

   Predictive commoning can be generalized for arbitrary computations (not
   just memory loads), and also nontrivial transfer functions (e.g., replacing
   i * i with ii_last + 2 * i + 1), to generalize strength reduction (a
   hand-worked sketch follows this comment).  */
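
/* As a hand-worked illustration of that generalization (a sketch only --
   this pass does not implement it): a loop

     for (i = 0; i < n; i++)
       use (i * i);

   could keep i * i in a scalar and update it with the transfer function
   mentioned above, since (i + 1) * (i + 1) == i * i + 2 * i + 1:

     ii = 0;
     for (i = 0; i < n; i++)
       {
         use (ii);
         ii = ii + 2 * i + 1;
       }  */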

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"

/* The maximum number of iterations between the considered memory
   references.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)

/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  */
  struct data_reference *ref;

  /* The statement in which the reference appears.  */
  gimple *stmt;

  /* In case STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to a phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;


/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* The root of the chain is a store; the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains.  */
  CT_COMBINATION
};

/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;


/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};

/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};

/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static hash_map<tree, name_expansion *> *name_expansions;

/* Dumps data reference REF to FILE.  */

extern void dump_dref (FILE *, dref);
void
dump_dref (FILE *file, dref ref)
{
  if (ref->ref)
    {
      fprintf (file, "    ");
      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
      fprintf (file, " (id %u%s)\n", ref->pos,
               DR_IS_READ (ref->ref) ? "" : ", write");

      fprintf (file, "      offset ");
      print_decs (ref->offset, file);
      fprintf (file, "\n");

      fprintf (file, "      distance %u\n", ref->distance);
    }
  else
    {
      if (gimple_code (ref->stmt) == GIMPLE_PHI)
        fprintf (file, "    looparound ref\n");
      else
        fprintf (file, "    combination ref\n");
      fprintf (file, "      in statement ");
      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
      fprintf (file, "\n");
      fprintf (file, "      distance %u\n", ref->distance);
    }

}

/* Dumps CHAIN to FILE.  */

extern void dump_chain (FILE *, chain_p);
void
dump_chain (FILE *file, chain_p chain)
{
  dref a;
  const char *chain_type;
  unsigned i;
  tree var;

  switch (chain->type)
    {
    case CT_INVARIANT:
      chain_type = "Load motion";
      break;

    case CT_LOAD:
      chain_type = "Loads-only";
      break;

    case CT_STORE_LOAD:
      chain_type = "Store-loads";
      break;

    case CT_COMBINATION:
      chain_type = "Combination";
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
           chain->combined ? " (combined)" : "");
  if (chain->type != CT_INVARIANT)
    fprintf (file, "  max distance %u%s\n", chain->length,
             chain->has_max_use_after ? "" : ", may reuse first");

  if (chain->type == CT_COMBINATION)
    {
      fprintf (file, "  equal to %p %s %p in type ",
               (void *) chain->ch1, op_symbol_code (chain->op),
               (void *) chain->ch2);
      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (chain->vars.exists ())
    {
      fprintf (file, "  vars");
      FOR_EACH_VEC_ELT (chain->vars, i, var)
        {
          fprintf (file, " ");
          print_generic_expr (file, var, TDF_SLIM);
        }
      fprintf (file, "\n");
    }

  if (chain->inits.exists ())
    {
      fprintf (file, "  inits");
      FOR_EACH_VEC_ELT (chain->inits, i, var)
        {
          fprintf (file, " ");
          print_generic_expr (file, var, TDF_SLIM);
        }
      fprintf (file, "\n");
    }

  fprintf (file, "  references:\n");
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    dump_dref (file, a);

  fprintf (file, "\n");
}

/* Dumps CHAINS to FILE.  */

extern void dump_chains (FILE *, vec<chain_p> );
void
dump_chains (FILE *file, vec<chain_p> chains)
{
  chain_p chain;
  unsigned i;

  FOR_EACH_VEC_ELT (chains, i, chain)
    dump_chain (file, chain);
}

/* Dumps COMP to FILE.  */

extern void dump_component (FILE *, struct component *);
void
dump_component (FILE *file, struct component *comp)
{
  dref a;
  unsigned i;

  fprintf (file, "Component%s:\n",
           comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    dump_dref (file, a);
  fprintf (file, "\n");
}

/* Dumps COMPS to FILE.  */

extern void dump_components (FILE *, struct component *);
void
dump_components (FILE *file, struct component *comps)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    dump_component (file, comp);
}
/* Frees a chain CHAIN.  */

static void
release_chain (chain_p chain)
{
  dref ref;
  unsigned i;

  if (chain == NULL)
    return;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    free (ref);

  chain->refs.release ();
  chain->vars.release ();
  chain->inits.release ();

  free (chain);
}

/* Frees CHAINS.  */

static void
release_chains (vec<chain_p> chains)
{
  unsigned i;
  chain_p chain;

  FOR_EACH_VEC_ELT (chains, i, chain)
    release_chain (chain);
  chains.release ();
}

/* Frees a component COMP.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}

/* Frees list of components COMPS.  */

static void
release_components (struct component *comps)
{
  struct component *act, *next;

  for (act = comps; act; act = next)
    {
      next = act->next;
      release_component (act);
    }
}

/* Finds the root of the tree given by FATHERS that contains A, and performs
   path shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root, n;

  for (root = a; root != fathers[root]; root = fathers[root])
    continue;

  for (; a != root; a = n)
    {
      n = fathers[a];
      fathers[a] = root;
    }

  return root;
}

/* Join operation for the DFU (union-find) structure.  FATHERS gives the
   tree, SIZES are sizes of the components, A and B are components to
   merge.  */

static void
merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
{
  unsigned ca = component_of (fathers, a);
  unsigned cb = component_of (fathers, b);

  if (ca == cb)
    return;

  if (sizes[ca] < sizes[cb])
    {
      sizes[cb] += sizes[ca];
      fathers[ca] = cb;
    }
  else
    {
      sizes[ca] += sizes[cb];
      fathers[cb] = ca;
    }
}
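
/* A minimal usage sketch of the two union-find functions above (this
   mirrors how split_data_refs_to_components below drives them; N is the
   number of data references):

     for (i = 0; i <= n; i++)
       {
         fathers[i] = i;	(each reference starts as its own component)
         sizes[i] = 1;
       }
     merge_comps (fathers, sizes, ia, ib);	(join two components)
     if (component_of (fathers, ia) == component_of (fathers, ib))
       ...					(now in the same component)  */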

/* Returns true if A is a reference that is suitable for predictive commoning
   in the innermost loop that contains it.  REF_STEP is set according to the
   step of the reference A.  */

static bool
suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
{
  tree ref = DR_REF (a), step = DR_STEP (a);

  if (!step
      || TREE_THIS_VOLATILE (ref)
      || !is_gimple_reg_type (TREE_TYPE (ref))
      || tree_could_throw_p (ref))
    return false;

  if (integer_zerop (step))
    *ref_step = RS_INVARIANT;
  else if (integer_nonzerop (step))
    *ref_step = RS_NONZERO;
  else
    *ref_step = RS_ANY;

  return true;
}

/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
                                  &name_expansions);
  aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}

/* Determines the number of iterations of the innermost enclosing loop before
   B refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
                  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references are the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have a loop-invariant address, check that they
         access exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
              && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
                                  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
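
/* A worked example for determine_offset: for int a[] (assuming a 4-byte
   int) and the references A = a[i] and B = a[i + 2] from the overview at
   the top of this file, both references have base address &a and step 4;
   the difference of their offsets is 0 - 8 = -8 == -2 * 4, so *OFF is set
   to -2 -- matching the -2 annotation of a[i+2] in the chains shown in
   the overview.  */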

/* Returns the last basic block in LOOP for which we are sure that
   it is executed whenever the loop is entered.  */

static basic_block
last_always_executed_block (struct loop *loop)
{
  unsigned i;
  vec<edge> exits = get_loop_exit_edges (loop);
  edge ex;
  basic_block last = loop->latch;

  FOR_EACH_VEC_ELT (exits, i, ex)
    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
  exits.release ();

  return last;
}

/* Splits the dependence graph on DATAREFS described by DEPENDS to
   components.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
                               vec<data_reference_p> datarefs,
                               vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
        {
          /* A fake reference for call or asm_expr that may clobber memory;
             just fail.  */
          goto end;
        }
      /* The predcom pass isn't prepared to handle calls with data
         references.  */
      if (is_gimple_call (DR_STMT (dr)))
        goto end;
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
        {
          ia = (unsigned) (size_t) dr->aux;
          merge_comps (comp_father, comp_size, n, ia);
        }
    }

  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
        continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
        continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
        {
          if (ia == bad || ib == bad
              || !determine_offset (dra, drb, &dummy_off))
            continue;
        }
      /* If A is a read and B a write (or vice versa) and there is an
         unsuitable dependence, instead of merging both components into a
         component that will certainly not pass suitable_component_p, just
         put the read into the bad component; perhaps at least the write,
         together with all the other data refs in its component, will be
         optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
        {
          if (ia == bad)
            continue;
          else if (!determine_offset (dra, drb, &dummy_off))
            {
              merge_comps (comp_father, comp_size, bad, ia);
              continue;
            }
        }
      else if (DR_IS_READ (drb) && ia != bad)
        {
          if (ib == bad)
            continue;
          else if (!determine_offset (dra, drb, &dummy_off))
            {
              merge_comps (comp_father, comp_size, bad, ib);
              continue;
            }
        }

      merge_comps (comp_father, comp_size, ia, ib);
    }

  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
        continue;

      comp = comps[ca];
      if (!comp)
        {
          comp = XCNEW (struct component);
          comp->refs.create (comp_size[ca]);
          comps[ca] = comp;
        }

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
              = dominated_by_p (CDI_DOMINATORS, last_always_executed,
                                gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
        {
          comp->next = comp_list;
          comp_list = comp;
        }
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}

/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      if (!just_once_each_iteration_p (loop, ba))
        return false;

      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
        has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
        return false;

      enum ref_step_type a_step;
      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
                           && a_step == comp->comp_step);
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}

/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  struct component **comp, *act;

  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
        comp = &act->next;
      else
        {
          dref ref;
          unsigned i;

          *comp = act->next;
          FOR_EACH_VEC_ELT (act->refs, i, ref)
            free (ref);
          release_component (act);
        }
    }

  return comps;
}

/* Compares two drefs A and B by their offset and position.  Callback for
   qsort.  */

static int
order_drefs (const void *a, const void *b)
{
  const dref *const da = (const dref *) a;
  const dref *const db = (const dref *) b;
  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);

  if (offcmp != 0)
    return offcmp;

  return (*da)->pos - (*db)->pos;
}

/* Returns the root of CHAIN.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}

/* Adds REF to the chain CHAIN.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}

/* Returns the chain for invariant component COMP.  */

static chain_p
make_invariant_chain (struct component *comp)
{
  chain_p chain = XCNEW (struct chain);
  unsigned i;
  dref ref;

  chain->type = CT_INVARIANT;

  chain->all_always_accessed = true;

  FOR_EACH_VEC_ELT (comp->refs, i, ref)
    {
      chain->refs.safe_push (ref);
      chain->all_always_accessed &= ref->always_accessed;
    }

  return chain;
}

/* Make a new chain rooted at REF.  */

static chain_p
make_rooted_chain (dref ref)
{
  chain_p chain = XCNEW (struct chain);

  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;

  chain->refs.safe_push (ref);
  chain->all_always_accessed = ref->always_accessed;

  ref->distance = 0;

  return chain;
}

/* Returns true if CHAIN is not trivial.  */

static bool
nontrivial_chain_p (chain_p chain)
{
  return chain != NULL && chain->refs.length () > 1;
}

/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
   is no such name.  */

static tree
name_for_ref (dref ref)
{
  tree name;

  if (is_gimple_assign (ref->stmt))
    {
      if (!ref->ref || DR_IS_READ (ref->ref))
        name = gimple_assign_lhs (ref->stmt);
      else
        name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);

  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
}

/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
                     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of the loop, hence its address must
     be invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, the initializer must
     access exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
            && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     the -DISTANCE-th iteration.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
                                  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (off != distance)
    return false;

  return true;
}

/* Finds a looparound phi node of LOOP that copies the value of REF, and if
   its initial value is correct (equal to the initial value of REF shifted by
   one iteration), returns the phi node.  Otherwise, NULL is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple *init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
        name = gimple_assign_lhs (ref->stmt);
      else
        name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
        break;
    }

  if (gsi_end_p (psi))
    return NULL;

  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
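
/* For illustration, a sketch of a loop in which find_looparound_phi
   succeeds for the read a[i] (the copy via P could have been created by
   PRE or written by the user):

     t0 = a[-1];
     for (i = 0; i < n; i++)
       {
         p = phi (t0, t);	(p == a[i - 1] in iteration i)
         t = a[i];
         use (p + t);
       }

   The latch value of P is the name holding a[i], and the preheader value
   T0 loads the reference shifted by one iteration back, so
   valid_initializer_p accepts it with distance ref->distance + 1.  */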

/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}

/* For references in CHAIN that are copied around the LOOP (created previously
   by PRE, or by the user), add the results of such copies to the chain.
   This enables us to remove the copies by unrolling, and may need fewer
   registers (also, it may allow us to combine chains together).  */

static void
add_looparound_copies (struct loop *loop, chain_p chain)
{
  unsigned i;
  dref ref, root = get_chain_root (chain);
  gphi *phi;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    {
      phi = find_looparound_phi (loop, ref, root);
      if (!phi)
        continue;

      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
      insert_looparound_copy (chain, ref, phi);
    }
}

/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
                      struct component *comp,
                      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      if (!chain || DR_IS_WRITE (a->ref)
          || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
        {
          if (nontrivial_chain_p (chain))
            {
              add_looparound_copies (loop, chain);
              chains->safe_push (chain);
            }
          else
            release_chain (chain);
          chain = make_rooted_chain (a);
          last_ofs = a->offset;
          continue;
        }

      add_ref_to_chain (chain, a);
    }

  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
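
/* For example (a sketch, assuming MAX_DISTANCE == 8): if a component
   contains reads with offsets 0, 1 and 9, the loop above roots a chain
   at offset 0, adds the reference at offset 1 to it (distance 1), and
   starts a new chain at offset 9, because 9 - 0 >= MAX_DISTANCE.  A
   write always starts a new chain, so stores only ever appear as chain
   roots.  */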

/* Find roots of the values and determine distances in components COMPS, and
   separate the references into CHAINS.  LOOP is the current loop.  */

static void
determine_roots (struct loop *loop,
                 struct component *comps, vec<chain_p> *chains)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    determine_roots_comp (loop, comp, chains);
}

/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in the rhs.  */

static void
replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into a GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as the lhs or rhs of a modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

         OLD = VAL

         If OLD is a memory reference, then VAL is gimple_val, and we transform
         this to

         OLD = VAL
         NEW = VAL

         Otherwise, we are replacing a combination chain,
         VAL is the expression that performs the combination, and OLD is an
         SSA name.  In this case, we transform the assignment to

         OLD = VAL
         NEW = OLD

         */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
        {
          val = gimple_assign_rhs1 (stmt);
          gcc_assert (gimple_assign_single_p (stmt));
          if (TREE_CLOBBER_P (val))
            val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
          else
            gcc_assert (gimple_assign_copy_p (stmt));
        }
    }
  else
    {
      /* VAL = OLD

         is transformed to

         VAL = OLD
         NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}

/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
                       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
                      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
                                 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
                     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
                             addr, alias_ptr),
                     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
}

/* Get the initialization expression for the INDEX-th temporary variable
   of CHAIN.  */

static tree
get_init_expr (chain_p chain, unsigned index)
{
  if (chain->type == CT_COMBINATION)
    {
      tree e1 = get_init_expr (chain->ch1, index);
      tree e2 = get_init_expr (chain->ch2, index);

      return fold_build2 (chain->op, chain->rslt_type, e1, e2);
    }
  else
    return chain->inits[index];
}

/* Returns a new temporary variable used for the I-th variable carrying
   value of REF.  The variable's uid is marked in TMP_VARS.  */

static tree
predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
{
  tree type = TREE_TYPE (ref);
  /* We never access the components of the temporary variable in predictive
     commoning.  */
  tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
  bitmap_set_bit (tmp_vars, DECL_UID (var));
  return var;
}

/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is a nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
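
/* For instance (a sketch), for the chain of a from the overview at the
   top of this file, with N == 2 and !reuse_first, the code above creates
   the ssa names R0, R1 and R2 and emits in the loop header

     R0 = phi (a[0], R1);
     R1 = phi (a[1], R2);

   R2 then receives the value computed at the root; that replacement is
   done by replace_ref_with, called from initialize_root below.  */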

/* Create the variables and initialization statement for root of chain
   CHAIN.  Uids of the newly created temporary variables are marked
   in TMP_VARS.  */

static void
initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  dref root = get_chain_root (chain);
  bool in_lhs = (chain->type == CT_STORE_LOAD
                 || chain->type == CT_COMBINATION);

  initialize_root_vars (loop, chain, tmp_vars);
  replace_ref_with (root->stmt,
                    chain->vars[chain->length],
                    true, in_lhs);
}

/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
                         vec<tree> *vars, vec<tree> inits,
                         bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}


/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
                           &vars, chain->inits, tmp_vars);

  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
        {
          n_writes--;
          if (n_writes)
            {
              var = vars[0];
              var = make_ssa_name (SSA_NAME_VAR (var));
              vars[0] = var;
            }
          else
            ridx = 1;
        }

      replace_ref_with (a->stmt, vars[ridx],
                        !is_read, !is_read);
    }
}

/* Returns the single statement in which NAME is used, except for
   the looparound phi nodes contained in one of the chains.  If there is no
   such statement, or there are multiple such statements, NULL is
   returned.  */

static gimple *
single_nonlooparound_use (tree name)
{
  use_operand_p use;
  imm_use_iterator it;
  gimple *stmt, *ret = NULL;

  FOR_EACH_IMM_USE_FAST (use, it, name)
    {
      stmt = USE_STMT (use);

      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
             could not be processed anyway, so just fail for them.  */
          if (bitmap_bit_p (looparound_phis,
                            SSA_NAME_VERSION (PHI_RESULT (stmt))))
            continue;

          return NULL;
        }
      else if (is_gimple_debug (stmt))
        continue;
      else if (ret != NULL)
        return NULL;
      else
        ret = stmt;
    }

  return ret;
}

/* Remove statement STMT, as well as the chain of assignments in which it is
   used.  */

static void
remove_stmt (gimple *stmt)
{
  tree name;
  gimple *next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
          || !gimple_assign_ssa_name_copy_p (next)
          || gimple_assign_rhs1 (next) != name)
        return;

      stmt = next;
    }

  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
          || !gimple_assign_ssa_name_copy_p (next)
          || gimple_assign_rhs1 (next) != name)
        return;

      stmt = next;
    }
}

/* Perform the predictive commoning optimization for a chain CHAIN.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning_chain (struct loop *loop, chain_p chain,
                              bitmap tmp_vars)
{
  unsigned i;
  dref a;
  tree var;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
         compute the values of the expression (except for the root one).
         We delay this until after all chains are processed.  */
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value,
         and replace the uses of the original references by these
         variables.  */
      initialize_root (loop, chain, tmp_vars);
      for (i = 1; chain->refs.iterate (i, &a); i++)
        {
          var = chain->vars[chain->length - a->distance];
          replace_ref_with (a->stmt, var, false, false);
        }
    }
}

/* Determines the unroll factor necessary to remove as many temporary variable
   copies as possible.  CHAINS is the list of chains that will be
   optimized.  */

static unsigned
determine_unroll_factor (vec<chain_p> chains)
{
  chain_p chain;
  unsigned factor = 1, af, nfactor, i;
  unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
        continue;

      if (chain->combined)
        {
          /* For combined chains, we can't handle unrolling if we replace
             looparound PHIs.  */
          dref a;
          unsigned j;
          for (j = 1; chain->refs.iterate (j, &a); j++)
            if (gimple_code (a->stmt) == GIMPLE_PHI)
              return 1;
          continue;
        }

      /* The best unroll factor for this chain is equal to the number of
         temporary variables that we create for it.  */
      af = chain->length;
      if (chain->has_max_use_after)
        af++;

      nfactor = factor * af / gcd (factor, af);
      if (nfactor <= max)
        factor = nfactor;
    }

  return factor;
}
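
/* A worked example for determine_unroll_factor (assuming
   PARAM_MAX_UNROLL_TIMES allows the result): a chain of length 2 with
   has_max_use_after set needs af = 3 variables, and a chain of length 1
   without it needs af = 1, giving factor = lcm (3, 1) = 3; a further
   chain with af = 2 would raise this to lcm (3, 2) = 6, unless that
   exceeds the maximum, in which case the factor stays at 3.  */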

/* Perform the predictive commoning optimization for CHAINS.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
                        bitmap tmp_vars)
{
  chain_p chain;
  unsigned i;

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
        execute_load_motion (loop, chain, tmp_vars);
      else
        execute_pred_commoning_chain (loop, chain, tmp_vars);
    }

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
        ;
      else if (chain->combined)
        {
          /* For combined chains, just remove the statements that are used to
             compute the values of the expression (except for the root one).  */
          dref a;
          unsigned j;
          for (j = 1; chain->refs.iterate (j, &a); j++)
            remove_stmt (a->stmt);
        }
    }

  update_ssa (TODO_update_ssa_only_virtuals);
}

/* For each reference in CHAINS, if its defining statement is
   a phi node, record the ssa name that is defined by it.  */

static void
replace_phis_by_defined_names (vec<chain_p> chains)
{
  chain_p chain;
  dref a;
  unsigned i, j;

  FOR_EACH_VEC_ELT (chains, i, chain)
    FOR_EACH_VEC_ELT (chain->refs, j, a)
      {
        if (gimple_code (a->stmt) == GIMPLE_PHI)
          {
            a->name_defined_by_phi = PHI_RESULT (a->stmt);
            a->stmt = NULL;
          }
      }
}

/* For each reference in CHAINS, if name_defined_by_phi is not
   NULL, use it to set the stmt field.  */

static void
replace_names_by_phis (vec<chain_p> chains)
{
  chain_p chain;
  dref a;
  unsigned i, j;

  FOR_EACH_VEC_ELT (chains, i, chain)
    FOR_EACH_VEC_ELT (chain->refs, j, a)
      if (a->stmt == NULL)
        {
          a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
          gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
          a->name_defined_by_phi = NULL_TREE;
        }
}

/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;
  bitmap tmp_vars;
};

static void
execute_pred_commoning_cbck (struct loop *loop, void *data)
{
  struct epcc_data *const dta = (struct epcc_data *) data;

  /* Restore phi nodes that were replaced by ssa names before
     tree_transform_and_unroll_loop (see detailed description in
     tree_predictive_commoning_loop).  */
  replace_names_by_phis (dta->chains);
  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
}

/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple *stmt, *phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
        {
          if (gimple_code (stmt) == GIMPLE_PHI
              && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
            {
              phi = stmt;
              BREAK_FROM_IMM_USE_STMT (iter);
            }
        }
      if (!phi)
        return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}

/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for which we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple *stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
        continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
             /* In case we could not unroll the loop enough to eliminate
                all copies, we may reach the loop header before the defining
                statement (in that case, some register copies will be present
                in loop latch in the final code, corresponding to the newly
                created looparound phi nodes).  */
             && gimple_bb (stmt) != loop->header)
        {
          gcc_assert (single_pred_p (gimple_bb (stmt)));
          use = PHI_ARG_DEF (stmt, 0);
          stmt = SSA_NAME_DEF_STMT (use);
        }

      base_names_in_chain_on (loop, use, var);
    }
}
1937
1938 /* Returns true if CHAIN is suitable to be combined. */
1939
1940 static bool
1941 chain_can_be_combined_p (chain_p chain)
1942 {
1943 return (!chain->combined
1944 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1945 }
1946
1947 /* Returns the modify statement that uses NAME. Skips over assignment
1948 statements, NAME is replaced with the actual name used in the returned
1949 statement. */
1950
1951 static gimple *
1952 find_use_stmt (tree *name)
1953 {
1954 gimple *stmt;
1955 tree rhs, lhs;
1956
1957 /* Skip over assignments. */
1958 while (1)
1959 {
1960 stmt = single_nonlooparound_use (*name);
1961 if (!stmt)
1962 return NULL;
1963
1964 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1965 return NULL;
1966
1967 lhs = gimple_assign_lhs (stmt);
1968 if (TREE_CODE (lhs) != SSA_NAME)
1969 return NULL;
1970
1971 if (gimple_assign_copy_p (stmt))
1972 {
1973 rhs = gimple_assign_rhs1 (stmt);
1974 if (rhs != *name)
1975 return NULL;
1976
1977 *name = lhs;
1978 }
1979 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1980 == GIMPLE_BINARY_RHS)
1981 return stmt;
1982 else
1983 return NULL;
1984 }
1985 }
1986
1987 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
1988
1989 static bool
1990 may_reassociate_p (tree type, enum tree_code code)
1991 {
1992 if (FLOAT_TYPE_P (type)
1993 && !flag_unsafe_math_optimizations)
1994 return false;
1995
1996 return (commutative_tree_code (code)
1997 && associative_tree_code (code));
1998 }

/* If the operation used in STMT is associative and commutative, walk up the
   tree of the same operations and return its root.  The distance to the root
   is stored in DISTANCE.  */

static gimple *
find_associative_operation_root (gimple *stmt, unsigned *distance)
{
  tree lhs;
  gimple *next;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  unsigned dist = 0;

  if (!may_reassociate_p (type, code))
    return NULL;

  while (1)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (lhs) == SSA_NAME);

      next = find_use_stmt (&lhs);
      if (!next
          || gimple_assign_rhs_code (next) != code)
        break;

      stmt = next;
      dist++;
    }

  if (distance)
    *distance = dist;
  return stmt;
}
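
/* As an illustration (hypothetical GIMPLE, with an operation for which
   may_reassociate_p holds):

     t1_1 = a_1 + b_1;
     t2_1 = t1_1 + c_1;
     t3_1 = t2_1 + d_1;

   starting from the statement defining t1_1, the root of the addition tree
   is the statement defining t3_1, and DISTANCE is set to 2.  */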

/* Returns the common statement in which NAME1 and NAME2 have a use.  If there
   is no such statement, returns NULL.  In case the operation used on
   NAME1 and NAME2 is associative and commutative, returns the root of the
   tree formed by this operation instead of the statement that uses NAME1 or
   NAME2.  */

static gimple *
find_common_use_stmt (tree *name1, tree *name2)
{
  gimple *stmt1, *stmt2;

  stmt1 = find_use_stmt (name1);
  if (!stmt1)
    return NULL;

  stmt2 = find_use_stmt (name2);
  if (!stmt2)
    return NULL;

  if (stmt1 == stmt2)
    return stmt1;

  stmt1 = find_associative_operation_root (stmt1, NULL);
  if (!stmt1)
    return NULL;
  stmt2 = find_associative_operation_root (stmt2, NULL);
  if (!stmt2)
    return NULL;

  return (stmt1 == stmt2 ? stmt1 : NULL);
}
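
/* Continuing the illustration above: for NAME1 == a_1 and NAME2 == d_1, the
   two names are used in different statements, but both uses belong to the
   same addition tree rooted at the statement defining t3_1, so that
   statement is returned.  */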

/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set these values instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
                   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple *stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
         is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
          && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  acode = gimple_assign_rhs_code (stmt);
  aswap = (!commutative_tree_code (acode)
           && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  return (*code == acode
          && *swap == aswap
          && *rslt_type == atype);
}
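
/* For example (hypothetical): if the first pair of references is combined as

     x_1 = a[i] - b[i];

   the initial call records CODE == MINUS_EXPR, SWAP == false and the result
   type; the calls for the remaining pairs then merely verify that they are
   combined in the same way.  SWAP would have been set if the reference from
   the second chain appeared as the first operand of the non-commutative
   operation.  */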

/* Remove OP from the operation on rhs of STMT, and replace STMT with
   an assignment of the remaining operand.  */

static void
remove_name_from_operation (gimple *stmt, tree op)
{
  tree other_op;
  gimple_stmt_iterator si;

  gcc_assert (is_gimple_assign (stmt));

  if (gimple_assign_rhs1 (stmt) == op)
    other_op = gimple_assign_rhs2 (stmt);
  else
    other_op = gimple_assign_rhs1 (stmt);

  si = gsi_for_stmt (stmt);
  gimple_assign_set_rhs_from_tree (&si, other_op);

  /* We should not have reallocated STMT.  */
  gcc_assert (gsi_stmt (si) == stmt);

  update_stmt (stmt);
}
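
/* E.g. (illustrative): removing the operand a_1 from

     x_1 = a_1 + b_1;

   turns the statement into the plain copy x_1 = b_1.  */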

/* Reassociates the expression in which NAME1 and NAME2 are used so that
   they are combined in a single statement, and returns this statement.  */

static gimple *
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  gcc_assert (root1 && root2 && root1 == root2
              && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in which both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in which they are
     currently used.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* The rhs of S1 may now be either a binary expression with operation
     CODE, or a gimple_val (in case stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
                                  gimple_assign_rhs1 (s1),
                                  gimple_assign_rhs2 (s1));

  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
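
/* A before/after sketch (hypothetical GIMPLE; the SSA numbering of the
   "predreastmp" temporaries is invented for the illustration).  With
   NAME1 == a_1 and NAME2 == c_1, the input

     t1_1 = a_1 + b_1;
     t2_1 = t1_1 + c_1;

   is rewritten as

     t1_1 = b_1;
     predreastmp.4_5 = a_1 + c_1;
     predreastmp.6_7 = t1_1;
     t2_1 = predreastmp.4_5 + predreastmp.6_7;

   and the statement computing predreastmp.4_5 is returned.  */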

/* Returns the statement that combines references R1 and R2.  In case R1
   and R2 are not used in the same statement, but they are used with an
   associative and commutative operation in the same expression, reassociate
   the expression so that they are used in the same statement.  */

static gimple *
stmt_combining_refs (dref r1, dref r2)
{
  gimple *stmt1, *stmt2;
  tree name1 = name_for_ref (r1);
  tree name2 = name_for_ref (r2);

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  if (stmt1 == stmt2)
    return stmt1;

  return reassociate_to_the_same_stmt (name1, name2);
}

/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
   description of the new chain is returned, otherwise we return NULL.  */

static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  gimple *root_stmt;
  tree rslt_type = NULL_TREE;

  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  for (i = 0; (ch1->refs.iterate (i, &r1)
               && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
        return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
        return NULL;
    }

  if (swap)
    std::swap (ch1, ch2);

  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  for (i = 0; (ch1->refs.iterate (i, &r1)
               && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (struct dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  new_chain->has_max_use_after = false;
  root_stmt = get_chain_root (new_chain)->stmt;
  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
    {
      if (nw->distance == new_chain->length
          && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
        {
          new_chain->has_max_use_after = true;
          break;
        }
    }

  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
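
/* A hypothetical example of the effect: in

     for (i = 0; i < 100; i++)
       x[i] = (a[i] + b[i]) + (a[i + 1] + b[i + 1]);

   the load chains for a[] and b[] have their corresponding references
   combined by PLUS_EXPR, producing a single CT_COMBINATION chain whose
   elements represent the values a[i] + b[i]; one variable carried between
   iterations then suffices where two would otherwise be needed.  */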

/* Try to combine the CHAINS.  */

static void
try_combine_chains (vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;

  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
        continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
        {
          if (!chain_can_be_combined_p (ch2))
            continue;

          cch = combine_chains (ch1, ch2);
          if (cch)
            {
              worklist.safe_push (cch);
              chains->safe_push (cch);
              break;
            }
        }
    }
}

/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
        continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
        = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      if (chain->inits[i] != NULL_TREE)
        continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      if (!chain->all_always_accessed && tree_could_trap_p (init))
        {
          gimple_seq_discard (stmts);
          return false;
        }

      if (stmts)
        gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
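
/* For the chain of a[i], a[i+1], a[i+2] from the example at the head of this
   file (length 2), this materializes loads of a[0] and a[1] on the preheader
   edge, so that both carried values are available when the first iteration
   starts.  */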

/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
   be used because the initializers might trap.  */

static void
prepare_initializers (struct loop *loop, vec<chain_p> chains)
{
  chain_p chain;
  unsigned i;

  for (i = 0; i < chains.length (); )
    {
      chain = chains[i];
      if (prepare_initializers_chain (loop, chain))
        i++;
      else
        {
          release_chain (chain);
          chains.unordered_remove (i);
        }
    }
}

/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n", loop->num);

  /* Find the data references and split them into components according to
     their dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
                                           &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine chains that are always used together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
            && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll
     the loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* CFG manipulations performed in tree_transform_and_unroll_loop before
         execute_pred_commoning_cbck is called may cause phi nodes to be
         reallocated, which is a problem since CHAINS may point to these
         statements.  To fix this, we store the ssa names defined by the
         phi nodes here instead of the phi nodes themselves, and restore
         the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
                                      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
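
/* Illustrative note: for the a[] chain from the head of this file, executing
   the transformation without unrolling would require copies such as
   t0 = t1; t1 = t2; at the end of each iteration to shift the carried
   values; unrolling by a suitable factor instead lets each value stay in
   the register where it was computed, so the copies (mostly) disappear.  */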

/* Runs predictive commoning.  */

unsigned
tree_predictive_commoning (void)
{
  bool unrolled = false;
  struct loop *loop;
  unsigned ret = 0;

  initialize_original_copy_tables ();
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    if (optimize_loop_for_speed_p (loop))
      {
        unrolled |= tree_predictive_commoning_loop (loop);
      }

  if (unrolled)
    {
      scev_reset ();
      ret = TODO_cleanup_cfg;
    }
  free_original_copy_tables ();

  return ret;
}

/* Predictive commoning pass.  */

static unsigned
run_tree_predictive_commoning (struct function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_predictive_commoning ();
}

namespace {

const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace

gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}
