/* Loop distribution.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
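
/* When -ftree-loop-distribute-patterns is enabled, the pass also
   recognizes partitions that implement a memset or memcpy and replaces
   them with a call to the corresponding builtin (see classify_partition
   and generate_memset_builtin/generate_memcpy_builtin below).  For
   example, a loop like

   |for (i = 0; i < n; i++)
   |  a[i] = 0;

   can be turned into a single call to memset (a, 0, n * sizeof (*a)).  */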

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Type of a dependence edge in the RDG.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
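
/* Note that memory dependences are not represented as RDG edges: only
   scalar flow and control dependences are.  Dependences between the
   memory references of two partitions are computed on demand from the
   data references recorded in each vertex (see
   pg_add_dependence_edges).  */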

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

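/* Write RDG to FILE in dot format, highlighting statements that read
   from memory in green and statements that write to memory in red.  */
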
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences; print the edge
                 without a label.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the flow dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence graph
   RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important, as
   generate_loops_for_partition uses the same traversal to identify
   statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per statement
   of the loop nest LOOP_NEST, and one edge per flow or control dependence
   between statements.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}


/* Kind of a partition: a normal loop, or a pattern to be replaced by
   a call to memset or memcpy/memmove.  */

enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

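/* A partition of the loop: a set of RDG statements that will end up in
   a single new loop or a single builtin call after distribution.  STMTS
   and LOOPS are bitmaps of RDG statement indices and loop numbers;
   REDUCTION_P is set when the partition contains a reduction; for a
   builtin partition, MAIN_DR, SECONDARY_DR, NITER and PLUS_ONE describe
   the memory operation to generate.  */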
struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
};


/* Allocate and initialize a partition from bitmaps STMTS and LOOPS.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used
   outside of the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used outside of
   the loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gimple *stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call: the number of
   latch executions NB_ITER (plus one when PLUS_ONE is set) times the
   size in bytes of a single element of DR.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure not to leave any dangling uses of SSA
         names defined in the loop, with the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
                             partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG
   and LOOP.  For the moment we detect memset and memcpy/memmove
   patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
                         partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
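
/* This test drives the simple cost model in distribute_loop: partitions
   that access a common base address are fused so that the distributed
   loops do not stream through the same memory twice.  */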

/* For each seed statement in STARTING_STMTS, build the partition of
   all the statements it depends on and register it in the PARTITIONS
   vector.  Partitions will be distributed into different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple *> starting_stmts,
                      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute the partition dependence created by the data references in
   DRS1 and DRS2, and update and return the dependence direction DIR
   accordingly: 1 means the partition owning DRS1 has to be executed
   before the one owning DRS2, -1 the other way around, 0 no ordering
   constraint, and 2 both ways (the partitions cannot be separated).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            std::swap (dr1, dr2);
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                std::swap (dr1, dr2);
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer statements
   are placed before consumer statements.  Tries to separate only the
   statements from STMTS into separate loops.  The partitions seeded from
   STMTS are classified, fused according to the cost model and the
   dependence constraints, and code is then generated for each remaining
   partition.  Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
                 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
            }
        }
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          struct partition *partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          struct partition *first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that check early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences (create_edge_list ());
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}