/* Loop distribution.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
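
/* As an illustrative C analogue of the example above (hypothetical user
   code, not anything in this file), the cross-iteration flow dependence
   from A(I) to A(I-1) does not force both statements to stay in the
   same loop:

   |for (i = 2; i < n; i++)
   |  {
   |    a[i] = b[i] + c;
   |    d[i] = a[i-1] * e;
   |  }

   may be distributed into

   |for (i = 2; i < n; i++)
   |  a[i] = b[i] + c;
   |
   |for (i = 2; i < n; i++)
   |  d[i] = a[i-1] * e;

   Whether this actually happens depends on the cost model implemented
   below.  */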

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple *stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
};

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
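
/* For instance (illustrative user code), in

   |for (i = 0; i < n; i++)
   |  {
   |    t = a[i];        <-- S1
   |    if (t > 0)       <-- S2
   |      b[i] = t;      <-- S3
   |  }

   S1 -> S2 and S1 -> S3 are flow_dd edges through the SSA name T,
   while S2 -> S3 is a control_dd edge because S3 executes conditionally
   on the predicate tested in S2.  */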

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
           RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
           RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
        for (e = v->succ; e; e = e->succ_next)
          switch (RDGE_TYPE (e))
            {
            case flow_dd:
              /* These are the most common dependences: don't add a
                 label for them.  */
              fprintf (file, "%d -> %d \n", i, e->dest);
              break;

            case control_dd:
              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
              break;

            default:
              gcc_unreachable ();
            }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dot.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
        continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
                                    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
                            0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge_src (edge_n);
      gimple *stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
        {
          struct graph_edge *e;
          int c = rdg_vertex_for_stmt (rdg, stmt);
          if (c < 0)
            continue;

          e = add_edge (rdg, c, v);
          e->data = XNEW (struct rdg_edge);
          RDGE_TYPE (e) = control_dd;
        }
    }
}

/* Creates the flow dependence (def-use) edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
                              iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          edge_iterator ei;
          edge e;
          FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
            if (flow_bb_inside_loop_p (loop, e->src))
              create_edge_for_control_dependence (rdg, e->src, i, cd);
        }
      else
        create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop,
                     vec<data_reference_p> *datarefs)
{
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
        return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
        {
          data_reference_p dr = (*datarefs)[j];
          if (DR_IS_READ (dr))
            RDGV_HAS_MEM_READS (v) = true;
          else
            RDGV_HAS_MEM_WRITE (v) = true;
          RDGV_DATAREFS (v).safe_push (dr);
        }
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
          stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
           gsi_next (&bsi))
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
            stmts->safe_push (stmt);
        }
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
        free (e->data);

      if (v->data)
        {
          gimple_set_uid (RDGV_STMT (v), -1);
          free_data_refs (RDGV_DATAREFS (v));
          free (v->data);
        }
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple *, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd, loop_nest[0]);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

struct partition
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data references that a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
};


/* Allocate and initialize a partition from BITMAP.  */

static partition *
partition_alloc (bitmap stmts, bitmap loops)
{
  partition *partition = XCNEW (struct partition);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition *partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition *partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition *partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition *dest, partition *partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
          && loop != loop_containing_stmt (use_stmt))
        return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
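
/* For example (illustrative user code), in

   |s = 0;
   |for (i = 0; i < n; i++)
   |  s += a[i];
   |use (s);

   the definitions of S inside the loop are used by USE (S) after it, so
   stmt_has_scalar_dependences_outside_loop returns true for them and
   classify_partition below flags the containing partition as a
   reduction, which keeps it in the last generated loop.  */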

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition *partition,
                              bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
        basic_block bb = bbs[i];

        for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gphi *phi = bsi.phi ();
            if (!virtual_operand_p (gimple_phi_result (phi))
                && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
              reset_debug_uses (phi);
          }

        for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
             gsi_next (&bsi))
          {
            gimple *stmt = gsi_stmt (bsi);
            if (gimple_code (stmt) != GIMPLE_LABEL
                && !is_gimple_debug (stmt)
                && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
              reset_debug_uses (stmt);
          }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        {
          gphi *phi = bsi.phi ();
          if (!virtual_operand_p (gimple_phi_result (phi))
              && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
            remove_phi_node (&bsi, true);
          else
            gsi_next (&bsi);
        }

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple *stmt = gsi_stmt (bsi);
          if (gimple_code (stmt) != GIMPLE_LABEL
              && !is_gimple_debug (stmt)
              && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
            {
              /* Choose an arbitrary path through the empty CFG part
                 that this unnecessary control stmt controls.  */
              if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
                {
                  gimple_cond_make_false (cond_stmt);
                  update_stmt (stmt);
                }
              else if (gimple_code (stmt) == GIMPLE_SWITCH)
                {
                  gswitch *switch_stmt = as_a <gswitch *> (stmt);
                  gimple_switch_set_index
                    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
                  update_stmt (stmt);
                }
              else
                {
                  unlink_stmt_vdef (stmt);
                  gsi_remove (&bsi, true);
                  release_defs (stmt);
                  continue;
                }
            }
          gsi_next (&bsi);
        }
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
                    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
                          TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
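
/* As a worked example (assuming a store to 8-byte doubles, NB_ITER equal
   to N - 1 latch executions and PLUS_ONE set):
   size = ((N - 1) + 1) * 8 = N * 8 bytes.  */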

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
          && !TREE_CLOBBER_P (val)
          && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
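
/* For instance, const_with_all_bytes_same returns 0x24 for 0x24242424,
   0 for an integer or floating-point zero, and -1 for 0x12345678
   because its bytes differ.  */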

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
        fprintf (dump_file, " zero\n");
      else
        fprintf (dump_file, "\n");
    }
}
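
/* Illustrative effect (hypothetical user code), assuming the partition
   was classified as PKIND_MEMSET for a store to an array of 4-byte ints:

   |for (i = 0; i < n; i++)
   |  a[i] = 0x24242424;

   becomes, in the preheader of the then redundant loop,

   |memset (a, 0x24, n * 4);

   because all four bytes of the stored value are the same.  */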

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
  gimple_stmt_iterator gsi;
  gimple *stmt, *fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
                                 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
                                  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
        fprintf (dump_file, "generated memcpy\n");
      else
        fprintf (dump_file, "generated memmove\n");
    }
}
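
/* Illustrative effect (hypothetical user code):

   |void f (int *p, int *q, int n)
   |{
   |  for (int i = 0; i < n; i++)
   |    p[i] = q[i];
   |}

   becomes a call to memmove (p, q, n * 4) because P and Q may alias;
   when the two references are provably disjoint, memcpy is emitted
   instead.  */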

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  i = nbbs;
  do
    {
      /* We have made sure to not leave any dangling uses of SSA
         names defined in the loop.  With the exception of virtuals.
         Make sure we replace all uses of virtual defs that will remain
         outside of the loop with the bare symbol as delete_basic_block
         will release them.  */
      --i;
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          if (virtual_operand_p (gimple_phi_result (phi)))
            mark_virtual_phi_result_for_renaming (phi);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          tree vdef = gimple_vdef (stmt);
          if (vdef && TREE_CODE (vdef) == SSA_NAME)
            mark_virtual_operand_for_renaming (vdef);
        }
      delete_basic_block (bbs[i]);
    }
  while (i != 0);

  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
                           recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  Return whether LOOP needs to be destroyed.  */

static bool
generate_code_for_partition (struct loop *loop,
                             partition *partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
                  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return false;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    return true;
  return false;
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition *partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
                      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy/memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
        volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
        {
          partition->reduction_p = true;
          return;
        }
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple *stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
        continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
        continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
        return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
        {
          if (DR_IS_READ (dr))
            {
              if (single_load != NULL)
                return;
              single_load = dr;
            }
          else
            {
              if (single_store != NULL)
                return;
              single_store = dr;
            }
        }
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
                      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple *stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
          && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
              || (TYPE_MODE (TREE_TYPE (rhs))
                  != TYPE_MODE (unsigned_char_type_node))))
        return;
      if (TREE_CODE (rhs) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (rhs)
          && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
        return;
      if (!adjacent_dr_p (single_store)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (stmt)))
        return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple *store = DR_STMT (single_store);
      gimple *load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
          && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
        return;
      if (!adjacent_dr_p (single_store)
          || !adjacent_dr_p (single_load)
          || !operand_equal_p (DR_STEP (single_store),
                               DR_STEP (single_load), 0)
          || !dominated_by_p (CDI_DOMINATORS,
                              loop->latch, gimple_bb (store)))
        return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
                                                 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        {
          free_dependence_relation (ddr);
          loops.release ();
          return;
        }
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
        {
          if (DDR_NUM_DIST_VECTS (ddr) == 0)
            {
              free_dependence_relation (ddr);
              loops.release ();
              return;
            }
          lambda_vector dist_v;
          FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
            {
              int dist = dist_v[index_in_loop_nest (loop->num,
                                                    DDR_LOOP_NEST (ddr))];
              if (dist > 0 && !DDR_REVERSED_P (ddr))
                {
                  free_dependence_relation (ddr);
                  loops.release ();
                  return;
                }
            }
        }
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}

/* For a data reference REF, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition *partition1,
                         partition *partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
        if (RDG_MEM_WRITE_STMT (rdg, j)
            || RDG_MEM_READS_STMT (rdg, j))
          {
            FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
              {
                tree base1 = ref_base_address (ref1);
                if (base1)
                  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
                    if (base1 == ref_base_address (ref2))
                      return true;
              }
          }

  return false;
}
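
/* For example (illustrative), a partition writing a[i] and another
   partition reading a[i] share the base address A, so the two are
   considered similar; fusing them avoids streaming the same memory
   twice through two distributed loops.  */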

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
                      vec<gimple *> starting_stmts,
                      vec<partition *> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple *stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
        continue;

      partition *partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "ldist useful partition:\n");
          dump_bitmap (dump_file, partition->stmts);
        }

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition *> partitions)
{
  int i;
  partition *partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition *>);

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition *> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition *partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
                           vec<partition *> partitions)
{
  int i;
  partition *partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
                         vec<data_reference_p> drs1,
                         vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
        data_reference_p saved_dr1 = dr1;
        int this_dir = 1;
        ddr_p ddr;
        /* Re-shuffle data-refs to be in dominator order.  */
        if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
            > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
          {
            std::swap (dr1, dr2);
            this_dir = -this_dir;
          }
        ddr = initialize_data_dependence_relation (dr1, dr2, loops);
        compute_affine_dependence (ddr, loops[0]);
        if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
          this_dir = 2;
        else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
          {
            if (DDR_REVERSED_P (ddr))
              {
                std::swap (dr1, dr2);
                this_dir = -this_dir;
              }
            /* Known dependences can still be unordered throughout the
               iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
            if (DDR_NUM_DIST_VECTS (ddr) != 1)
              this_dir = 2;
            /* If the overlap is exact preserve stmt order.  */
            else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
              ;
            else
              {
                /* Else as the distance vector is lexicographic positive
                   swap the dependence direction.  */
                this_dir = -this_dir;
              }
          }
        else
          this_dir = 0;
        free_dependence_relation (ddr);
        if (dir == 0)
          dir = this_dir;
        else if (dir != this_dir)
          return 2;
        /* Shuffle "back" dr1.  */
        dr1 = saved_dr1;
      }
  return dir;
}
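
/* Illustrative reading: if a partition P1 writes a[i] and a later
   partition P2 reads a[i-1], the dependence orders P1 before P2 and the
   function returns 1; if one pair of data references orders the
   partitions one way and another pair the opposite way, the result is 2,
   which below puts both partitions into the same dependence SCC and
   thereby fuses them.  */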

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  Set *DESTROY_P to whether
   LOOP needs to be destroyed.  */

static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
                 control_dependences *cd, int *nb_calls, bool *destroy_p)
{
  struct graph *rdg;
  partition *partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *destroy_p = false;
  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<struct partition *, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  struct partition *into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
          break;
      for (++i; partitions.iterate (i, &partition); ++i)
        if (!partition_builtin_p (partition))
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              {
                fprintf (dump_file, "fusing non-builtin partitions\n");
                dump_bitmap (dump_file, into->stmts);
                dump_bitmap (dump_file, partition->stmts);
              }
            partition_merge_into (into, partition);
            partitions.unordered_remove (i);
            partition_free (partition);
            i--;
          }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
          {
            fprintf (dump_file, "fusing partitions\n");
            dump_bitmap (dump_file, into->stmts);
            dump_bitmap (dump_file, partition->stmts);
            fprintf (dump_file, "because they have reductions\n");
          }
        partition_merge_into (into, partition);
        partitions.unordered_remove (i);
        partition_free (partition);
        i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      bool changed = false;
      if (partition_builtin_p (into))
        continue;
      for (int j = i + 1;
           partitions.iterate (j, &partition); ++j)
        {
          if (!partition_builtin_p (partition)
              && similar_memory_accesses (rdg, into, partition))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "fusing partitions\n");
                  dump_bitmap (dump_file, into->stmts);
                  dump_bitmap (dump_file, partition->stmts);
                  fprintf (dump_file, "because they have similar "
                           "memory accesses\n");
                }
              partition_merge_into (into, partition);
              partitions.unordered_remove (j);
              partition_free (partition);
              j--;
              changed = true;
            }
        }
      /* Suppose we fused partitions 0 and 2 into "0,2" because 0 and 2
         have similar accesses, and partitions 1 and 2 also have similar
         accesses while 0 and 1 do not.  Without retrying we would fail
         to consider merging 1 into "0,2".  So try again if we merged
         anything into INTO.  */
      if (changed)
        i--;
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
          struct partition *partition;
          vec<data_reference_p> writes;
          vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
        {
          vertex *v = &pg->vertices[i];
          pgdata *data = new pgdata;
          data_reference_p dr;
          /* FIXME - leaks.  */
          v->data = data;
          bitmap_iterator bi;
          unsigned j;
          data->partition = partition;
          data->reads = vNULL;
          data->writes = vNULL;
          EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
            for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
              if (DR_IS_READ (dr))
                data->reads.safe_push (dr);
              else
                data->writes.safe_push (dr);
        }
      struct partition *partition1, *partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
        for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
          {
            /* dependence direction - 0 is no dependence, -1 is back,
               1 is forth, 2 is both (we can stop then, merging will occur).  */
            int dir = 0;
            dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                           PGDATA(i)->writes,
                                           PGDATA(j)->reads);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->reads,
                                             PGDATA(j)->writes);
            if (dir != 2)
              dir = pg_add_dependence_edges (rdg, loop_nest, dir,
                                             PGDATA(i)->writes,
                                             PGDATA(j)->writes);
            if (dir == 1 || dir == 2)
              add_edge (pg, i, j);
            if (dir == -1 || dir == 2)
              add_edge (pg, j, i);
          }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
        if (partition_reduction_p (partition))
          break;
      if (j < partitions.length ())
        {
          for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
            if (i != j)
              add_edge (pg, i, j);
        }

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
        {
          struct partition *first;
          int j;
          for (j = 0; partitions.iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
          for (j = j + 1; partitions.iterate (j, &partition); ++j)
            if (pg->vertices[j].component == i)
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  {
                    fprintf (dump_file, "fusing partitions\n");
                    dump_bitmap (dump_file, first->stmts);
                    dump_bitmap (dump_file, partition->stmts);
                    fprintf (dump_file, "because they are in the same "
                             "dependence SCC\n");
                  }
                partition_merge_into (first, partition);
                partitions[j] = NULL;
                partition_free (partition);
                PGDATA (j)->partition = NULL;
              }
        }

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
        {
          pgdata *data = PGDATA (i);
          if (data->partition)
            partitions.safe_push (data->partition);
          data->reads.release ();
          data->writes.release ();
          delete data;
        }
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
        (*nb_calls)++;
      *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
        || flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;
  auto_vec<loop_p> loops_to_be_destroyed;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple *> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
         so do that early.  */
      if (!single_exit (loop))
        continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
        continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
        {
          for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              if (virtual_operand_p (gimple_phi_result (phi)))
                continue;
              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
                continue;
              work_list.safe_push (phi);
            }
          for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);

              /* If there is a stmt with side-effects bail out - we
                 cannot and should not distribute this loop.  */
              if (gimple_has_side_effects (stmt))
                {
                  work_list.truncate (0);
                  goto out;
                }

              /* Distribute stmts which have defs that are used outside of
                 the loop.  */
              if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
                ;
              /* Otherwise only distribute stores for now.  */
              else if (!gimple_vdef (stmt))
                continue;

              work_list.safe_push (stmt);
            }
        }
 out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
        {
          if (!cd)
            {
              calculate_dominance_info (CDI_DOMINATORS);
              calculate_dominance_info (CDI_POST_DOMINATORS);
              cd = new control_dependences ();
              free_dominance_info (CDI_POST_DOMINATORS);
            }
          bool destroy_p;
          nb_generated_loops = distribute_loop (loop, work_list, cd,
                                                &nb_generated_calls,
                                                &destroy_p);
          if (destroy_p)
            loops_to_be_destroyed.safe_push (loop);
        }

      if (nb_generated_loops + nb_generated_calls > 0)
        {
          changed = true;
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                           loc, "Loop %d distributed: split to %d loops "
                           "and %d library calls.\n",
                           num, nb_generated_loops, nb_generated_calls);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Destroy loop bodies that could not be reused.  Do this late as we
         otherwise can end up referring to stale data in control
         dependences.  */
      unsigned i;
      FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
        destroy_loop (loop);

      /* Cached scalar evolutions now may refer to wrong or non-existing
         loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

  checking_verify_loop_structure ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}
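
/* Usage sketch: the pass is gated on -ftree-loop-distribution (full
   distribution) and -ftree-loop-distribute-patterns (memset/memcpy
   pattern detection only); its decisions can be inspected in the
   "ldist" dump enabled by -fdump-tree-ldist-details.  */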