/* Loop distribution.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG (Reduced Dependence Graph) built on top of
   the data dependence relations.  The RDG is then topologically sorted
   to obtain a map of information producers/consumers based on which it
   generates the new loops.  */
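
/* With -ftree-loop-distribute-patterns the pass can in addition
   replace a whole partition by a library call; see classify_partition
   and the generate_*_builtin functions below.  An illustrative sketch
   (array and bound names are made up for the example): the loop

   | for (i = 0; i < n; i++)
   |   a[i] = 0;

   is classified as PKIND_MEMSET and, provided the access is adjacent
   and the number of iterations is known, emitted in the preheader as

   | memset (&a[0], 0, n * sizeof (a[0]));  */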

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences; print them
		 without a label.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* If popen is available, pipe the RDG to "dot -Tx11" so it can be
     viewed interactively; otherwise dump it textually to stderr.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;
      gimple stmt;

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
	  stmts->safe_push (gsi_stmt (bsi));

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;


/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
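
/* Note that partition_merge_into conservatively resets the kind to
   PKIND_NORMAL: the union of two partitions in general no longer
   matches a single memset/memcpy pattern.  The reduction flag, in
   contrast, is sticky, so a merged partition containing a reduction
   still ends up in the last generated partition.  */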


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple phi = gsi_stmt (bsi);
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gimple phi = gsi_stmt (bsi);
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gimple_code (stmt) == GIMPLE_COND)
		{
		  gimple_cond_make_false (stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gimple_switch_set_index
		      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
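
/* Illustrative numbers only: for a store of 4-byte elements with
   NB_ITER == 99 latch executions and PLUS_ONE set, the size argument
   above evaluates to (99 + 1) * 4 == 400 bytes.  */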

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}

/* If VAL's memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node, NULL);
      gimple cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop, with the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy/memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as a reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
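
/* An illustrative sketch of the distance-vector check above (the loop
   body is made up for the example):

   | for (i = 1; i < n; i++)
   |   a[i] = a[i - 1];

   has a flow dependence of distance 1 from the store to the load
   (dist > 0, not reversed), so the partition is rejected: every
   iteration reads the value written by the previous one, and no single
   memmove call reproduces that.  A loop reading ahead of the store,
   like a[i] = a[i + 1], only has the kind of overlap memmove is
   defined to handle and can be classified as PKIND_MEMCPY;
   generate_memcpy_builtin then emits memmove instead of memcpy when
   the two pointers may alias.  */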

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether the intersection of the two partitions
     contains any loads or stores; common loads are the situation
     that happens most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
         is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t>);

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* Dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographically
		   positive swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
      }
  return dir;
}
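
/* For example, if one data-reference pair yields direction -1 and
   another yields 1, the function returns 2 ("both").  The caller then
   adds partition graph edges in both directions, which puts the two
   partitions into one strongly connected component so they are fused
   rather than distributed.  */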

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* Dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will
	       occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that check early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  gimple_stmt_iterator gsi;
	  for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple phi = gsi_stmt (gsi);
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}