/* gcc/tree-loop-distribution.c */
1 /* Loop distribution.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
29 is transformed to
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
43
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "gimple-ssa.h"
53 #include "tree-cfg.h"
54 #include "tree-phinodes.h"
55 #include "ssa-iterators.h"
56 #include "stringpool.h"
57 #include "tree-ssanames.h"
58 #include "tree-ssa-loop-manip.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-into-ssa.h"
61 #include "tree-ssa.h"
62 #include "cfgloop.h"
63 #include "tree-chrec.h"
64 #include "tree-data-ref.h"
65 #include "tree-scalar-evolution.h"
66 #include "tree-pass.h"
67 #include "gimple-pretty-print.h"
68 #include "tree-vectorizer.h"
69
70
/* A Reduced Dependence Graph (RDG) vertex representing a statement.
   Hung off the generic graph vertex via its 'data' pointer; see the
   RDGV_* accessors below.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  Owned by the vertex;
     released by free_rdg.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload of a generic graph vertex V.  */
#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
/* Same accessors, addressed by vertex index I of graph RDG.  */
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
95
/* Data dependence type.  The values double as one-character tags
   (usable when dumping edges).  */

enum rdg_dep_type
{
  /* Read After Write (RAW) — a scalar flow dependence from a DEF to
     one of its uses.  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Access the dependence type stored in the 'data' of RDG edge E.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
116
117 /* Dump vertex I in RDG to FILE. */
118
119 static void
120 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
121 {
122 struct vertex *v = &(rdg->vertices[i]);
123 struct graph_edge *e;
124
125 fprintf (file, "(vertex %d: (%s%s) (in:", i,
126 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
127 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
128
129 if (v->pred)
130 for (e = v->pred; e; e = e->pred_next)
131 fprintf (file, " %d", e->src);
132
133 fprintf (file, ") (out:");
134
135 if (v->succ)
136 for (e = v->succ; e; e = e->succ_next)
137 fprintf (file, " %d", e->dest);
138
139 fprintf (file, ")\n");
140 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
141 fprintf (file, ")\n");
142 }
143
/* Call dump_rdg_vertex on stderr.  Convenience entry point meant to be
   invoked from the debugger.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
151
152 /* Dump the reduced dependence graph RDG to FILE. */
153
154 static void
155 dump_rdg (FILE *file, struct graph *rdg)
156 {
157 fprintf (file, "(rdg\n");
158 for (int i = 0; i < rdg->n_vertices; i++)
159 dump_rdg_vertex (file, rdg, i);
160 fprintf (file, ")\n");
161 }
162
/* Call dump_rdg on stderr.  Convenience entry point meant to be invoked
   from the debugger.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
170
/* Write RDG to FILE in GraphViz "dot" syntax.  Vertices are labeled with
   their index and a slim rendering of their statement; memory reads are
   filled green, memory writes red; control-dependence edges are labeled.  */
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Point the pretty-printer's output stream at FILE so pp_flush writes
     straight into the dot output.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* These are the most common dependences: print them
                unlabeled.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

           case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}
219
/* Display the Reduced Dependence Graph using dotty.  Debug-only helper,
   meant to be called from the debugger.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#if 1
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* NOTE(review): closing the underlying descriptor here lets "dot" see
     EOF and render before we wait in pclose; pclose's internal fclose
     then operates on an already-closed fd.  Appears intentional for this
     debug-only path — confirm.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
238
239 /* Returns the index of STMT in RDG. */
240
241 static int
242 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
243 {
244 int index = gimple_uid (stmt);
245 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
246 return index;
247 }
248
249 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
250 the index of DEF in RDG. */
251
252 static void
253 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
254 {
255 use_operand_p imm_use_p;
256 imm_use_iterator iterator;
257
258 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
259 {
260 struct graph_edge *e;
261 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
262
263 if (use < 0)
264 continue;
265
266 e = add_edge (rdg, idef, use);
267 e->data = XNEW (struct rdg_edge);
268 RDGE_TYPE (e) = flow_dd;
269 }
270 }
271
/* Creates an edge for the control dependences of BB to the vertex V.
   For every edge BB is control-dependent on, find the controlling
   (conditional) statement at the source of that edge and, if it has a
   vertex in RDG, add a control_dd edge from it to V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  /* The controlling statement may be outside the RDG.  */
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
298
299 /* Creates the edges of the reduced dependence graph RDG. */
300
301 static void
302 create_rdg_flow_edges (struct graph *rdg)
303 {
304 int i;
305 def_operand_p def_p;
306 ssa_op_iter iter;
307
308 for (i = 0; i < rdg->n_vertices; i++)
309 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
310 iter, SSA_OP_DEF)
311 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
312 }
313
314 /* Creates the edges of the reduced dependence graph RDG. */
315
316 static void
317 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
318 {
319 int i;
320
321 for (i = 0; i < rdg->n_vertices; i++)
322 {
323 gimple stmt = RDG_STMT (rdg, i);
324 if (gimple_code (stmt) == GIMPLE_PHI)
325 {
326 edge_iterator ei;
327 edge e;
328 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
329 create_edge_for_control_dependence (rdg, e->src, i, cd);
330 }
331 else
332 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
333 }
334 }
335
/* Build the vertices of the reduced dependence graph RDG for the
   statements STMTS of LOOP, recording all their data references in
   *DATAREFS.  Return false if that failed (data-reference analysis
   gave up on some statement); on failure RDG is left partially
   initialized and must be freed with free_rdg by the caller.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  The uid is read back by
	 rdg_vertex_for_stmt and reset to -1 by free_rdg.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs never touch memory; nothing more to record for them.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Collect the data references of STMT, appended to *DATAREFS
	 starting at index DRP.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
376
377 /* Initialize STMTS with all the statements of LOOP. The order in
378 which we discover statements is important as
379 generate_loops_for_partition is using the same traversal for
380 identifying statements in loop copies. */
381
382 static void
383 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
384 {
385 unsigned int i;
386 basic_block *bbs = get_loop_body_in_dom_order (loop);
387
388 for (i = 0; i < loop->num_nodes; i++)
389 {
390 basic_block bb = bbs[i];
391 gimple_stmt_iterator bsi;
392 gimple stmt;
393
394 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
395 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
396 stmts->safe_push (gsi_stmt (bsi));
397
398 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
399 {
400 stmt = gsi_stmt (bsi);
401 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
402 stmts->safe_push (stmt);
403 }
404 }
405
406 free (bbs);
407 }
408
/* Free the reduced dependence graph RDG: the per-edge rdg_edge payloads,
   the per-vertex rdg_vertex payloads (including their data references),
   and the graph itself.  Also resets each statement's uid to -1, undoing
   the mapping installed by create_rdg_vertices.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      /* v->data may be NULL if create_rdg_vertices failed part-way.  */
      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
434
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  Control-dependence edges are added only when CD is
   non-NULL.  Returns NULL when data-reference analysis fails; otherwise
   the returned graph must be freed with free_rdg.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  stack_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      /* NOTE(review): the data references collected so far are freed via
	 the vertices in free_rdg; any reference found for the failing
	 statement but not yet attached to a vertex appears to be leaked
	 here — confirm.  */
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  /* Only the vector is released; the data references themselves are now
     owned by the RDG vertices.  */
  datarefs.release ();

  return rdg;
}
466
467
468
/* The kind of code a partition is turned into: a plain distributed loop,
   or a call to a recognized builtin.  */
enum partition_kind {
  PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of the RDG: the set of statements that will form one
   distributed loop (or one builtin call).  */
typedef struct partition_s
{
  /* Bitmap of RDG vertex indices (statement uids) in this partition.  */
  bitmap stmts;
  /* Bitmap of numbers of the loops the statements belong to.  */
  bitmap loops;
  /* True when some statement's result is used after the loop; such
     partitions must stay in the last generated loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  /* Source reference for PKIND_MEMCPY.  */
  data_reference_p secondary_dr;
  /* Number of latch executions of the loop, for sizing the builtin.  */
  tree niter;
  /* True when the builtin covers niter + 1 iterations.  */
  bool plus_one;
} *partition_t;
485
486
487 /* Allocate and initialize a partition from BITMAP. */
488
489 static partition_t
490 partition_alloc (bitmap stmts, bitmap loops)
491 {
492 partition_t partition = XCNEW (struct partition_s);
493 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
494 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
495 partition->reduction_p = false;
496 partition->kind = PKIND_NORMAL;
497 return partition;
498 }
499
500 /* Free PARTITION. */
501
502 static void
503 partition_free (partition_t partition)
504 {
505 BITMAP_FREE (partition->stmts);
506 BITMAP_FREE (partition->loops);
507 free (partition);
508 }
509
/* Returns true if the partition can be generated as a builtin
   (memset/memcpy) instead of a distributed loop.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}
517
/* Returns true if the partition contains a reduction, i.e. a value that
   is live after the loop.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
525
526 /* Merge PARTITION into the partition DEST. */
527
528 static void
529 partition_merge_into (partition_t dest, partition_t partition)
530 {
531 dest->kind = PKIND_NORMAL;
532 bitmap_ior_into (dest->stmts, partition->stmts);
533 if (partition_reduction_p (partition))
534 dest->reduction_p = true;
535 }
536
537
538 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
539 the LOOP. */
540
541 static bool
542 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
543 {
544 imm_use_iterator imm_iter;
545 use_operand_p use_p;
546
547 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
548 {
549 gimple use_stmt = USE_STMT (use_p);
550 if (!is_gimple_debug (use_stmt)
551 && loop != loop_containing_stmt (use_stmt))
552 return true;
553 }
554
555 return false;
556 }
557
558 /* Returns true when STMT defines a scalar variable used after the
559 loop LOOP. */
560
561 static bool
562 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
563 {
564 def_operand_p def_p;
565 ssa_op_iter op_iter;
566
567 if (gimple_code (stmt) == GIMPLE_PHI)
568 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
569
570 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
571 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
572 return true;
573
574 return false;
575 }
576
/* Return a copy of LOOP placed before LOOP, on its preheader edge.
   The original/copy statement mapping set up here (and torn down again)
   drives the dominator/SSA bookkeeping inside the duplication; the SSA
   update itself is deliberately discarded — callers rewrite the SSA web
   themselves afterwards.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
593
594 /* Creates an empty basic block after LOOP. */
595
596 static void
597 create_bb_after_loop (struct loop *loop)
598 {
599 edge exit = single_exit (loop);
600
601 if (!exit)
602 return;
603
604 split_edge (exit);
605 }
606
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a fresh copy placed before the original loop.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: detach debug uses from everything about to be removed,
     so the removals below cannot leave dangling debug references.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple phi = gsi_stmt (bsi);
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove (or neutralize) the statements.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gimple phi = gsi_stmt (bsi);
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gimple_code (stmt) == GIMPLE_COND)
		{
		  gimple_cond_make_false (stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gimple_switch_set_index
		    (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
703
704 /* Build the size argument for a memory operation call. */
705
706 static tree
707 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
708 bool plus_one)
709 {
710 tree size = fold_convert_loc (loc, sizetype, nb_iter);
711 if (plus_one)
712 size = size_binop (PLUS_EXPR, size, size_one_node);
713 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
714 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
715 size = fold_convert_loc (loc, size_type_node, size);
716 return size;
717 }
718
/* Build an address argument for a memory operation call: the address of
   the first byte touched by DR over NB_BYTES bytes of the operation.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  For a
     decreasing access the data reference starts at the HIGHEST address
     touched, so step back NB_BYTES and re-add one element to land on the
     lowest byte of the region.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
740
741 /* If VAL memory representation contains the same value in all bytes,
742 return that value, otherwise return -1.
743 E.g. for 0x24242424 return 0x24, for IEEE double
744 747708026454360457216.0 return 0x44, etc. */
745
746 static int
747 const_with_all_bytes_same (tree val)
748 {
749 unsigned char buf[64];
750 int i, len;
751
752 if (integer_zerop (val)
753 || real_zerop (val)
754 || (TREE_CODE (val) == CONSTRUCTOR
755 && !TREE_CLOBBER_P (val)
756 && CONSTRUCTOR_NELTS (val) == 0))
757 return 0;
758
759 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
760 return -1;
761
762 len = native_encode_expr (val, buf, sizeof (buf));
763 if (len == 0)
764 return -1;
765 for (i = 1; i < len; i++)
766 if (buf[i] != buf[0])
767 return -1;
768 return buf[0];
769 }
770
/* Generate a call to memset for PARTITION in LOOP.  The call is inserted
   on the preheader edge, after which the caller is expected to remove
   the now-redundant loop body.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Size and address must be gimplified in order: the address expression
     may reference the size value.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int; convert other byte-sized values.  */
      gimple cstmt;
      tree tem = make_ssa_name (integer_type_node, NULL);
      cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
827
/* Generate a call to memcpy for PARTITION in LOOP, or to memmove when
   source and destination may overlap.  The call is inserted on the
   preheader edge, after which the caller is expected to remove the
   now-redundant loop body.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* Pick memmove when the regions may alias (the alias test must run on
     the un-gimplified address expressions).  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
872
/* Remove and destroy the loop LOOP: redirect its single exit to come
   straight from the preheader, delete every block of the loop body, and
   repair the dominator info of the exit destination.  Requires that the
   loop has a single exit and that no SSA name defined inside is used
   outside (virtuals excepted — they are marked for renaming here).  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop: the former exit edge now runs preheader -> dest.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple phi = gsi_stmt (gsi);
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* DEST lost its in-loop predecessors; recompute its dominator.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
920
921 /* Generates code for PARTITION. */
922
923 static void
924 generate_code_for_partition (struct loop *loop,
925 partition_t partition, bool copy_p)
926 {
927 switch (partition->kind)
928 {
929 case PKIND_NORMAL:
930 /* Reductions all have to be in the last partition. */
931 gcc_assert (!partition_reduction_p (partition)
932 || !copy_p);
933 generate_loops_for_partition (loop, partition, copy_p);
934 return;
935
936 case PKIND_MEMSET:
937 generate_memset_builtin (loop, partition);
938 break;
939
940 case PKIND_MEMCPY:
941 generate_memcpy_builtin (loop, partition);
942 break;
943
944 default:
945 gcc_unreachable ();
946 }
947
948 /* Common tail for partitions we turn into a call. If this was the last
949 partition for which we generate code, we have to destroy the loop. */
950 if (!copy_p)
951 destroy_loop (loop);
952 }
953
954
955 /* Returns a partition with all the statements needed for computing
956 the vertex V of the RDG, also including the loop exit conditions. */
957
958 static partition_t
959 build_rdg_partition_for_vertex (struct graph *rdg, int v)
960 {
961 partition_t partition = partition_alloc (NULL, NULL);
962 stack_vec<int, 3> nodes;
963 unsigned i;
964 int x;
965
966 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
967
968 FOR_EACH_VEC_ELT (nodes, i, x)
969 {
970 bitmap_set_bit (partition->stmts, x);
971 bitmap_set_bit (partition->loops,
972 loop_containing_stmt (RDG_STMT (rdg, x))->num);
973 }
974
975 return partition;
976 }
977
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP:
   PKIND_MEMSET for a single constant-like store, PKIND_MEMCPY for a single
   load/store pair with compatible strides, PKIND_NORMAL otherwise.  Also
   marks the partition as a reduction when any statement's result is live
   after the loop.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  Walk the statements looking for exactly
     one store and at most one load; anything else disqualifies.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  /* The builtin needs a known iteration count.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* When the store executes unconditionally on the path to the exit it
     also runs in the final (latch-count + 1) iteration.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* Memset candidate: the stored value must have an all-bytes-same
	 representation or be a byte-sized value.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      /* The value must be invariant in the loop.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      /* The store must be contiguous and execute in every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both accesses must be contiguous with equal strides and the store
	 must execute in every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive, non-reversed distance means the store would
		 overwrite bytes the load still needs: not memmove-able.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1146
1147 /* For a data reference REF, return the declaration of its base
1148 address or NULL_TREE if the base is not determined. */
1149
1150 static tree
1151 ref_base_address (data_reference_p dr)
1152 {
1153 tree base_address = DR_BASE_ADDRESS (dr);
1154 if (base_address
1155 && TREE_CODE (base_address) == ADDR_EXPR)
1156 return TREE_OPERAND (base_address, 0);
1157
1158 return base_address;
1159 }
1160
1161 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1162 accesses in RDG. */
1163
static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  Two references
     count as similar when they share the same determinable base
     (ref_base_address strips ADDR_EXPR wrappers, so distinct references
     to the same declaration compare equal).  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		/* A NULL base is indeterminate and never matches.  */
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
1200
1201 /* Aggregate several components into a useful partition that is
1202 registered in the PARTITIONS vector. Partitions will be
1203 distributed in different loops. */
1204
static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  /* Tracks RDG vertices already assigned to some partition so the same
     statement is never put into two partitions.  */
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      /* Build the partition rooted at V and mark all its statements
	 as taken.  */
      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      /* Ownership of PARTITION transfers to the caller via PARTITIONS.  */
      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}
1244
1245 /* Dump to FILE the PARTITIONS. */
1246
1247 static void
1248 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1249 {
1250 int i;
1251 partition_t partition;
1252
1253 FOR_EACH_VEC_ELT (partitions, i, partition)
1254 debug_bitmap_file (file, partition->stmts);
1255 }
1256
/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

/* Debugger entry point: dump PARTITIONS to stderr.  */
DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1265
1266 /* Returns the number of read and write operations in the RDG. */
1267
1268 static int
1269 number_of_rw_in_rdg (struct graph *rdg)
1270 {
1271 int i, res = 0;
1272
1273 for (i = 0; i < rdg->n_vertices; i++)
1274 {
1275 if (RDG_MEM_WRITE_STMT (rdg, i))
1276 ++res;
1277
1278 if (RDG_MEM_READS_STMT (rdg, i))
1279 ++res;
1280 }
1281
1282 return res;
1283 }
1284
1285 /* Returns the number of read and write operations in a PARTITION of
1286 the RDG. */
1287
1288 static int
1289 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1290 {
1291 int res = 0;
1292 unsigned i;
1293 bitmap_iterator ii;
1294
1295 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1296 {
1297 if (RDG_MEM_WRITE_STMT (rdg, i))
1298 ++res;
1299
1300 if (RDG_MEM_READS_STMT (rdg, i))
1301 ++res;
1302 }
1303
1304 return res;
1305 }
1306
1307 /* Returns true when one of the PARTITIONS contains all the read or
1308 write operations of RDG. */
1309
1310 static bool
1311 partition_contains_all_rw (struct graph *rdg,
1312 vec<partition_t> partitions)
1313 {
1314 int i;
1315 partition_t partition;
1316 int nrw = number_of_rw_in_rdg (rdg);
1317
1318 FOR_EACH_VEC_ELT (partitions, i, partition)
1319 if (nrw == number_of_rw_in_partition (rdg, partition))
1320 return true;
1321
1322 return false;
1323 }
1324
1325 /* Compute partition dependence created by the data references in DRS1
1326 and DRS2 and modify and return DIR according to that. */
1327
static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	/* An unanalyzable dependence forces both directions.  */
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* chrec_known: provably no dependence between this pair.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	/* Fold THIS_DIR into the accumulated direction; differing
	   directions collapse to 2 (both) and we can stop early.  */
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
      }
  return dir;
}
1388
1389 /* Compare postorder number of the partition graph vertices V1 and V2. */
1390
1391 static int
1392 pgcmp (const void *v1_, const void *v2_)
1393 {
1394 const vertex *v1 = (const vertex *)v1_;
1395 const vertex *v2 = (const vertex *)v2_;
1396 return v2->post - v1->post;
1397 }
1398
1399 /* Distributes the code from LOOP in such a way that producer
1400 statements are placed before consumer statements. Tries to separate
1401 only the statements from STMTS into separate loops.
1402 Returns the number of distributed loops. */
1403
static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  stack_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Seed a partition from each statement in STMTS.  */
  stack_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify partitions that can be replaced by builtin calls
     (memset/memcpy patterns).  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      /* INTO becomes the first non-builtin partition; every later
	 non-builtin partition is merged into it.  */
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      /* Attach to each vertex its partition and the partition's data
	 references, split into reads and writes.  */
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Compute the pairwise dependence direction from write/read,
	 read/write and write/write interactions and add graph edges
	 accordingly.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  /* FIRST is the representative of SCC I; all other members
	     are merged into it and their slots nulled out.  */
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  /* Bail out when nothing was split off: no partitions, a single
     non-builtin partition, or one partition still containing all
     reads and writes of the RDG.  */
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  /* Emit one loop (or builtin call) per partition.  */
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  /* Return the number of distributed loops, excluding builtin calls.  */
  return nbp - *nb_calls;
}
1664
1665 /* Distribute all loops in the current function. */
1666
static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Invalidate all statement UIDs up front.  NOTE(review): presumably
     meaningful UIDs are re-assigned later during RDG construction --
     confirm.  */
  FOR_ALL_BB (bb)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  gimple_stmt_iterator gsi;
	  for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple phi = gsi_stmt (gsi);
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once for the whole
	     function, and reused for every distributed loop.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Distribution touched the memory SSA web; rename virtual
	 operands and restore loop-closed SSA form.  */
      mark_virtual_operands_for_renaming (cfun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1787
1788 static bool
1789 gate_tree_loop_distribution (void)
1790 {
1791 return flag_tree_loop_distribution
1792 || flag_tree_loop_distribute_patterns;
1793 }
1794
namespace {

/* Pass descriptor for the loop distribution pass.  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

/* Pass wrapper delegating to gate_tree_loop_distribution and
   tree_loop_distribution above.  */
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_tree_loop_distribution (); }
  unsigned int execute () { return tree_loop_distribution (); }

}; // class pass_loop_distribution

} // anon namespace

/* Factory used by the pass manager; caller owns the returned pass.  */
gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}