Daily bump.
[gcc.git] / gcc / tree-loop-distribution.c
1 /* Loop distribution.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 /* This pass performs loop distribution: for example, the loop
24
25 |DO I = 2, N
26 | A(I) = B(I) + C
27 | D(I) = A(I-1)*E
28 |ENDDO
29
30 is transformed to
31
32 |DOALL I = 2, N
33 | A(I) = B(I) + C
34 |ENDDO
35 |
36 |DOALL I = 2, N
37 | D(I) = A(I-1)*E
38 |ENDDO
39
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
44
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tree-flow.h"
49 #include "cfgloop.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
54
55 enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };
56
57 typedef struct partition_s
58 {
59 bitmap stmts;
60 bool has_writes;
61 enum partition_kind kind;
62 /* data-references a kind != PKIND_NORMAL partition is about. */
63 data_reference_p main_dr;
64 data_reference_p secondary_dr;
65 } *partition_t;
66
67
68 /* Allocate and initialize a partition from BITMAP. */
69
70 static partition_t
71 partition_alloc (bitmap stmts)
72 {
73 partition_t partition = XCNEW (struct partition_s);
74 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
75 partition->has_writes = false;
76 partition->kind = PKIND_NORMAL;
77 return partition;
78 }
79
80 /* Free PARTITION. */
81
82 static void
83 partition_free (partition_t partition)
84 {
85 BITMAP_FREE (partition->stmts);
86 free (partition);
87 }
88
89 /* Returns true if the partition can be generated as a builtin. */
90
91 static bool
92 partition_builtin_p (partition_t partition)
93 {
94 return partition->kind != PKIND_NORMAL;
95 }
96
97 /* Returns true if the partition has an writes. */
98
99 static bool
100 partition_has_writes (partition_t partition)
101 {
102 return partition->has_writes;
103 }
104
105 /* If bit I is not set, it means that this node represents an
106 operation that has already been performed, and that should not be
107 performed again. This is the subgraph of remaining important
108 computations that is passed to the DFS algorithm for avoiding to
109 include several times the same stores in different loops. */
110 static bitmap remaining_stmts;
111
112 /* A node of the RDG is marked in this bitmap when it has as a
113 predecessor a node that writes to memory. */
114 static bitmap upstream_mem_writes;
115
116 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
117 the LOOP. */
118
119 static bool
120 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
121 {
122 imm_use_iterator imm_iter;
123 use_operand_p use_p;
124
125 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
126 {
127 gimple use_stmt = USE_STMT (use_p);
128 if (!is_gimple_debug (use_stmt)
129 && loop != loop_containing_stmt (use_stmt))
130 return true;
131 }
132
133 return false;
134 }
135
136 /* Returns true when STMT defines a scalar variable used after the
137 loop LOOP. */
138
139 static bool
140 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
141 {
142 def_operand_p def_p;
143 ssa_op_iter op_iter;
144
145 if (gimple_code (stmt) == GIMPLE_PHI)
146 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
147
148 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
149 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
150 return true;
151
152 return false;
153 }
154
155 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
156 ORIG_LOOP. */
157
158 static void
159 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
160 {
161 tree new_ssa_name;
162 gimple_stmt_iterator si_new, si_orig;
163 edge orig_loop_latch = loop_latch_edge (orig_loop);
164 edge orig_entry_e = loop_preheader_edge (orig_loop);
165 edge new_loop_entry_e = loop_preheader_edge (new_loop);
166
167 /* Scan the phis in the headers of the old and new loops
168 (they are organized in exactly the same order). */
169 for (si_new = gsi_start_phis (new_loop->header),
170 si_orig = gsi_start_phis (orig_loop->header);
171 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
172 gsi_next (&si_new), gsi_next (&si_orig))
173 {
174 tree def;
175 source_location locus;
176 gimple phi_new = gsi_stmt (si_new);
177 gimple phi_orig = gsi_stmt (si_orig);
178
179 /* Add the first phi argument for the phi in NEW_LOOP (the one
180 associated with the entry of NEW_LOOP) */
181 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
182 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
183 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
184
185 /* Add the second phi argument for the phi in NEW_LOOP (the one
186 associated with the latch of NEW_LOOP) */
187 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
188 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
189
190 if (TREE_CODE (def) == SSA_NAME)
191 {
192 new_ssa_name = get_current_def (def);
193
194 if (!new_ssa_name)
195 /* This only happens if there are no definitions inside the
196 loop. Use the the invariant in the new loop as is. */
197 new_ssa_name = def;
198 }
199 else
200 /* Could be an integer. */
201 new_ssa_name = def;
202
203 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
204 }
205 }
206
207 /* Return a copy of LOOP placed before LOOP. */
208
209 static struct loop *
210 copy_loop_before (struct loop *loop)
211 {
212 struct loop *res;
213 edge preheader = loop_preheader_edge (loop);
214
215 initialize_original_copy_tables ();
216 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
217 gcc_assert (res != NULL);
218 free_original_copy_tables ();
219
220 update_phis_for_loop_copy (loop, res);
221 rename_variables_in_loop (res);
222
223 return res;
224 }
225
226 /* Creates an empty basic block after LOOP. */
227
228 static void
229 create_bb_after_loop (struct loop *loop)
230 {
231 edge exit = single_exit (loop);
232
233 if (!exit)
234 return;
235
236 split_edge (exit);
237 }
238
239 /* Generate code for PARTITION from the code in LOOP. The loop is
240 copied when COPY_P is true. All the statements not flagged in the
241 PARTITION bitmap are removed from the loop or from its copy. The
242 statements are indexed in sequence inside a basic block, and the
243 basic blocks of a loop are taken in dom order. */
244
245 static void
246 generate_loops_for_partition (struct loop *loop, partition_t partition,
247 bool copy_p)
248 {
249 unsigned i, x;
250 gimple_stmt_iterator bsi;
251 basic_block *bbs;
252
253 if (copy_p)
254 {
255 loop = copy_loop_before (loop);
256 gcc_assert (loop != NULL);
257 create_preheader (loop, CP_SIMPLE_PREHEADERS);
258 create_bb_after_loop (loop);
259 }
260
261 /* Remove stmts not in the PARTITION bitmap. The order in which we
262 visit the phi nodes and the statements is exactly as in
263 stmts_from_loop. */
264 bbs = get_loop_body_in_dom_order (loop);
265
266 if (MAY_HAVE_DEBUG_STMTS)
267 for (x = 0, i = 0; i < loop->num_nodes; i++)
268 {
269 basic_block bb = bbs[i];
270
271 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
272 if (!bitmap_bit_p (partition->stmts, x++))
273 reset_debug_uses (gsi_stmt (bsi));
274
275 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
276 {
277 gimple stmt = gsi_stmt (bsi);
278 if (gimple_code (stmt) != GIMPLE_LABEL
279 && !is_gimple_debug (stmt)
280 && !bitmap_bit_p (partition->stmts, x++))
281 reset_debug_uses (stmt);
282 }
283 }
284
285 for (x = 0, i = 0; i < loop->num_nodes; i++)
286 {
287 basic_block bb = bbs[i];
288
289 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
290 if (!bitmap_bit_p (partition->stmts, x++))
291 {
292 gimple phi = gsi_stmt (bsi);
293 if (virtual_operand_p (gimple_phi_result (phi)))
294 mark_virtual_phi_result_for_renaming (phi);
295 remove_phi_node (&bsi, true);
296 }
297 else
298 gsi_next (&bsi);
299
300 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
301 {
302 gimple stmt = gsi_stmt (bsi);
303 if (gimple_code (stmt) != GIMPLE_LABEL
304 && !is_gimple_debug (stmt)
305 && !bitmap_bit_p (partition->stmts, x++))
306 {
307 unlink_stmt_vdef (stmt);
308 gsi_remove (&bsi, true);
309 release_defs (stmt);
310 }
311 else
312 gsi_next (&bsi);
313 }
314 }
315
316 free (bbs);
317 }
318
319 /* Build the size argument for a memory operation call. */
320
321 static tree
322 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
323 {
324 tree size;
325 size = fold_build2_loc (loc, MULT_EXPR, sizetype,
326 fold_convert_loc (loc, sizetype, nb_iter),
327 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
328 return fold_convert_loc (loc, size_type_node, size);
329 }
330
331 /* Build an address argument for a memory operation call. */
332
333 static tree
334 build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
335 {
336 tree addr_base;
337
338 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
339 addr_base = fold_convert_loc (loc, sizetype, addr_base);
340
341 /* Test for a negative stride, iterating over every element. */
342 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
343 {
344 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
345 fold_convert_loc (loc, sizetype, nb_bytes));
346 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
347 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
348 }
349
350 return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
351 }
352
353 /* Generate a call to memset for PARTITION in LOOP. */
354
355 static void
356 generate_memset_builtin (struct loop *loop, partition_t partition)
357 {
358 gimple_stmt_iterator gsi;
359 gimple stmt, fn_call;
360 tree nb_iter, mem, fn, nb_bytes;
361 location_t loc;
362 tree val;
363
364 stmt = DR_STMT (partition->main_dr);
365 loc = gimple_location (stmt);
366 if (gimple_bb (stmt) == loop->latch)
367 nb_iter = number_of_latch_executions (loop);
368 else
369 nb_iter = number_of_exit_cond_executions (loop);
370
371 /* The new statements will be placed before LOOP. */
372 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
373
374 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
375 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
376 false, GSI_CONTINUE_LINKING);
377 mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
378 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
379 false, GSI_CONTINUE_LINKING);
380
381 /* This exactly matches the pattern recognition in classify_partition. */
382 val = gimple_assign_rhs1 (stmt);
383 if (integer_zerop (val)
384 || real_zerop (val)
385 || TREE_CODE (val) == CONSTRUCTOR)
386 val = integer_zero_node;
387 else if (integer_all_onesp (val))
388 val = build_int_cst (integer_type_node, -1);
389 else
390 {
391 if (TREE_CODE (val) == INTEGER_CST)
392 val = fold_convert (integer_type_node, val);
393 else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
394 {
395 gimple cstmt;
396 tree tem = make_ssa_name (integer_type_node, NULL);
397 cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
398 gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
399 val = tem;
400 }
401 }
402
403 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
404 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
405 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
406
407 if (dump_file && (dump_flags & TDF_DETAILS))
408 {
409 fprintf (dump_file, "generated memset");
410 if (integer_zerop (val))
411 fprintf (dump_file, " zero\n");
412 else if (integer_all_onesp (val))
413 fprintf (dump_file, " minus one\n");
414 else
415 fprintf (dump_file, "\n");
416 }
417 }
418
419 /* Generate a call to memcpy for PARTITION in LOOP. */
420
421 static void
422 generate_memcpy_builtin (struct loop *loop, partition_t partition)
423 {
424 gimple_stmt_iterator gsi;
425 gimple stmt, fn_call;
426 tree nb_iter, dest, src, fn, nb_bytes;
427 location_t loc;
428 enum built_in_function kind;
429
430 stmt = DR_STMT (partition->main_dr);
431 loc = gimple_location (stmt);
432 if (gimple_bb (stmt) == loop->latch)
433 nb_iter = number_of_latch_executions (loop);
434 else
435 nb_iter = number_of_exit_cond_executions (loop);
436
437 /* The new statements will be placed before LOOP. */
438 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
439
440 nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
441 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
442 false, GSI_CONTINUE_LINKING);
443 dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
444 src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
445 if (ptr_derefs_may_alias_p (dest, src))
446 kind = BUILT_IN_MEMMOVE;
447 else
448 kind = BUILT_IN_MEMCPY;
449
450 dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
451 false, GSI_CONTINUE_LINKING);
452 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
453 false, GSI_CONTINUE_LINKING);
454 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
455 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
456 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
457
458 if (dump_file && (dump_flags & TDF_DETAILS))
459 {
460 if (kind == BUILT_IN_MEMCPY)
461 fprintf (dump_file, "generated memcpy\n");
462 else
463 fprintf (dump_file, "generated memmove\n");
464 }
465 }
466
467 /* Remove and destroy the loop LOOP. */
468
469 static void
470 destroy_loop (struct loop *loop)
471 {
472 unsigned nbbs = loop->num_nodes;
473 edge exit = single_exit (loop);
474 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
475 basic_block *bbs;
476 unsigned i;
477
478 bbs = get_loop_body_in_dom_order (loop);
479
480 redirect_edge_pred (exit, src);
481 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
482 exit->flags |= EDGE_FALLTHRU;
483 cancel_loop_tree (loop);
484 rescan_loop_exit (exit, false, true);
485
486 for (i = 0; i < nbbs; i++)
487 {
488 /* We have made sure to not leave any dangling uses of SSA
489 names defined in the loop. With the exception of virtuals.
490 Make sure we replace all uses of virtual defs that will remain
491 outside of the loop with the bare symbol as delete_basic_block
492 will release them. */
493 gimple_stmt_iterator gsi;
494 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
495 {
496 gimple phi = gsi_stmt (gsi);
497 if (virtual_operand_p (gimple_phi_result (phi)))
498 mark_virtual_phi_result_for_renaming (phi);
499 }
500 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
501 {
502 gimple stmt = gsi_stmt (gsi);
503 tree vdef = gimple_vdef (stmt);
504 if (vdef && TREE_CODE (vdef) == SSA_NAME)
505 mark_virtual_operand_for_renaming (vdef);
506 }
507 delete_basic_block (bbs[i]);
508 }
509 free (bbs);
510
511 set_immediate_dominator (CDI_DOMINATORS, dest,
512 recompute_dominator (CDI_DOMINATORS, dest));
513 }
514
515 /* Generates code for PARTITION. */
516
517 static void
518 generate_code_for_partition (struct loop *loop,
519 partition_t partition, bool copy_p)
520 {
521 switch (partition->kind)
522 {
523 case PKIND_MEMSET:
524 generate_memset_builtin (loop, partition);
525 /* If this is the last partition for which we generate code, we have
526 to destroy the loop. */
527 if (!copy_p)
528 destroy_loop (loop);
529 break;
530
531 case PKIND_MEMCPY:
532 generate_memcpy_builtin (loop, partition);
533 /* If this is the last partition for which we generate code, we have
534 to destroy the loop. */
535 if (!copy_p)
536 destroy_loop (loop);
537 break;
538
539 case PKIND_NORMAL:
540 generate_loops_for_partition (loop, partition, copy_p);
541 break;
542
543 default:
544 gcc_unreachable ();
545 }
546 }
547
548
549 /* Returns true if the node V of RDG cannot be recomputed. */
550
551 static bool
552 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
553 {
554 if (RDG_MEM_WRITE_STMT (rdg, v))
555 return true;
556
557 return false;
558 }
559
560 /* Returns true when the vertex V has already been generated in the
561 current partition (V is in PROCESSED), or when V belongs to another
562 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
563
564 static inline bool
565 already_processed_vertex_p (bitmap processed, int v)
566 {
567 return (bitmap_bit_p (processed, v)
568 || !bitmap_bit_p (remaining_stmts, v));
569 }
570
571 /* Returns NULL when there is no anti-dependence among the successors
572 of vertex V, otherwise returns the edge with the anti-dep. */
573
574 static struct graph_edge *
575 has_anti_dependence (struct vertex *v)
576 {
577 struct graph_edge *e;
578
579 if (v->succ)
580 for (e = v->succ; e; e = e->succ_next)
581 if (RDGE_TYPE (e) == anti_dd)
582 return e;
583
584 return NULL;
585 }
586
587 /* Returns true when V has an anti-dependence edge among its successors. */
588
589 static bool
590 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
591 {
592 struct graph_edge *e;
593
594 if (v->pred)
595 for (e = v->pred; e; e = e->pred_next)
596 if (bitmap_bit_p (upstream_mem_writes, e->src)
597 /* Don't consider flow channels: a write to memory followed
598 by a read from memory. These channels allow the split of
599 the RDG in different partitions. */
600 && !RDG_MEM_WRITE_STMT (rdg, e->src))
601 return true;
602
603 return false;
604 }
605
606 /* Initializes the upstream_mem_writes bitmap following the
607 information from RDG. */
608
609 static void
610 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
611 {
612 int v, x;
613 bitmap seen = BITMAP_ALLOC (NULL);
614
615 for (v = rdg->n_vertices - 1; v >= 0; v--)
616 if (!bitmap_bit_p (seen, v))
617 {
618 unsigned i;
619 vec<int> nodes;
620 nodes.create (3);
621
622 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
623
624 FOR_EACH_VEC_ELT (nodes, i, x)
625 {
626 if (!bitmap_set_bit (seen, x))
627 continue;
628
629 if (RDG_MEM_WRITE_STMT (rdg, x)
630 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
631 /* In anti dependences the read should occur before
632 the write, this is why both the read and the write
633 should be placed in the same partition. */
634 || has_anti_dependence (&(rdg->vertices[x])))
635 {
636 bitmap_set_bit (upstream_mem_writes, x);
637 }
638 }
639
640 nodes.release ();
641 }
642 }
643
644 /* Returns true when vertex u has a memory write node as a predecessor
645 in RDG. */
646
647 static bool
648 has_upstream_mem_writes (int u)
649 {
650 return bitmap_bit_p (upstream_mem_writes, u);
651 }
652
653 static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
654 bitmap, bitmap);
655
656 /* Flag the uses of U stopping following the information from
657 upstream_mem_writes. */
658
659 static void
660 rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
661 bitmap processed)
662 {
663 use_operand_p use_p;
664 struct vertex *x = &(rdg->vertices[u]);
665 gimple stmt = RDGV_STMT (x);
666 struct graph_edge *anti_dep = has_anti_dependence (x);
667
668 /* Keep in the same partition the destination of an antidependence,
669 because this is a store to the exact same location. Putting this
670 in another partition is bad for cache locality. */
671 if (anti_dep)
672 {
673 int v = anti_dep->dest;
674
675 if (!already_processed_vertex_p (processed, v))
676 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
677 processed);
678 }
679
680 if (gimple_code (stmt) != GIMPLE_PHI)
681 {
682 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
683 {
684 tree use = USE_FROM_PTR (use_p);
685
686 if (TREE_CODE (use) == SSA_NAME)
687 {
688 gimple def_stmt = SSA_NAME_DEF_STMT (use);
689 int v = rdg_vertex_for_stmt (rdg, def_stmt);
690
691 if (v >= 0
692 && !already_processed_vertex_p (processed, v))
693 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
694 processed);
695 }
696 }
697 }
698
699 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
700 {
701 tree op0 = gimple_assign_lhs (stmt);
702
703 /* Scalar channels don't have enough space for transmitting data
704 between tasks, unless we add more storage by privatizing. */
705 if (is_gimple_reg (op0))
706 {
707 use_operand_p use_p;
708 imm_use_iterator iter;
709
710 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
711 {
712 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
713
714 if (!already_processed_vertex_p (processed, v))
715 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
716 processed);
717 }
718 }
719 }
720 }
721
722 /* Flag V from RDG as part of PARTITION, and also flag its loop number
723 in LOOPS. */
724
725 static void
726 rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops)
727 {
728 struct loop *loop;
729
730 if (!bitmap_set_bit (partition->stmts, v))
731 return;
732
733 loop = loop_containing_stmt (RDG_STMT (rdg, v));
734 bitmap_set_bit (loops, loop->num);
735
736 if (rdg_cannot_recompute_vertex_p (rdg, v))
737 {
738 partition->has_writes = true;
739 bitmap_clear_bit (remaining_stmts, v);
740 }
741 }
742
743 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
744 Also flag their loop number in LOOPS. */
745
746 static void
747 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
748 bitmap loops, bitmap processed)
749 {
750 unsigned i;
751 vec<int> nodes;
752 nodes.create (3);
753 int x;
754
755 bitmap_set_bit (processed, v);
756 rdg_flag_uses (rdg, v, partition, loops, processed);
757 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
758 rdg_flag_vertex (rdg, v, partition, loops);
759
760 FOR_EACH_VEC_ELT (nodes, i, x)
761 if (!already_processed_vertex_p (processed, x))
762 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed);
763
764 nodes.release ();
765 }
766
767 /* Initialize CONDS with all the condition statements from the basic
768 blocks of LOOP. */
769
770 static void
771 collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
772 {
773 unsigned i;
774 edge e;
775 vec<edge> exits = get_loop_exit_edges (loop);
776
777 FOR_EACH_VEC_ELT (exits, i, e)
778 {
779 gimple cond = last_stmt (e->src);
780
781 if (cond)
782 conds->safe_push (cond);
783 }
784
785 exits.release ();
786 }
787
788 /* Add to PARTITION all the exit condition statements for LOOPS
789 together with all their dependent statements determined from
790 RDG. */
791
792 static void
793 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
794 bitmap processed)
795 {
796 unsigned i;
797 bitmap_iterator bi;
798 vec<gimple> conds;
799 conds.create (3);
800
801 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
802 collect_condition_stmts (get_loop (i), &conds);
803
804 while (!conds.is_empty ())
805 {
806 gimple cond = conds.pop ();
807 int v = rdg_vertex_for_stmt (rdg, cond);
808 bitmap new_loops = BITMAP_ALLOC (NULL);
809
810 if (!already_processed_vertex_p (processed, v))
811 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed);
812
813 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
814 if (bitmap_set_bit (loops, i))
815 collect_condition_stmts (get_loop (i), &conds);
816
817 BITMAP_FREE (new_loops);
818 }
819
820 conds.release ();
821 }
822
823 /* Returns a bitmap in which all the statements needed for computing
824 the strongly connected component C of the RDG are flagged, also
825 including the loop exit conditions. */
826
827 static partition_t
828 build_rdg_partition_for_component (struct graph *rdg, rdgc c)
829 {
830 int i, v;
831 partition_t partition = partition_alloc (NULL);
832 bitmap loops = BITMAP_ALLOC (NULL);
833 bitmap processed = BITMAP_ALLOC (NULL);
834
835 FOR_EACH_VEC_ELT (c->vertices, i, v)
836 if (!already_processed_vertex_p (processed, v))
837 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed);
838
839 rdg_flag_loop_exits (rdg, loops, partition, processed);
840
841 BITMAP_FREE (processed);
842 BITMAP_FREE (loops);
843 return partition;
844 }
845
846 /* Free memory for COMPONENTS. */
847
848 static void
849 free_rdg_components (vec<rdgc> components)
850 {
851 int i;
852 rdgc x;
853
854 FOR_EACH_VEC_ELT (components, i, x)
855 {
856 x->vertices.release ();
857 free (x);
858 }
859
860 components.release ();
861 }
862
863 /* Build the COMPONENTS vector with the strongly connected components
864 of RDG in which the STARTING_VERTICES occur. */
865
866 static void
867 rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
868 vec<rdgc> *components)
869 {
870 int i, v;
871 bitmap saved_components = BITMAP_ALLOC (NULL);
872 int n_components = graphds_scc (rdg, NULL);
873 /* ??? Macros cannot process template types with more than one
874 argument, so we need this typedef. */
875 typedef vec<int> vec_int_heap;
876 vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);
877
878 for (i = 0; i < n_components; i++)
879 all_components[i].create (3);
880
881 for (i = 0; i < rdg->n_vertices; i++)
882 all_components[rdg->vertices[i].component].safe_push (i);
883
884 FOR_EACH_VEC_ELT (starting_vertices, i, v)
885 {
886 int c = rdg->vertices[v].component;
887
888 if (bitmap_set_bit (saved_components, c))
889 {
890 rdgc x = XCNEW (struct rdg_component);
891 x->num = c;
892 x->vertices = all_components[c];
893
894 components->safe_push (x);
895 }
896 }
897
898 for (i = 0; i < n_components; i++)
899 if (!bitmap_bit_p (saved_components, i))
900 all_components[i].release ();
901
902 free (all_components);
903 BITMAP_FREE (saved_components);
904 }
905
906 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
907 For the moment we detect only the memset zero pattern. */
908
909 static void
910 classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
911 {
912 bitmap_iterator bi;
913 unsigned i;
914 tree nb_iter;
915 data_reference_p single_load, single_store;
916
917 partition->kind = PKIND_NORMAL;
918 partition->main_dr = NULL;
919 partition->secondary_dr = NULL;
920
921 if (!flag_tree_loop_distribute_patterns)
922 return;
923
924 /* Perform general partition disqualification for builtins. */
925 nb_iter = number_of_exit_cond_executions (loop);
926 if (!nb_iter || nb_iter == chrec_dont_know)
927 return;
928
929 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
930 {
931 gimple stmt = RDG_STMT (rdg, i);
932
933 if (gimple_has_volatile_ops (stmt))
934 return;
935
936 /* If the stmt has uses outside of the loop fail.
937 ??? If the stmt is generated in another partition that
938 is not created as builtin we can ignore this. */
939 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
940 {
941 if (dump_file && (dump_flags & TDF_DETAILS))
942 fprintf (dump_file, "not generating builtin, partition has "
943 "scalar uses outside of the loop\n");
944 return;
945 }
946 }
947
948 /* Detect memset and memcpy. */
949 single_load = NULL;
950 single_store = NULL;
951 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
952 {
953 gimple stmt = RDG_STMT (rdg, i);
954 data_reference_p dr;
955 unsigned j;
956
957 if (gimple_code (stmt) == GIMPLE_PHI)
958 continue;
959
960 /* Any scalar stmts are ok. */
961 if (!gimple_vuse (stmt))
962 continue;
963
964 /* Otherwise just regular loads/stores. */
965 if (!gimple_assign_single_p (stmt))
966 return;
967
968 /* But exactly one store and/or load. */
969 for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
970 {
971 if (DR_IS_READ (dr))
972 {
973 if (single_load != NULL)
974 return;
975 single_load = dr;
976 }
977 else
978 {
979 if (single_store != NULL)
980 return;
981 single_store = dr;
982 }
983 }
984 }
985
986 if (single_store && !single_load)
987 {
988 gimple stmt = DR_STMT (single_store);
989 tree rhs = gimple_assign_rhs1 (stmt);
990 if (!(integer_zerop (rhs)
991 || integer_all_onesp (rhs)
992 || real_zerop (rhs)
993 || (TREE_CODE (rhs) == CONSTRUCTOR
994 && !TREE_CLOBBER_P (rhs))
995 || (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
996 && (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
997 == TYPE_MODE (unsigned_char_type_node)))))
998 return;
999 if (TREE_CODE (rhs) == SSA_NAME
1000 && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1001 && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1002 return;
1003 if (!adjacent_dr_p (single_store))
1004 return;
1005 partition->kind = PKIND_MEMSET;
1006 partition->main_dr = single_store;
1007 }
1008 else if (single_store && single_load)
1009 {
1010 gimple store = DR_STMT (single_store);
1011 gimple load = DR_STMT (single_load);
1012 /* Direct aggregate copy or via an SSA name temporary. */
1013 if (load != store
1014 && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1015 return;
1016 if (!adjacent_dr_p (single_store)
1017 || !adjacent_dr_p (single_load)
1018 || !operand_equal_p (DR_STEP (single_store),
1019 DR_STEP (single_load), 0))
1020 return;
1021 /* Now check that if there is a dependence this dependence is
1022 of a suitable form for memmove. */
1023 vec<loop_p> loops = vNULL;
1024 ddr_p ddr;
1025 loops.safe_push (loop);
1026 ddr = initialize_data_dependence_relation (single_load, single_store,
1027 loops);
1028 compute_affine_dependence (ddr, loop);
1029 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1030 {
1031 free_dependence_relation (ddr);
1032 loops.release ();
1033 return;
1034 }
1035 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1036 {
1037 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1038 {
1039 free_dependence_relation (ddr);
1040 loops.release ();
1041 return;
1042 }
1043 lambda_vector dist_v;
1044 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1045 {
1046 int dist = dist_v[index_in_loop_nest (loop->num,
1047 DDR_LOOP_NEST (ddr))];
1048 if (dist > 0 && !DDR_REVERSED_P (ddr))
1049 {
1050 free_dependence_relation (ddr);
1051 loops.release ();
1052 return;
1053 }
1054 }
1055 }
1056 free_dependence_relation (ddr);
1057 loops.release ();
1058 partition->kind = PKIND_MEMCPY;
1059 partition->main_dr = single_store;
1060 partition->secondary_dr = single_load;
1061 }
1062 }
1063
1064 /* For a data reference REF, return the declaration of its base
1065 address or NULL_TREE if the base is not determined. */
1066
1067 static tree
1068 ref_base_address (data_reference_p dr)
1069 {
1070 tree base_address = DR_BASE_ADDRESS (dr);
1071 if (base_address
1072 && TREE_CODE (base_address) == ADDR_EXPR)
1073 return TREE_OPERAND (base_address, 0);
1074
1075 return base_address;
1076 }
1077
1078 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1079 accesses in RDG. */
1080
1081 static bool
1082 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1083 partition_t partition2)
1084 {
1085 unsigned i, j, k, l;
1086 bitmap_iterator bi, bj;
1087 data_reference_p ref1, ref2;
1088
1089 /* First check whether in the intersection of the two partitions are
1090 any loads or stores. Common loads are the situation that happens
1091 most often. */
1092 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1093 if (RDG_MEM_WRITE_STMT (rdg, i)
1094 || RDG_MEM_READS_STMT (rdg, i))
1095 return true;
1096
1097 /* Then check all data-references against each other. */
1098 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1099 if (RDG_MEM_WRITE_STMT (rdg, i)
1100 || RDG_MEM_READS_STMT (rdg, i))
1101 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1102 if (RDG_MEM_WRITE_STMT (rdg, j)
1103 || RDG_MEM_READS_STMT (rdg, j))
1104 {
1105 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1106 {
1107 tree base1 = ref_base_address (ref1);
1108 if (base1)
1109 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1110 if (base1 == ref_base_address (ref2))
1111 return true;
1112 }
1113 }
1114
1115 return false;
1116 }
1117
1118 /* Aggregate several components into a useful partition that is
1119 registered in the PARTITIONS vector. Partitions will be
1120 distributed in different loops. */
1121
1122 static void
1123 rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
1124 vec<int> *other_stores,
1125 vec<partition_t> *partitions, bitmap processed)
1126 {
1127 int i;
1128 rdgc x;
1129 partition_t partition = partition_alloc (NULL);
1130
1131 FOR_EACH_VEC_ELT (components, i, x)
1132 {
1133 partition_t np;
1134 int v = x->vertices[0];
1135
1136 if (bitmap_bit_p (processed, v))
1137 continue;
1138
1139 np = build_rdg_partition_for_component (rdg, x);
1140 bitmap_ior_into (partition->stmts, np->stmts);
1141 partition->has_writes = partition_has_writes (np);
1142 bitmap_ior_into (processed, np->stmts);
1143 partition_free (np);
1144
1145 if (partition_has_writes (partition))
1146 {
1147 if (dump_file && (dump_flags & TDF_DETAILS))
1148 {
1149 fprintf (dump_file, "ldist useful partition:\n");
1150 dump_bitmap (dump_file, partition->stmts);
1151 }
1152
1153 partitions->safe_push (partition);
1154 partition = partition_alloc (NULL);
1155 }
1156 }
1157
1158 /* Add the nodes from the RDG that were not marked as processed, and
1159 that are used outside the current loop. These are scalar
1160 computations that are not yet part of previous partitions. */
1161 for (i = 0; i < rdg->n_vertices; i++)
1162 if (!bitmap_bit_p (processed, i)
1163 && rdg_defs_used_in_other_loops_p (rdg, i))
1164 other_stores->safe_push (i);
1165
1166 /* If there are still statements left in the OTHER_STORES array,
1167 create other components and partitions with these stores and
1168 their dependences. */
1169 if (other_stores->length () > 0)
1170 {
1171 vec<rdgc> comps;
1172 comps.create (3);
1173 vec<int> foo;
1174 foo.create (3);
1175
1176 rdg_build_components (rdg, *other_stores, &comps);
1177 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
1178
1179 foo.release ();
1180 free_rdg_components (comps);
1181 }
1182
1183 /* If there is something left in the last partition, save it. */
1184 if (bitmap_count_bits (partition->stmts) > 0)
1185 partitions->safe_push (partition);
1186 else
1187 partition_free (partition);
1188 }
1189
1190 /* Dump to FILE the PARTITIONS. */
1191
1192 static void
1193 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1194 {
1195 int i;
1196 partition_t partition;
1197
1198 FOR_EACH_VEC_ELT (partitions, i, partition)
1199 debug_bitmap_file (file, partition->stmts);
1200 }
1201
1202 /* Debug PARTITIONS. */
1203 extern void debug_rdg_partitions (vec<partition_t> );
1204
1205 DEBUG_FUNCTION void
1206 debug_rdg_partitions (vec<partition_t> partitions)
1207 {
1208 dump_rdg_partitions (stderr, partitions);
1209 }
1210
1211 /* Returns the number of read and write operations in the RDG. */
1212
1213 static int
1214 number_of_rw_in_rdg (struct graph *rdg)
1215 {
1216 int i, res = 0;
1217
1218 for (i = 0; i < rdg->n_vertices; i++)
1219 {
1220 if (RDG_MEM_WRITE_STMT (rdg, i))
1221 ++res;
1222
1223 if (RDG_MEM_READS_STMT (rdg, i))
1224 ++res;
1225 }
1226
1227 return res;
1228 }
1229
1230 /* Returns the number of read and write operations in a PARTITION of
1231 the RDG. */
1232
1233 static int
1234 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1235 {
1236 int res = 0;
1237 unsigned i;
1238 bitmap_iterator ii;
1239
1240 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1241 {
1242 if (RDG_MEM_WRITE_STMT (rdg, i))
1243 ++res;
1244
1245 if (RDG_MEM_READS_STMT (rdg, i))
1246 ++res;
1247 }
1248
1249 return res;
1250 }
1251
1252 /* Returns true when one of the PARTITIONS contains all the read or
1253 write operations of RDG. */
1254
1255 static bool
1256 partition_contains_all_rw (struct graph *rdg,
1257 vec<partition_t> partitions)
1258 {
1259 int i;
1260 partition_t partition;
1261 int nrw = number_of_rw_in_rdg (rdg);
1262
1263 FOR_EACH_VEC_ELT (partitions, i, partition)
1264 if (nrw == number_of_rw_in_partition (rdg, partition))
1265 return true;
1266
1267 return false;
1268 }
1269
1270 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1271 distributed loops. */
1272
1273 static int
1274 ldist_gen (struct loop *loop, struct graph *rdg,
1275 vec<int> starting_vertices)
1276 {
1277 int i, nbp;
1278 vec<rdgc> components;
1279 components.create (3);
1280 vec<partition_t> partitions;
1281 partitions.create (3);
1282 vec<int> other_stores;
1283 other_stores.create (3);
1284 partition_t partition;
1285 bitmap processed = BITMAP_ALLOC (NULL);
1286 bool any_builtin;
1287
1288 remaining_stmts = BITMAP_ALLOC (NULL);
1289 upstream_mem_writes = BITMAP_ALLOC (NULL);
1290
1291 for (i = 0; i < rdg->n_vertices; i++)
1292 {
1293 bitmap_set_bit (remaining_stmts, i);
1294
1295 /* Save in OTHER_STORES all the memory writes that are not in
1296 STARTING_VERTICES. */
1297 if (RDG_MEM_WRITE_STMT (rdg, i))
1298 {
1299 int v;
1300 unsigned j;
1301 bool found = false;
1302
1303 FOR_EACH_VEC_ELT (starting_vertices, j, v)
1304 if (i == v)
1305 {
1306 found = true;
1307 break;
1308 }
1309
1310 if (!found)
1311 other_stores.safe_push (i);
1312 }
1313 }
1314
1315 mark_nodes_having_upstream_mem_writes (rdg);
1316 rdg_build_components (rdg, starting_vertices, &components);
1317 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1318 processed);
1319 BITMAP_FREE (processed);
1320
1321 any_builtin = false;
1322 FOR_EACH_VEC_ELT (partitions, i, partition)
1323 {
1324 classify_partition (loop, rdg, partition);
1325 any_builtin |= partition_builtin_p (partition);
1326 }
1327
1328 /* If we are only distributing patterns fuse all partitions that
1329 were not properly classified as builtins. Else fuse partitions
1330 with similar memory accesses. */
1331 if (!flag_tree_loop_distribution)
1332 {
1333 partition_t into;
1334 /* If we did not detect any builtin simply bail out. */
1335 if (!any_builtin)
1336 {
1337 nbp = 0;
1338 goto ldist_done;
1339 }
1340 /* Only fuse adjacent non-builtin partitions, see PR53616.
1341 ??? Use dependence information to improve partition ordering. */
1342 i = 0;
1343 do
1344 {
1345 for (; partitions.iterate (i, &into); ++i)
1346 if (!partition_builtin_p (into))
1347 break;
1348 for (++i; partitions.iterate (i, &partition); ++i)
1349 if (!partition_builtin_p (partition))
1350 {
1351 bitmap_ior_into (into->stmts, partition->stmts);
1352 partitions.ordered_remove (i);
1353 i--;
1354 }
1355 else
1356 break;
1357 }
1358 while ((unsigned) i < partitions.length ());
1359 }
1360 else
1361 {
1362 partition_t into;
1363 int j;
1364 for (i = 0; partitions.iterate (i, &into); ++i)
1365 {
1366 if (partition_builtin_p (into))
1367 continue;
1368 for (j = i + 1;
1369 partitions.iterate (j, &partition); ++j)
1370 {
1371 if (!partition_builtin_p (partition)
1372 /* ??? The following is horribly inefficient,
1373 we are re-computing and analyzing data-references
1374 of the stmts in the partitions all the time. */
1375 && similar_memory_accesses (rdg, into, partition))
1376 {
1377 if (dump_file && (dump_flags & TDF_DETAILS))
1378 {
1379 fprintf (dump_file, "fusing partitions\n");
1380 dump_bitmap (dump_file, into->stmts);
1381 dump_bitmap (dump_file, partition->stmts);
1382 fprintf (dump_file, "because they have similar "
1383 "memory accesses\n");
1384 }
1385 bitmap_ior_into (into->stmts, partition->stmts);
1386 partitions.ordered_remove (j);
1387 j--;
1388 }
1389 }
1390 }
1391 }
1392
1393 nbp = partitions.length ();
1394 if (nbp == 0
1395 || (nbp == 1 && !partition_builtin_p (partitions[0]))
1396 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1397 {
1398 nbp = 0;
1399 goto ldist_done;
1400 }
1401
1402 if (dump_file && (dump_flags & TDF_DETAILS))
1403 dump_rdg_partitions (dump_file, partitions);
1404
1405 FOR_EACH_VEC_ELT (partitions, i, partition)
1406 generate_code_for_partition (loop, partition, i < nbp - 1);
1407
1408 ldist_done:
1409
1410 BITMAP_FREE (remaining_stmts);
1411 BITMAP_FREE (upstream_mem_writes);
1412
1413 FOR_EACH_VEC_ELT (partitions, i, partition)
1414 partition_free (partition);
1415
1416 other_stores.release ();
1417 partitions.release ();
1418 free_rdg_components (components);
1419 return nbp;
1420 }
1421
1422 /* Distributes the code from LOOP in such a way that producer
1423 statements are placed before consumer statements. When STMTS is
1424 NULL, performs the maximal distribution, if STMTS is not NULL,
1425 tries to separate only these statements from the LOOP's body.
1426 Returns the number of distributed loops. */
1427
1428 static int
1429 distribute_loop (struct loop *loop, vec<gimple> stmts)
1430 {
1431 int res = 0;
1432 struct graph *rdg;
1433 gimple s;
1434 unsigned i;
1435 vec<int> vertices;
1436 vec<ddr_p> dependence_relations;
1437 vec<data_reference_p> datarefs;
1438 vec<loop_p> loop_nest;
1439
1440 datarefs.create (10);
1441 dependence_relations.create (100);
1442 loop_nest.create (3);
1443 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1444
1445 if (!rdg)
1446 {
1447 if (dump_file && (dump_flags & TDF_DETAILS))
1448 fprintf (dump_file,
1449 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1450 loop->num);
1451
1452 free_dependence_relations (dependence_relations);
1453 free_data_refs (datarefs);
1454 loop_nest.release ();
1455 return res;
1456 }
1457
1458 vertices.create (3);
1459
1460 if (dump_file && (dump_flags & TDF_DETAILS))
1461 dump_rdg (dump_file, rdg);
1462
1463 FOR_EACH_VEC_ELT (stmts, i, s)
1464 {
1465 int v = rdg_vertex_for_stmt (rdg, s);
1466
1467 if (v >= 0)
1468 {
1469 vertices.safe_push (v);
1470
1471 if (dump_file && (dump_flags & TDF_DETAILS))
1472 fprintf (dump_file,
1473 "ldist asked to generate code for vertex %d\n", v);
1474 }
1475 }
1476
1477 res = ldist_gen (loop, rdg, vertices);
1478 vertices.release ();
1479 free_rdg (rdg);
1480 free_dependence_relations (dependence_relations);
1481 free_data_refs (datarefs);
1482 loop_nest.release ();
1483 return res;
1484 }
1485
1486 /* Distribute all loops in the current function. */
1487
1488 static unsigned int
1489 tree_loop_distribution (void)
1490 {
1491 struct loop *loop;
1492 loop_iterator li;
1493 bool changed = false;
1494 basic_block bb;
1495
1496 FOR_ALL_BB (bb)
1497 {
1498 gimple_stmt_iterator gsi;
1499 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1500 gimple_set_uid (gsi_stmt (gsi), -1);
1501 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1502 gimple_set_uid (gsi_stmt (gsi), -1);
1503 }
1504
1505 /* We can at the moment only distribute non-nested loops, thus restrict
1506 walking to innermost loops. */
1507 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
1508 {
1509 vec<gimple> work_list = vNULL;
1510 basic_block *bbs;
1511 int num = loop->num;
1512 int nb_generated_loops = 0;
1513 unsigned int i;
1514
1515 /* If the loop doesn't have a single exit we will fail anyway,
1516 so do that early. */
1517 if (!single_exit (loop))
1518 continue;
1519
1520 /* Only optimize hot loops. */
1521 if (!optimize_loop_for_speed_p (loop))
1522 continue;
1523
1524 /* Only distribute loops with a header and latch for now. */
1525 if (loop->num_nodes > 2)
1526 continue;
1527
1528 /* Initialize the worklist with stmts we seed the partitions with. */
1529 bbs = get_loop_body_in_dom_order (loop);
1530 for (i = 0; i < loop->num_nodes; ++i)
1531 {
1532 gimple_stmt_iterator gsi;
1533 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1534 {
1535 gimple stmt = gsi_stmt (gsi);
1536 /* Only distribute stores for now.
1537 ??? We should also try to distribute scalar reductions,
1538 thus SSA defs that have scalar uses outside of the loop. */
1539 if (!gimple_assign_single_p (stmt)
1540 || is_gimple_reg (gimple_assign_lhs (stmt)))
1541 continue;
1542
1543 work_list.safe_push (stmt);
1544 }
1545 }
1546 free (bbs);
1547
1548 if (work_list.length () > 0)
1549 nb_generated_loops = distribute_loop (loop, work_list);
1550
1551 if (nb_generated_loops > 0)
1552 changed = true;
1553
1554 if (dump_file && (dump_flags & TDF_DETAILS))
1555 {
1556 if (nb_generated_loops > 1)
1557 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1558 num, nb_generated_loops);
1559 else
1560 fprintf (dump_file, "Loop %d is the same.\n", num);
1561 }
1562
1563 work_list.release ();
1564 }
1565
1566 if (changed)
1567 {
1568 mark_virtual_operands_for_renaming (cfun);
1569 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1570 }
1571
1572 #ifdef ENABLE_CHECKING
1573 verify_loop_structure ();
1574 #endif
1575
1576 return 0;
1577 }
1578
1579 static bool
1580 gate_tree_loop_distribution (void)
1581 {
1582 return flag_tree_loop_distribution
1583 || flag_tree_loop_distribute_patterns;
1584 }
1585
1586 struct gimple_opt_pass pass_loop_distribution =
1587 {
1588 {
1589 GIMPLE_PASS,
1590 "ldist", /* name */
1591 OPTGROUP_LOOP, /* optinfo_flags */
1592 gate_tree_loop_distribution, /* gate */
1593 tree_loop_distribution, /* execute */
1594 NULL, /* sub */
1595 NULL, /* next */
1596 0, /* static_pass_number */
1597 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1598 PROP_cfg | PROP_ssa, /* properties_required */
1599 0, /* properties_provided */
1600 0, /* properties_destroyed */
1601 0, /* todo_flags_start */
1602 TODO_ggc_collect
1603 | TODO_verify_ssa /* todo_flags_finish */
1604 }
1605 };