/* Loop autoparallelization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if that is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation is illustrated below with an example.


  parloop
  {
    int sum=1;

    for (i = 0; i < N; i++)
      {
	x[i] = i + 3;
	sum+=x[i];
      }
  }

  gimple-like code:
  header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


  exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


  after reduction transformation (only relevant parts):

  parloop
  {

  ....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc., replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result().  #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1().  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

  ...

  }

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
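
/* For instance, with the default value of 100 and four threads, the run-time
   check emitted by gen_parallel_loop below only dispatches to the
   parallelized code path when NITER >= MIN_PER_THREAD * N_THREADS, i.e. when
   the loop iterates at least 400 times (for a loop that contains an inner
   loop the bound is relaxed to 2 * N_THREADS; see m_p_thread in
   gen_parallel_loop).  */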

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* Reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting
				   value of the reduction variable when exiting
				   the loop.  */
  tree initial_value;		/* The initial value of the reduction var
				   before entering the loop.  */
  tree field;			/* The name of the field in the parloop data
				   structure intended for reduction.  */
  tree init;			/* Reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose
				   result will be passed to the atomic
				   operation.  Represents the local result each
				   thread computed for the reduction
				   operation.  */
};

/* Equality and hash functions for hashtab code.  */

static int
reduction_info_eq (const void *aa, const void *bb)
{
  const struct reduction_info *a = (const struct reduction_info *) aa;
  const struct reduction_info *b = (const struct reduction_info *) bb;

  return (a->reduc_phi == b->reduc_phi);
}

static hashval_t
reduction_info_hash (const void *aa)
{
  const struct reduction_info *a = (const struct reduction_info *) aa;

  return a->reduc_version;
}

/* Returns the reduction information of REDUCTION_LIST for the reduction
   phi PHI, or NULL if PHI is not a known reduction phi.  */

static struct reduction_info *
reduction_phi (htab_t reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (htab_elements (reduction_list) == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = (struct reduction_info *) htab_find (reduction_list, &tmpred);

  return red;
}
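
/* A minimal usage sketch for the lookup above (assuming, as elsewhere in the
   pass, that the uid of each reduction phi was set to the SSA_NAME_VERSION
   of its original result, so that it matches the reduc_version recorded in
   the hashtable element):

     struct reduction_info *red = reduction_phi (reduction_list, phi);
     if (red)
       ... PHI is a known reduction phi; red->reduc_stmt computes it ...  */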

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Equality and hash functions for hashtab code.  */

static int
name_to_copy_elt_eq (const void *aa, const void *bb)
{
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
  const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;

  return a->version == b->version;
}

static hashval_t
name_to_copy_elt_hash (const void *aa)
{
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;

  return (hashval_t) a->version;
}

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
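
/* For example, with the 1x1 reversal matrix {{-1}} that loop_parallel_p uses
   below, this multiplication simply negates each distance vector: vec = {2}
   yields dest = {-2}, which is no longer lexicographically positive, so such
   a dependence invalidates the reversal.  */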

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  VEC (ddr_p, heap) *dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (VEC_length (ddr_p, dependence_relations) == 0)
    return true;

  ddr = VEC_index (ddr_p, dependence_relations, 0);
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read relations (a.k.a. input
	 dependences): these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
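
/* A worked instance of the criterion above, for a hypothetical loop nest of
   depth two and the interchange matrix T = {{0, 1}, {1, 0}}: a dependence
   with distance vector d = (1, -1) gives T.d = (-1, 1), which is
   lexicographically negative, so the interchange would be rejected.  */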

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  VEC (loop_p, heap) *loop_nest;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  VEC_free (loop_p, heap, loop_nest);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
		 gimple_stmt_iterator *gsi)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      bvar = create_tmp_var (TREE_TYPE (addr),
			     get_name (TREE_OPERAND
				       (TREE_OPERAND (*var_p, 0), 0)));
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
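
/* To illustrate, if the region contains an access `a.b' to a local
   aggregate `a', the loop above strips the component reference down to the
   base decl, which is then rewritten as MEM[&a].b; the address &a is
   computed into a canonical SSA name on the region entry edge exactly once,
   and every later access or address based on `a' reuses that name.  */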

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  htab_t decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				htab_t decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  htab_delete (decl_address);
  VEC_free (basic_block, heap, body);
}

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}

/* Finds the ssa names used in the debug bind or debug source bind STMT
   that are defined outside the region and replaces such ssa names with
   their duplicates stored in NAME_COPIES.  The base decl of the variable
   bound by STMT is replaced with its duplicate from DECL_COPIES.  Returns
   true if STMT should be removed because no replacement for its variable
   is available.  */

static bool
separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
				htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

static int
add_field_for_reduction (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  tree const type = (tree) data;
  tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
  tree field = build_decl (gimple_location (red->reduc_stmt),
			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to an ssa name
   described in SLOT.  The type is passed in DATA.  */

static int
add_field_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  tree type = (tree) data;
  tree name = ssa_name (elt->version);
  tree var = SSA_NAME_VAR (name);
  tree field = build_decl (DECL_SOURCE_LOCATION (var),
			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the reduction's
   data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations are executed (the initial value
     should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

/* Shared data for the load/store generation callbacks below.  */

struct clsn_data
{
  tree store;			/* The structure the values are stored to.  */
  tree load;			/* The pointer through which they are loaded
				   back.  */

  basic_block store_bb;		/* The block the stores are emitted in.  */
  basic_block load_bb;		/* The block the loads are emitted in.  */
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data)
{
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (htab_t reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
}

/* Callback for htab_traverse.  Store the initial value of the reduction
   described in SLOT (saved by initialize_reductions before it was replaced
   by the neutral element) into the reduction's field of the shared data
   structure.  The store information is passed in DATA.  */

static int
create_stores_for_reduction (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates a load from a field of LOAD in
   LOAD_BB and a store to a field of STORE in STORE_BB for the ssa name
   and its duplicate specified in SLOT.  */

static int
create_loads_and_stores_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes) to a structure created for this
   purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
			 type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
			 ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}

/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}

/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */
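
/* In other words (a sketch ignoring reductions), a loop that tests its exit
   condition in the middle of an iteration,

     loop { BODY1; if (COND) break; BODY2; }

   is brought into the shape that gen_parallel_loop documents below:

     BODY1;
     loop { if (COND) break; BODY2; BODY1; }

   so that the exit test becomes the first statement of the loop body.  */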

static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (!is_gimple_reg (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of a reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to the number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}

/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing the GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above changes, the dominance info is hosed; recompute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
1704
1705 /* Generates code to execute the iterations of LOOP in N_THREADS
1706 threads in parallel.
1707
1708 NITER describes number of iterations of LOOP.
1709 REDUCTION_LIST describes the reductions existent in the LOOP. */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
                   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
         || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn; the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

       for (i = 0; i < n; ++i)
         for (j = 0; j < m; ++j)
           ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
                              NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (loop->inner)
    m_p_thread = 2;
  else
    m_p_thread = MIN_PER_THREAD;

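  /* Require at least M_P_THREAD iterations per thread.  E.g., assuming
     MIN_PER_THREAD is 100 and four threads were requested, the guard built
     below sends any execution with fewer than 400 iterations to the
     sequential copy of the loop (a rough profitability heuristic, not an
     exact cost model).  */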
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
                 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
                   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot, so the many-iterations
     version gets an 80% branch probability.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);
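  /* After this, all induction variables are expressed in terms of a single
     new control variable counting iterations from 0 up to NIT; e.g. an IV
     stepping by 4 is rebuilt as  BASE + 4 * counter  (a sketch of the
     effect; see canonicalize_loop_ivs for the details).  */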

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);
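  /* I.e. LOOP now has the shape (schematically)

       while (1)
         {
           if (counter == nit)
             break;
           BODY;
           counter++;
         }

     so that the exit test can become the GIMPLE_OMP_FOR condition and the
     latch increment feeds the GIMPLE_OMP_CONTINUE (a sketch; reductions get
     extra phi handling).  */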

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in the
     loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
                                        new_arg_struct, n_threads, loc);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of
     running a separate expand_omp pass, since it is more efficient, and less
     likely to cause troubles with further analyses not being able to deal
     with the OMP trees.  */

  omp_expand_local (parallel_head);
}

/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gimple_stmt_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
        goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, and insert it into REDUCTION_LIST.  */
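/* For instance, given (a sketch, SSA names invented for illustration)

     # p_1 = PHI <1 (preheader), p_2 (latch)>
     ...
     p_2 = p_1 * a_3;

   REDUC_STMT is the multiplication, and the recorded reduction_code is
   MULT_EXPR.  */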

static void
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
{
  PTR *slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
               "Detected reduction.  Reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

static int
set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
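/* In outline: a header phi that does not form a simple induction variable
   is a reduction candidate; vect_force_simple_reduction then verifies that
   the value flowing back over the latch edge is computed by a supported
   reduction statement (an informal summary of the code below).  */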

static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  vect_dump = NULL;
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (!is_gimple_reg (res))
        continue;

      if (!simple_iv (loop, loop, res, &iv, true)
          && simple_loop_info)
        {
          gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
                                                           phi, true,
                                                           &double_reduc);
          if (reduc_stmt && !double_reduc)
            build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
}

/* Try to initialize NITER for the code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Try to initialize REDUCTION_LIST for the code generation part.
   REDUCTION_LIST describes the reductions.  */
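/* For example (a sketch with invented names), in

     p = 1;
     for (i = 0; i < n; i++)
       p *= a[i];
     use (p);

   the value of p live after the loop must trace back to a reduction
   detected by gather_scalar_reductions; any other scalar escaping the loop
   makes it non-parallelizable.  */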

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (is_gimple_reg (val))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "phi is ");
              print_gimple_stmt (dump_file, phi, 0, 0);
              fprintf (dump_file, "arg of phi to exit: value ");
              print_generic_expr (dump_file, val, 0);
              fprintf (dump_file, " used outside loop\n");
              fprintf (dump_file,
                       "  checking if it is part of a reduction pattern:\n");
            }
          if (htab_elements (reduction_list) == 0)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not part of a reduction.\n");
              return false;
            }
          reduc_phi = NULL;
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
            {
              if (!gimple_debug_bind_p (USE_STMT (use_p))
                  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
                {
                  reduc_phi = USE_STMT (use_p);
                  break;
                }
            }
          red = reduction_phi (reduction_list, reduc_phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not part of a reduction.\n");
              return false;
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "reduction phi is ");
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
              fprintf (dump_file, "reduction stmt is ");
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
            }
        }
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
        {
          struct reduction_info *red;

          red = reduction_phi (reduction_list, phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: scalar dependency between iterations\n");
              return false;
            }
        }
    }

  return true;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */
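/* The number of threads comes from flag_tree_parallelize_loops, i.e. the
   -ftree-parallelize-loops=N option.  Under -floop-parallelize-all, loops
   that graphite already marked with can_be_parallel skip the dependence
   test done by loop_parallel_p below.  */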

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list = htab_create (10, reduction_info_hash,
                                reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
          if (loop->inner)
            fprintf (dump_file, "loop %d is not innermost\n", loop->num);
          else
            fprintf (dump_file, "loop %d is innermost\n", loop->num);
        }

      /* If we use autopar in graphite pass, we use its marked dependency
         checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is not parallel according to graphite\n");
          continue;
        }

      if (!single_dom_exit (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is !single_dom_exit\n");
          continue;
        }

      if (/* And of course, the loop must be parallelizable.  */
          !can_duplicate_loop_p (loop)
          || loop_has_blocks_with_irreducible_flag (loop)
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
          /* FIXME: the check for vector phi nodes could be removed.  */
          || loop_has_vector_phi_nodes (loop))
        continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
        estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
         count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
          && ((estimated != -1
               && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
              /* Do not bother with loops in cold areas.  */
              || optimize_loop_nest_for_size_p (loop)))
        continue;

      if (!try_get_loop_niter (loop, &niter_desc))
        continue;

      if (!try_create_reduction_list (loop, reduction_list))
        continue;

      if (!flag_loop_parallelize_all
          && !loop_parallel_p (loop, &parloop_obstack))
        continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop->inner)
            fprintf (dump_file, "parallelizing outer loop %d\n",
                     loop->header->index);
          else
            fprintf (dump_file, "parallelizing inner loop %d\n",
                     loop->header->index);
          loop_loc = find_loop_location (loop);
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\nloop at %s:%d: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
        }
      gen_parallel_loop (loop, reduction_list,
                         n_threads, &niter_desc);
#ifdef ENABLE_CHECKING
      verify_flow_info ();
      verify_loop_structure ();
      verify_loop_closed_ssa (true);
#endif
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

#include "gt-tree-parloops.h"