1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
62
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67 struct omp_region
68 {
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
107 };
108
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
111
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
116
117 /* Return true if REGION is a combined parallel+workshare region. */
118
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
121 {
122 return region->is_combined_parallel;
123 }
124
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
135
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
138
139 Is lowered into:
140
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
144
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
150
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
155
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
161
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
166
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 {
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
172
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
175
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
184
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
195
196 return true;
197 }
198
199 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
200 of the simd modifier (SIMD_SCHEDULE). */
201
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 {
205 if (!simd_schedule)
206 return chunk_size;
207
208 poly_uint64 vf = omp_max_vf ();
209 if (known_eq (vf, 1U))
210 return chunk_size;
211
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
217 }
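/* As an illustration (with made-up numbers, not tied to any particular
   target): with a simd modifier on the schedule clause and omp_max_vf ()
   returning 8, a chunk size of 10 is rounded up to the next multiple of
   the vectorization factor:
     (10 + (8 - 1)) & -8 == 16
   so that every chunk handed out by the runtime covers whole SIMD
   iterations.  */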
218
219 /* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
222
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 {
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
229
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 {
232 struct omp_for_data fd;
233 tree n1, n2;
234
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
238
239 if (gimple_omp_for_combined_into_p (for_stmt))
240 {
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
250 }
251
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
256
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
259
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
262
263 if (fd.chunk_size)
264 {
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
268 }
269
270 return ws_args;
271 }
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 {
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
282 }
283
284 gcc_unreachable ();
285 }
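/* For example (a sketch only), for

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++)

   the combined parallel+workshare call needs the loop bounds, step and
   chunk size in addition to the usual parallel arguments, so WS_ARGS
   would hold { (long) 0, (long) n, (long) 1, (long) 4 }.  */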
286
287 /* Discover whether REGION is a combined parallel+workshare region. */
288
289 static void
290 determine_parallel_type (struct omp_region *region)
291 {
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
294
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
299
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
305
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
312
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
319 {
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
322
323 if (region->inner->type == GIMPLE_OMP_FOR)
324 {
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 {
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
344 }
345 }
346
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350 }
351 }
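/* For instance, a region of the form

     #pragma omp parallel
     #pragma omp for schedule (dynamic)
     for (...)

   with nothing between the two directives is marked is_combined_parallel,
   and expansion later emits a single GOMP_parallel_loop_dynamic call
   instead of a GOMP_parallel call plus a separate loop-start call.
   (Illustrative summary; the exact entry point is chosen in
   expand_parallel_call.)  */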
352
353 /* Debugging dumps for parallel regions. */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
357
358 /* Dump the parallel region tree rooted at REGION. */
359
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 {
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
365
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
368
369 if (region->cont)
370 {
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
373 }
374
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
380
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
383 }
384
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
387 {
388 dump_omp_region (stderr, region, 0);
389 }
390
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
393 {
394 dump_omp_region (stderr, root_omp_region, 0);
395 }
396
397 /* Create a new parallel region starting at STMT inside region PARENT. */
398
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
402 {
403 struct omp_region *region = XCNEW (struct omp_region);
404
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
408
409 if (parent)
410 {
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
415 }
416 else
417 {
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
422 }
423
424 return region;
425 }
426
427 /* Release the memory associated with the region tree rooted at REGION. */
428
429 static void
430 free_omp_region_1 (struct omp_region *region)
431 {
432 struct omp_region *i, *n;
433
434 for (i = region->inner; i ; i = n)
435 {
436 n = i->next;
437 free_omp_region_1 (i);
438 }
439
440 free (region);
441 }
442
443 /* Release the memory for the entire omp region tree. */
444
445 void
446 omp_free_regions (void)
447 {
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
450 {
451 n = r->next;
452 free_omp_region_1 (r);
453 }
454 root_omp_region = NULL;
455 }
456
457 /* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
459
460 static gcond *
461 gimple_build_cond_empty (tree cond)
462 {
463 enum tree_code pred_code;
464 tree lhs, rhs;
465
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468 }
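/* E.g. for COND a < b this yields the GIMPLE_COND "if (a < b)" with both
   branch labels left NULL, to be wired up by the caller.  */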
469
470 /* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
472
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 {
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
478 {
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
482 {
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
485
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
491 }
492 }
493
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
497
498 return false;
499 }
500
501 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
502 Add CHILD_FNDECL to decl chain of the supercontext of the block
503 ENTRY_BLOCK - this is the block which originally contained the
504 code from which CHILD_FNDECL was created.
505
506 Together, these actions ensure that the debug info for the outlined
507 function will be emitted with the correct lexical scope. */
508
509 static void
510 adjust_context_and_scope (tree entry_block, tree child_fndecl)
511 {
512 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
513 {
514 tree b = BLOCK_SUPERCONTEXT (entry_block);
515
516 if (TREE_CODE (b) == BLOCK)
517 {
518 tree parent_fndecl;
519
520 /* Follow supercontext chain until the parent fndecl
521 is found. */
522 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
523 TREE_CODE (parent_fndecl) == BLOCK;
524 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
525 ;
526
527 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
528
529 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530
531 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
532 BLOCK_VARS (b) = child_fndecl;
533 }
534 }
535 }
536
537 /* Build the function calls to GOMP_parallel_start etc to actually
538 generate the parallel operation. REGION is the parallel region
539 being expanded.  BB is the block where the code is to be inserted.  WS_ARGS
540 will be set if this is a call to a combined parallel+workshare
541 construct; it contains the list of additional arguments needed by
542 the workshare construct. */
543
544 static void
545 expand_parallel_call (struct omp_region *region, basic_block bb,
546 gomp_parallel *entry_stmt,
547 vec<tree, va_gc> *ws_args)
548 {
549 tree t, t1, t2, val, cond, c, clauses, flags;
550 gimple_stmt_iterator gsi;
551 gimple *stmt;
552 enum built_in_function start_ix;
553 int start_ix2;
554 location_t clause_loc;
555 vec<tree, va_gc> *args;
556
557 clauses = gimple_omp_parallel_clauses (entry_stmt);
558
559 /* Determine what flavor of GOMP_parallel we will be
560 emitting. */
561 start_ix = BUILT_IN_GOMP_PARALLEL;
562 if (is_combined_parallel (region))
563 {
564 switch (region->inner->type)
565 {
566 case GIMPLE_OMP_FOR:
567 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
568 switch (region->inner->sched_kind)
569 {
570 case OMP_CLAUSE_SCHEDULE_RUNTIME:
571 start_ix2 = 3;
572 break;
573 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
574 case OMP_CLAUSE_SCHEDULE_GUIDED:
575 if (region->inner->sched_modifiers
576 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
577 {
578 start_ix2 = 3 + region->inner->sched_kind;
579 break;
580 }
581 /* FALLTHRU */
582 default:
583 start_ix2 = region->inner->sched_kind;
584 break;
585 }
586 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
587 start_ix = (enum built_in_function) start_ix2;
588 break;
589 case GIMPLE_OMP_SECTIONS:
590 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
591 break;
592 default:
593 gcc_unreachable ();
594 }
595 }
596
597 /* By default, the value of NUM_THREADS is zero (selected at run time)
598 and there is no conditional. */
599 cond = NULL_TREE;
600 val = build_int_cst (unsigned_type_node, 0);
601 flags = build_int_cst (unsigned_type_node, 0);
602
603 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
604 if (c)
605 cond = OMP_CLAUSE_IF_EXPR (c);
606
607 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
608 if (c)
609 {
610 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
611 clause_loc = OMP_CLAUSE_LOCATION (c);
612 }
613 else
614 clause_loc = gimple_location (entry_stmt);
615
616 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
617 if (c)
618 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
619
620 /* Ensure 'val' is of the correct type. */
621 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
622
623 /* If we found the clause 'if (cond)', build either
624 (cond != 0) or (cond ? val : 1u). */
625 if (cond)
626 {
627 cond = gimple_boolify (cond);
628
629 if (integer_zerop (val))
630 val = fold_build2_loc (clause_loc,
631 EQ_EXPR, unsigned_type_node, cond,
632 build_int_cst (TREE_TYPE (cond), 0));
633 else
634 {
635 basic_block cond_bb, then_bb, else_bb;
636 edge e, e_then, e_else;
637 tree tmp_then, tmp_else, tmp_join, tmp_var;
638
639 tmp_var = create_tmp_var (TREE_TYPE (val));
640 if (gimple_in_ssa_p (cfun))
641 {
642 tmp_then = make_ssa_name (tmp_var);
643 tmp_else = make_ssa_name (tmp_var);
644 tmp_join = make_ssa_name (tmp_var);
645 }
646 else
647 {
648 tmp_then = tmp_var;
649 tmp_else = tmp_var;
650 tmp_join = tmp_var;
651 }
652
653 e = split_block_after_labels (bb);
654 cond_bb = e->src;
655 bb = e->dest;
656 remove_edge (e);
657
658 then_bb = create_empty_bb (cond_bb);
659 else_bb = create_empty_bb (then_bb);
660 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
661 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
662
663 stmt = gimple_build_cond_empty (cond);
664 gsi = gsi_start_bb (cond_bb);
665 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
666
667 gsi = gsi_start_bb (then_bb);
668 expand_omp_build_assign (&gsi, tmp_then, val, true);
669
670 gsi = gsi_start_bb (else_bb);
671 expand_omp_build_assign (&gsi, tmp_else,
672 build_int_cst (unsigned_type_node, 1),
673 true);
674
675 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
676 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
677 add_bb_to_loop (then_bb, cond_bb->loop_father);
678 add_bb_to_loop (else_bb, cond_bb->loop_father);
679 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
680 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
681
682 if (gimple_in_ssa_p (cfun))
683 {
684 gphi *phi = create_phi_node (tmp_join, bb);
685 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
686 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
687 }
688
689 val = tmp_join;
690 }
691
692 gsi = gsi_start_bb (bb);
693 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
694 false, GSI_CONTINUE_LINKING);
695 }
696
697 gsi = gsi_last_nondebug_bb (bb);
698 t = gimple_omp_parallel_data_arg (entry_stmt);
699 if (t == NULL)
700 t1 = null_pointer_node;
701 else
702 t1 = build_fold_addr_expr (t);
703 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
704 t2 = build_fold_addr_expr (child_fndecl);
705
706 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
707
708 vec_alloc (args, 4 + vec_safe_length (ws_args));
709 args->quick_push (t2);
710 args->quick_push (t1);
711 args->quick_push (val);
712 if (ws_args)
713 args->splice (*ws_args);
714 args->quick_push (flags);
715
716 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
717 builtin_decl_explicit (start_ix), args);
718
719 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
720 false, GSI_CONTINUE_LINKING);
721
722 if (hsa_gen_requested_p ()
723 && parallel_needs_hsa_kernel_p (region))
724 {
725 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
726 hsa_register_kernel (child_cnode);
727 }
728 }
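/* As a sketch of the result (function and variable names invented for
   illustration), a plain

     #pragma omp parallel num_threads (4)

   whose body was outlined into foo._omp_fn.0 expands to roughly

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   i.e. the outlined function, the shared-data block, the thread count
   (zero when it is to be chosen at run time) and the flags word.  */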
729
730 /* Build the function call to GOMP_task to actually
731 generate the task operation.  BB is the block where the code is to be inserted. */
732
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
736 {
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
740
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
742
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
749
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
754
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
760 {
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
779 {
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
782 {
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
785 }
786 else
787 num_tasks = integer_zero_node;
788 }
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
795 }
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
798
799 tree flags = build_int_cst (unsigned_type_node, iflags);
800
801 tree cond = boolean_true_node;
802 if (ifc)
803 {
804 if (taskloop_p)
805 {
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
813 }
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
816 }
817
818 if (finalc)
819 {
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
826 }
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
836
837 gsi = gsi_last_nondebug_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
849
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
864
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
867 }
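/* A rough picture of the emitted call for a plain (non-taskloop) task,
   with placeholder names:

     GOMP_task (foo._omp_fn.1, &.omp_data_o, foo._omp_cpyfn.2,
		arg_size, arg_align, if_cond, flags, depend, priority);

   where the copy-function slot is a null pointer when the data block
   needs no constructor-like copying.  */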
868
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
870
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
873 {
874 tree chain = NULL_TREE, t;
875 unsigned ix;
876
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
878 {
879 DECL_CHAIN (t) = chain;
880 chain = t;
881 }
882
883 return chain;
884 }
885
886 /* Remove barriers in REGION->EXIT's block.  Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
888 is an implicit barrier, any barrier that a workshare inside the
889 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region can
890 now be removed. */
891
892 static void
893 remove_exit_barrier (struct omp_region *region)
894 {
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
901
902 exit_bb = region->exit;
903
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
908
909 /* The last stmt in the block will be the parallel's GIMPLE_OMP_RETURN.  The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_nondebug_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev_nondebug (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
919
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
921 {
922 gsi = gsi_last_nondebug_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
928 {
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934 from within the current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
938 {
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
944
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
948 {
949 any_addressable_vars = 1;
950 break;
951 }
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
957 {
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
962 {
963 any_addressable_vars = 1;
964 break;
965 }
966 if (block == gimple_block (parallel_stmt))
967 break;
968 }
969 }
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
972 }
973 }
974 }
975
976 static void
977 remove_exit_barriers (struct omp_region *region)
978 {
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
981
982 if (region->inner)
983 {
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
987 {
988 region = region->next;
989 remove_exit_barriers (region);
990 }
991 }
992 }
993
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const.  Similarly for a task body, except
999 that in an untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1001
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1004 {
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1014
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1017 {
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1020
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1026 {
1027 tree built_in;
1028
1029 if (DECL_NAME (decl) == thr_num_id)
1030 {
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1036 }
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1041
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1045
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1048
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1053
1054 gimple_call_set_fndecl (call, built_in);
1055 }
1056 }
1057 }
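/* For example, inside an outlined parallel body a call

     n = omp_get_num_threads ();

   is redirected to the const __builtin_omp_get_num_threads (), so later
   passes may CSE repeated queries within that body.  (Illustration only;
   the actual transformation is just the fndecl swap above.)  */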
1058
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1061
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1064 {
1065 tree t = *tp;
1066
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1070
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1073
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1076 }
1077
1078 /* Insert a TO = FROM assignment before *GSI_P, or after it if AFTER is true. */
1079
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1083 {
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1095 {
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1098 }
1099 }
1100
1101 /* Expand the OpenMP parallel or task directive starting at REGION. */
1102
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1105 {
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1113
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1117
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1123
1124 if (is_combined_parallel (region))
1125 ws_args = region->ws_args;
1126 else
1127 ws_args = NULL;
1128
1129 if (child_cfun->cfg)
1130 {
1131 /* Due to inlining, it may happen that we have already outlined
1132 the region, in which case all we need to do is make the
1133 sub-graph unreachable and emit the parallel call. */
1134 edge entry_succ_e, exit_succ_e;
1135
1136 entry_succ_e = single_succ_edge (entry_bb);
1137
1138 gsi = gsi_last_nondebug_bb (entry_bb);
1139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1140 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1141 gsi_remove (&gsi, true);
1142
1143 new_bb = entry_bb;
1144 if (exit_bb)
1145 {
1146 exit_succ_e = single_succ_edge (exit_bb);
1147 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1148 }
1149 remove_edge_and_dominated_blocks (entry_succ_e);
1150 }
1151 else
1152 {
1153 unsigned srcidx, dstidx, num;
1154
1155 /* If the parallel region needs data sent from the parent
1156 function, then the very first statement (except possible
1157 tree profile counter updates) of the parallel body
1158 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1159 &.OMP_DATA_O is passed as an argument to the child function,
1160 we need to replace it with the argument as seen by the child
1161 function.
1162
1163 In most cases, this will end up being the identity assignment
1164 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1165 a function call that has been inlined, the original PARM_DECL
1166 .OMP_DATA_I may have been converted into a different local
1167 variable, in which case we need to keep the assignment. */
1168 if (gimple_omp_taskreg_data_arg (entry_stmt))
1169 {
1170 basic_block entry_succ_bb
1171 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1172 : FALLTHRU_EDGE (entry_bb)->dest;
1173 tree arg;
1174 gimple *parcopy_stmt = NULL;
1175
1176 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1177 {
1178 gimple *stmt;
1179
1180 gcc_assert (!gsi_end_p (gsi));
1181 stmt = gsi_stmt (gsi);
1182 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1183 continue;
1184
1185 if (gimple_num_ops (stmt) == 2)
1186 {
1187 tree arg = gimple_assign_rhs1 (stmt);
1188
1189 /* We're ignoring the subcode because we're
1190 effectively doing a STRIP_NOPS. */
1191
1192 if (TREE_CODE (arg) == ADDR_EXPR
1193 && TREE_OPERAND (arg, 0)
1194 == gimple_omp_taskreg_data_arg (entry_stmt))
1195 {
1196 parcopy_stmt = stmt;
1197 break;
1198 }
1199 }
1200 }
1201
1202 gcc_assert (parcopy_stmt != NULL);
1203 arg = DECL_ARGUMENTS (child_fn);
1204
1205 if (!gimple_in_ssa_p (cfun))
1206 {
1207 if (gimple_assign_lhs (parcopy_stmt) == arg)
1208 gsi_remove (&gsi, true);
1209 else
1210 {
1211 /* ?? Is setting the subcode really necessary ?? */
1212 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1213 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1214 }
1215 }
1216 else
1217 {
1218 tree lhs = gimple_assign_lhs (parcopy_stmt);
1219 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1220 /* We'd like to set the rhs to the default def in the child_fn,
1221 but it's too early to create ssa names in the child_fn.
1222 Instead, we set the rhs to the parm. In
1223 move_sese_region_to_fn, we introduce a default def for the
1224 parm, map the parm to its default def, and once we encounter
1225 this stmt, replace the parm with the default def. */
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 update_stmt (parcopy_stmt);
1228 }
1229 }
1230
1231 /* Declare local variables needed in CHILD_CFUN. */
1232 block = DECL_INITIAL (child_fn);
1233 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1234 /* The gimplifier could record temporaries in parallel/task block
1235 rather than in the containing function's local_decls chain,
1236 which would mean cgraph missed finalizing them. Do it now. */
1237 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1238 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1239 varpool_node::finalize_decl (t);
1240 DECL_SAVED_TREE (child_fn) = NULL;
1241 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1242 gimple_set_body (child_fn, NULL);
1243 TREE_USED (block) = 1;
1244
1245 /* Reset DECL_CONTEXT on function arguments. */
1246 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1247 DECL_CONTEXT (t) = child_fn;
1248
1249 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1250 so that it can be moved to the child function. */
1251 gsi = gsi_last_nondebug_bb (entry_bb);
1252 stmt = gsi_stmt (gsi);
1253 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1255 e = split_block (entry_bb, stmt);
1256 gsi_remove (&gsi, true);
1257 entry_bb = e->dest;
1258 edge e2 = NULL;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1260 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1261 else
1262 {
1263 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1264 gcc_assert (e2->dest == region->exit);
1265 remove_edge (BRANCH_EDGE (entry_bb));
1266 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1267 gsi = gsi_last_nondebug_bb (region->exit);
1268 gcc_assert (!gsi_end_p (gsi)
1269 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1270 gsi_remove (&gsi, true);
1271 }
1272
1273 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1274 if (exit_bb)
1275 {
1276 gsi = gsi_last_nondebug_bb (exit_bb);
1277 gcc_assert (!gsi_end_p (gsi)
1278 && (gimple_code (gsi_stmt (gsi))
1279 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1280 stmt = gimple_build_return (NULL);
1281 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1282 gsi_remove (&gsi, true);
1283 }
1284
1285 /* Move the parallel region into CHILD_CFUN. */
1286
1287 if (gimple_in_ssa_p (cfun))
1288 {
1289 init_tree_ssa (child_cfun);
1290 init_ssa_operands (child_cfun);
1291 child_cfun->gimple_df->in_ssa_p = true;
1292 block = NULL_TREE;
1293 }
1294 else
1295 block = gimple_block (entry_stmt);
1296
1297 /* Make sure to generate early debug for the function before
1298 outlining anything. */
1299 if (! gimple_in_ssa_p (cfun))
1300 (*debug_hooks->early_global_decl) (cfun->decl);
1301
1302 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1303 if (exit_bb)
1304 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1305 if (e2)
1306 {
1307 basic_block dest_bb = e2->dest;
1308 if (!exit_bb)
1309 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1310 remove_edge (e2);
1311 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1312 }
1313 /* When the OMP expansion process cannot guarantee an up-to-date
1314 loop tree, arrange for the child function to fix up loops. */
1315 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1316 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1317
1318 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1319 num = vec_safe_length (child_cfun->local_decls);
1320 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1321 {
1322 t = (*child_cfun->local_decls)[srcidx];
1323 if (DECL_CONTEXT (t) == cfun->decl)
1324 continue;
1325 if (srcidx != dstidx)
1326 (*child_cfun->local_decls)[dstidx] = t;
1327 dstidx++;
1328 }
1329 if (dstidx != num)
1330 vec_safe_truncate (child_cfun->local_decls, dstidx);
1331
1332 /* Inform the callgraph about the new function. */
1333 child_cfun->curr_properties = cfun->curr_properties;
1334 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1335 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1336 cgraph_node *node = cgraph_node::get_create (child_fn);
1337 node->parallelized_function = 1;
1338 cgraph_node::add_new_function (child_fn, true);
1339
1340 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1341 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1342
1343 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1344 fixed in a following pass. */
1345 push_cfun (child_cfun);
1346 if (need_asm)
1347 assign_assembler_name_if_needed (child_fn);
1348
1349 if (optimize)
1350 optimize_omp_library_calls (entry_stmt);
1351 update_max_bb_count ();
1352 cgraph_edge::rebuild_edges ();
1353
1354 /* Some EH regions might become dead, see PR34608. If
1355 pass_cleanup_cfg isn't the first pass to happen with the
1356 new child, these dead EH edges might cause problems.
1357 Clean them up now. */
1358 if (flag_exceptions)
1359 {
1360 basic_block bb;
1361 bool changed = false;
1362
1363 FOR_EACH_BB_FN (bb, cfun)
1364 changed |= gimple_purge_dead_eh_edges (bb);
1365 if (changed)
1366 cleanup_tree_cfg ();
1367 }
1368 if (gimple_in_ssa_p (cfun))
1369 update_ssa (TODO_update_ssa);
1370 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1371 verify_loop_structure ();
1372 pop_cfun ();
1373
1374 if (dump_file && !gimple_in_ssa_p (cfun))
1375 {
1376 omp_any_child_fn_dumped = true;
1377 dump_function_header (dump_file, child_fn, dump_flags);
1378 dump_function_to_file (child_fn, dump_file, dump_flags);
1379 }
1380 }
1381
1382 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1383 expand_parallel_call (region, new_bb,
1384 as_a <gomp_parallel *> (entry_stmt), ws_args);
1385 else
1386 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1387 if (gimple_in_ssa_p (cfun))
1388 update_ssa (TODO_update_ssa_only_virtuals);
1389 }
1390
1391 /* Information about members of an OpenACC collapsed loop nest. */
1392
1393 struct oacc_collapse
1394 {
1395 tree base; /* Base value. */
1396 tree iters; /* Number of steps. */
1397 tree step; /* Step size. */
1398 tree tile; /* Tile increment (if tiled). */
1399 tree outer; /* Tile iterator var. */
1400 };
1401
1402 /* Helper for expand_oacc_for. Determine collapsed loop information.
1403 Fill in COUNTS array. Emit any initialization code before GSI.
1404 Return the calculated outer loop bound of BOUND_TYPE. */
1405
1406 static tree
1407 expand_oacc_collapse_init (const struct omp_for_data *fd,
1408 gimple_stmt_iterator *gsi,
1409 oacc_collapse *counts, tree bound_type,
1410 location_t loc)
1411 {
1412 tree tiling = fd->tiling;
1413 tree total = build_int_cst (bound_type, 1);
1414 int ix;
1415
1416 gcc_assert (integer_onep (fd->loop.step));
1417 gcc_assert (integer_zerop (fd->loop.n1));
1418
1419 /* When tiling, the first operand of the tile clause applies to the
1420 innermost loop, and we work outwards from there. Seems
1421 backwards, but whatever. */
1422 for (ix = fd->collapse; ix--;)
1423 {
1424 const omp_for_data_loop *loop = &fd->loops[ix];
1425
1426 tree iter_type = TREE_TYPE (loop->v);
1427 tree diff_type = iter_type;
1428 tree plus_type = iter_type;
1429
1430 gcc_assert (loop->cond_code == fd->loop.cond_code);
1431
1432 if (POINTER_TYPE_P (iter_type))
1433 plus_type = sizetype;
1434 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1435 diff_type = signed_type_for (diff_type);
1436
1437 if (tiling)
1438 {
1439 tree num = build_int_cst (integer_type_node, fd->collapse);
1440 tree loop_no = build_int_cst (integer_type_node, ix);
1441 tree tile = TREE_VALUE (tiling);
1442 gcall *call
1443 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1444 /* gwv-outer=*/integer_zero_node,
1445 /* gwv-inner=*/integer_zero_node);
1446
1447 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1448 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1449 gimple_call_set_lhs (call, counts[ix].tile);
1450 gimple_set_location (call, loc);
1451 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1452
1453 tiling = TREE_CHAIN (tiling);
1454 }
1455 else
1456 {
1457 counts[ix].tile = NULL;
1458 counts[ix].outer = loop->v;
1459 }
1460
1461 tree b = loop->n1;
1462 tree e = loop->n2;
1463 tree s = loop->step;
1464 bool up = loop->cond_code == LT_EXPR;
1465 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1466 bool negating;
1467 tree expr;
1468
1469 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1470 true, GSI_SAME_STMT);
1471 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1472 true, GSI_SAME_STMT);
1473
1474 /* Convert the step, avoiding possible unsigned->signed overflow. */
1475 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1476 if (negating)
1477 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1478 s = fold_convert (diff_type, s);
1479 if (negating)
1480 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1481 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1482 true, GSI_SAME_STMT);
1483
1484 /* Determine the range, avoiding possible unsigned->signed overflow. */
1485 negating = !up && TYPE_UNSIGNED (iter_type);
1486 expr = fold_build2 (MINUS_EXPR, plus_type,
1487 fold_convert (plus_type, negating ? b : e),
1488 fold_convert (plus_type, negating ? e : b));
1489 expr = fold_convert (diff_type, expr);
1490 if (negating)
1491 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1492 tree range = force_gimple_operand_gsi
1493 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1494
1495 /* Determine number of iterations. */
1496 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1497 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1498 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1499
1500 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1501 true, GSI_SAME_STMT);
1502
1503 counts[ix].base = b;
1504 counts[ix].iters = iters;
1505 counts[ix].step = s;
1506
1507 total = fold_build2 (MULT_EXPR, bound_type, total,
1508 fold_convert (bound_type, iters));
1509 }
1510
1511 return total;
1512 }
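/* For instance (a sketch), for the collapsed nest

     for (i = 0; i < 4; i++)
       for (j = 0; j < 5; j++)

   the two members get 4 and 5 iterations respectively and the returned
   outer loop bound is their product, 20.  */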
1513
1514 /* Emit initializers for collapsed loop members. INNER is true if
1515 this is for the element loop of a TILE. IVAR is the outer
1516 loop iteration variable, from which collapsed loop iteration values
1517 are calculated. COUNTS array has been initialized by
1518 expand_oacc_collapse_init. */
1519
1520 static void
1521 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1522 gimple_stmt_iterator *gsi,
1523 const oacc_collapse *counts, tree ivar)
1524 {
1525 tree ivar_type = TREE_TYPE (ivar);
1526
1527 /* The most rapidly changing iteration variable is the innermost
1528 one. */
1529 for (int ix = fd->collapse; ix--;)
1530 {
1531 const omp_for_data_loop *loop = &fd->loops[ix];
1532 const oacc_collapse *collapse = &counts[ix];
1533 tree v = inner ? loop->v : collapse->outer;
1534 tree iter_type = TREE_TYPE (v);
1535 tree diff_type = TREE_TYPE (collapse->step);
1536 tree plus_type = iter_type;
1537 enum tree_code plus_code = PLUS_EXPR;
1538 tree expr;
1539
1540 if (POINTER_TYPE_P (iter_type))
1541 {
1542 plus_code = POINTER_PLUS_EXPR;
1543 plus_type = sizetype;
1544 }
1545
1546 expr = ivar;
1547 if (ix)
1548 {
1549 tree mod = fold_convert (ivar_type, collapse->iters);
1550 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1551 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1552 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1553 true, GSI_SAME_STMT);
1554 }
1555
1556 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1557 collapse->step);
1558 expr = fold_build2 (plus_code, iter_type,
1559 inner ? collapse->outer : collapse->base,
1560 fold_convert (plus_type, expr));
1561 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1562 true, GSI_SAME_STMT);
1563 gassign *ass = gimple_build_assign (v, expr);
1564 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1565 }
1566 }
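/* As a worked illustration for the non-tiled collapse(2) case: if the
   inner loop runs ITERS2 times, both iteration variables are recovered
   from the single collapsed counter T as

     v2 = base2 + (T % iters2) * step2;
     v1 = base1 + (T / iters2) * step1;

   matching the innermost-first order of the loop above.  */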
1567
1568 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1569 of the combined collapse > 1 loop constructs, generate code like:
1570 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1571 if (cond3 is <)
1572 adj = STEP3 - 1;
1573 else
1574 adj = STEP3 + 1;
1575 count3 = (adj + N32 - N31) / STEP3;
1576 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1577 if (cond2 is <)
1578 adj = STEP2 - 1;
1579 else
1580 adj = STEP2 + 1;
1581 count2 = (adj + N22 - N21) / STEP2;
1582 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1583 if (cond1 is <)
1584 adj = STEP1 - 1;
1585 else
1586 adj = STEP1 + 1;
1587 count1 = (adj + N12 - N11) / STEP1;
1588 count = count1 * count2 * count3;
1589 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1590 count = 0;
1591 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1592 of the combined loop constructs, just initialize COUNTS array
1593 from the _looptemp_ clauses. */
1594
1595 /* NOTE: It *could* be better to moosh all of the BBs together,
1596 creating one larger BB with all the computation and the unexpected
1597 jump at the end. I.e.
1598
1599 bool zero3, zero2, zero1, zero;
1600
1601 zero3 = N32 c3 N31;
1602 count3 = (N32 - N31) /[cl] STEP3;
1603 zero2 = N22 c2 N21;
1604 count2 = (N22 - N21) /[cl] STEP2;
1605 zero1 = N12 c1 N11;
1606 count1 = (N12 - N11) /[cl] STEP1;
1607 zero = zero3 || zero2 || zero1;
1608 count = count1 * count2 * count3;
1609 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1610
1611 After all, we expect zero to be false, and thus we expect to have to
1612 evaluate all of the comparison expressions, so short-circuiting
1613 oughtn't be a win. Since the condition isn't protecting a
1614 denominator, we're not concerned about divide-by-zero, so we can
1615 fully evaluate count even if a numerator turned out to be wrong.
1616
1617 It seems like putting this all together would create much better
1618 scheduling opportunities, and less pressure on the chip's branch
1619 predictor. */
1620
1621 static void
1622 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1623 basic_block &entry_bb, tree *counts,
1624 basic_block &zero_iter1_bb, int &first_zero_iter1,
1625 basic_block &zero_iter2_bb, int &first_zero_iter2,
1626 basic_block &l2_dom_bb)
1627 {
1628 tree t, type = TREE_TYPE (fd->loop.v);
1629 edge e, ne;
1630 int i;
1631
1632 /* Collapsed loops need work for expansion into SSA form. */
1633 gcc_assert (!gimple_in_ssa_p (cfun));
1634
1635 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1636 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1637 {
1638 gcc_assert (fd->ordered == 0);
1639 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1640 isn't supposed to be handled, as the inner loop doesn't
1641 use it. */
1642 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1643 OMP_CLAUSE__LOOPTEMP_);
1644 gcc_assert (innerc);
1645 for (i = 0; i < fd->collapse; i++)
1646 {
1647 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1648 OMP_CLAUSE__LOOPTEMP_);
1649 gcc_assert (innerc);
1650 if (i)
1651 counts[i] = OMP_CLAUSE_DECL (innerc);
1652 else
1653 counts[0] = NULL_TREE;
1654 }
1655 return;
1656 }
1657
1658 for (i = fd->collapse; i < fd->ordered; i++)
1659 {
1660 tree itype = TREE_TYPE (fd->loops[i].v);
1661 counts[i] = NULL_TREE;
1662 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1663 fold_convert (itype, fd->loops[i].n1),
1664 fold_convert (itype, fd->loops[i].n2));
1665 if (t && integer_zerop (t))
1666 {
1667 for (i = fd->collapse; i < fd->ordered; i++)
1668 counts[i] = build_int_cst (type, 0);
1669 break;
1670 }
1671 }
1672 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1673 {
1674 tree itype = TREE_TYPE (fd->loops[i].v);
1675
1676 if (i >= fd->collapse && counts[i])
1677 continue;
1678 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1679 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1680 fold_convert (itype, fd->loops[i].n1),
1681 fold_convert (itype, fd->loops[i].n2)))
1682 == NULL_TREE || !integer_onep (t)))
1683 {
1684 gcond *cond_stmt;
1685 tree n1, n2;
1686 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1687 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1688 true, GSI_SAME_STMT);
1689 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1690 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1691 true, GSI_SAME_STMT);
1692 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1693 NULL_TREE, NULL_TREE);
1694 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1695 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1696 expand_omp_regimplify_p, NULL, NULL)
1697 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1698 expand_omp_regimplify_p, NULL, NULL))
1699 {
1700 *gsi = gsi_for_stmt (cond_stmt);
1701 gimple_regimplify_operands (cond_stmt, gsi);
1702 }
1703 e = split_block (entry_bb, cond_stmt);
1704 basic_block &zero_iter_bb
1705 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1706 int &first_zero_iter
1707 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1708 if (zero_iter_bb == NULL)
1709 {
1710 gassign *assign_stmt;
1711 first_zero_iter = i;
1712 zero_iter_bb = create_empty_bb (entry_bb);
1713 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1714 *gsi = gsi_after_labels (zero_iter_bb);
1715 if (i < fd->collapse)
1716 assign_stmt = gimple_build_assign (fd->loop.n2,
1717 build_zero_cst (type));
1718 else
1719 {
1720 counts[i] = create_tmp_reg (type, ".count");
1721 assign_stmt
1722 = gimple_build_assign (counts[i], build_zero_cst (type));
1723 }
1724 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1725 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1726 entry_bb);
1727 }
1728 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1729 ne->probability = profile_probability::very_unlikely ();
1730 e->flags = EDGE_TRUE_VALUE;
1731 e->probability = ne->probability.invert ();
1732 if (l2_dom_bb == NULL)
1733 l2_dom_bb = entry_bb;
1734 entry_bb = e->dest;
1735 *gsi = gsi_last_nondebug_bb (entry_bb);
1736 }
1737
1738 if (POINTER_TYPE_P (itype))
1739 itype = signed_type_for (itype);
1740 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1741 ? -1 : 1));
1742 t = fold_build2 (PLUS_EXPR, itype,
1743 fold_convert (itype, fd->loops[i].step), t);
1744 t = fold_build2 (PLUS_EXPR, itype, t,
1745 fold_convert (itype, fd->loops[i].n2));
1746 t = fold_build2 (MINUS_EXPR, itype, t,
1747 fold_convert (itype, fd->loops[i].n1));
1748 /* ?? We could probably use CEIL_DIV_EXPR instead of
1749 TRUNC_DIV_EXPR plus the manual adjustment above, unless that
1750 can't generate the same code in the end because generically we
1751 don't know that the values involved must be negative for
1752 GT. ?? */
1753 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1754 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1755 fold_build1 (NEGATE_EXPR, itype, t),
1756 fold_build1 (NEGATE_EXPR, itype,
1757 fold_convert (itype,
1758 fd->loops[i].step)));
1759 else
1760 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1761 fold_convert (itype, fd->loops[i].step));
1762 t = fold_convert (type, t);
1763 if (TREE_CODE (t) == INTEGER_CST)
1764 counts[i] = t;
1765 else
1766 {
1767 if (i < fd->collapse || i != first_zero_iter2)
1768 counts[i] = create_tmp_reg (type, ".count");
1769 expand_omp_build_assign (gsi, counts[i], t);
1770 }
1771 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1772 {
1773 if (i == 0)
1774 t = counts[0];
1775 else
1776 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1777 expand_omp_build_assign (gsi, fd->loop.n2, t);
1778 }
1779 }
1780 }
1781
1782 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1783 T = V;
1784 V3 = N31 + (T % count3) * STEP3;
1785 T = T / count3;
1786 V2 = N21 + (T % count2) * STEP2;
1787 T = T / count2;
1788 V1 = N11 + T * STEP1;
1789 if this loop doesn't have an inner loop construct combined with it.
1790 If it does have an inner loop construct combined with it and the
1791 iteration count isn't known constant, store values from counts array
1792 into its _looptemp_ temporaries instead. */
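/* As a purely illustrative sketch (made up, not taken from the sources):
   for collapse(2) with count2 == 5, a combined iteration number T handed
   back by the runtime is decomposed as
     V2 = N21 + (T % 5) * STEP2;
     T  = T / 5;
     V1 = N11 + T * STEP1;
   so the innermost index varies fastest, mirroring how the per-loop counts
   were multiplied together when the combined count was computed.  */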
1793
1794 static void
1795 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 tree *counts, gimple *inner_stmt, tree startvar)
1797 {
1798 int i;
1799 if (gimple_omp_for_combined_p (fd->for_stmt))
1800 {
1801 /* If fd->loop.n2 is constant, then no propagation of the counts
1802 is needed; they are constant. */
1803 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1804 return;
1805
1806 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1807 ? gimple_omp_taskreg_clauses (inner_stmt)
1808 : gimple_omp_for_clauses (inner_stmt);
1809 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1810 isn't supposed to be handled, as the inner loop doesn't
1811 use it. */
1812 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1813 gcc_assert (innerc);
1814 for (i = 0; i < fd->collapse; i++)
1815 {
1816 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1817 OMP_CLAUSE__LOOPTEMP_);
1818 gcc_assert (innerc);
1819 if (i)
1820 {
1821 tree tem = OMP_CLAUSE_DECL (innerc);
1822 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1823 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1824 false, GSI_CONTINUE_LINKING);
1825 gassign *stmt = gimple_build_assign (tem, t);
1826 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1827 }
1828 }
1829 return;
1830 }
1831
1832 tree type = TREE_TYPE (fd->loop.v);
1833 tree tem = create_tmp_reg (type, ".tem");
1834 gassign *stmt = gimple_build_assign (tem, startvar);
1835 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1836
1837 for (i = fd->collapse - 1; i >= 0; i--)
1838 {
1839 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1840 itype = vtype;
1841 if (POINTER_TYPE_P (vtype))
1842 itype = signed_type_for (vtype);
1843 if (i != 0)
1844 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1845 else
1846 t = tem;
1847 t = fold_convert (itype, t);
1848 t = fold_build2 (MULT_EXPR, itype, t,
1849 fold_convert (itype, fd->loops[i].step));
1850 if (POINTER_TYPE_P (vtype))
1851 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1852 else
1853 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1854 t = force_gimple_operand_gsi (gsi, t,
1855 DECL_P (fd->loops[i].v)
1856 && TREE_ADDRESSABLE (fd->loops[i].v),
1857 NULL_TREE, false,
1858 GSI_CONTINUE_LINKING);
1859 stmt = gimple_build_assign (fd->loops[i].v, t);
1860 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1861 if (i != 0)
1862 {
1863 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1864 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1865 false, GSI_CONTINUE_LINKING);
1866 stmt = gimple_build_assign (tem, t);
1867 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1868 }
1869 }
1870 }
1871
1872 /* Helper function for expand_omp_for_*. Generate code like:
1873 L10:
1874 V3 += STEP3;
1875 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1876 L11:
1877 V3 = N31;
1878 V2 += STEP2;
1879 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1880 L12:
1881 V2 = N21;
1882 V1 += STEP1;
1883 goto BODY_BB; */
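/* Illustratively, for collapse(2) the ladder above implements the tail of
     for (V1 = N11; V1 cond1 N12; V1 += STEP1)
       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	 BODY;
   after BODY: bump V2 and re-enter BODY while it is still in range;
   otherwise reset V2 to N21, bump V1 and re-enter BODY (the enclosing
   dispatcher re-checks the combined iteration count).  */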
1884
1885 static basic_block
1886 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1887 basic_block body_bb)
1888 {
1889 basic_block last_bb, bb, collapse_bb = NULL;
1890 int i;
1891 gimple_stmt_iterator gsi;
1892 edge e;
1893 tree t;
1894 gimple *stmt;
1895
1896 last_bb = cont_bb;
1897 for (i = fd->collapse - 1; i >= 0; i--)
1898 {
1899 tree vtype = TREE_TYPE (fd->loops[i].v);
1900
1901 bb = create_empty_bb (last_bb);
1902 add_bb_to_loop (bb, last_bb->loop_father);
1903 gsi = gsi_start_bb (bb);
1904
1905 if (i < fd->collapse - 1)
1906 {
1907 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1908 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1909
1910 t = fd->loops[i + 1].n1;
1911 t = force_gimple_operand_gsi (&gsi, t,
1912 DECL_P (fd->loops[i + 1].v)
1913 && TREE_ADDRESSABLE (fd->loops[i
1914 + 1].v),
1915 NULL_TREE, false,
1916 GSI_CONTINUE_LINKING);
1917 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1918 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1919 }
1920 else
1921 collapse_bb = bb;
1922
1923 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1924
1925 if (POINTER_TYPE_P (vtype))
1926 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1927 else
1928 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1929 t = force_gimple_operand_gsi (&gsi, t,
1930 DECL_P (fd->loops[i].v)
1931 && TREE_ADDRESSABLE (fd->loops[i].v),
1932 NULL_TREE, false, GSI_CONTINUE_LINKING);
1933 stmt = gimple_build_assign (fd->loops[i].v, t);
1934 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1935
1936 if (i > 0)
1937 {
1938 t = fd->loops[i].n2;
1939 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1940 false, GSI_CONTINUE_LINKING);
1941 tree v = fd->loops[i].v;
1942 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1943 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1944 false, GSI_CONTINUE_LINKING);
1945 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1946 stmt = gimple_build_cond_empty (t);
1947 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1948 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1949 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1950 }
1951 else
1952 make_edge (bb, body_bb, EDGE_FALLTHRU);
1953 last_bb = bb;
1954 }
1955
1956 return collapse_bb;
1957 }
1958
1959 /* Expand #pragma omp ordered depend(source). */
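/* E.g. (a hedged illustration), inside an ordered(1) doacross loop
     #pragma omp ordered depend(source)
   becomes roughly
     GOMP_doacross_post (&.orditera[0]);
   i.e. a single call passing the address of the array that tracks the
   current iteration numbers; GOMP_doacross_ull_post is used instead when
   fd->iter_type is not the long type.  */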
1960
1961 static void
1962 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1963 tree *counts, location_t loc)
1964 {
1965 enum built_in_function source_ix
1966 = fd->iter_type == long_integer_type_node
1967 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1968 gimple *g
1969 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1970 build_fold_addr_expr (counts[fd->ordered]));
1971 gimple_set_location (g, loc);
1972 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1973 }
1974
1975 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
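/* Sketch of the intent (illustrative only): for ordered(2) and
     #pragma omp ordered depend(sink: i - 1, j)
   the code below folds (i - 1, j) into iteration numbers, builds a
   condition checking that the referenced iteration really lies inside the
   iteration space (e.g. that i - 1 is not below the first loop's lower
   bound), and emits a guarded call to GOMP_doacross_wait (or
   GOMP_doacross_ull_wait) with the computed iteration numbers as
   arguments.  */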
1976
1977 static void
1978 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1979 tree *counts, tree c, location_t loc)
1980 {
1981 auto_vec<tree, 10> args;
1982 enum built_in_function sink_ix
1983 = fd->iter_type == long_integer_type_node
1984 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1985 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1986 int i;
1987 gimple_stmt_iterator gsi2 = *gsi;
1988 bool warned_step = false;
1989
1990 for (i = 0; i < fd->ordered; i++)
1991 {
1992 tree step = NULL_TREE;
1993 off = TREE_PURPOSE (deps);
1994 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1995 {
1996 step = TREE_OPERAND (off, 1);
1997 off = TREE_OPERAND (off, 0);
1998 }
1999 if (!integer_zerop (off))
2000 {
2001 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2002 || fd->loops[i].cond_code == GT_EXPR);
2003 bool forward = fd->loops[i].cond_code == LT_EXPR;
2004 if (step)
2005 {
2006 /* Non-simple Fortran DO loops.  If step is variable,
2007 we don't even know the direction at compile time, so
2008 we can't warn. */
2009 if (TREE_CODE (step) != INTEGER_CST)
2010 break;
2011 forward = tree_int_cst_sgn (step) != -1;
2012 }
2013 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2014 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2015 "lexically later iteration");
2016 break;
2017 }
2018 deps = TREE_CHAIN (deps);
2019 }
2020 /* If all offsets corresponding to the collapsed loops are zero,
2021 this depend clause can be ignored. FIXME: but there is still a
2022 flush needed. We need to emit one __sync_synchronize () for it
2023 though (perhaps conditionally)? Solve this together with the
2024 conservative dependence folding optimization.
2025 if (i >= fd->collapse)
2026 return; */
2027
2028 deps = OMP_CLAUSE_DECL (c);
2029 gsi_prev (&gsi2);
2030 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2031 edge e2 = split_block_after_labels (e1->dest);
2032
2033 gsi2 = gsi_after_labels (e1->dest);
2034 *gsi = gsi_last_bb (e1->src);
2035 for (i = 0; i < fd->ordered; i++)
2036 {
2037 tree itype = TREE_TYPE (fd->loops[i].v);
2038 tree step = NULL_TREE;
2039 tree orig_off = NULL_TREE;
2040 if (POINTER_TYPE_P (itype))
2041 itype = sizetype;
2042 if (i)
2043 deps = TREE_CHAIN (deps);
2044 off = TREE_PURPOSE (deps);
2045 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2046 {
2047 step = TREE_OPERAND (off, 1);
2048 off = TREE_OPERAND (off, 0);
2049 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2050 && integer_onep (fd->loops[i].step)
2051 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2052 }
2053 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2054 if (step)
2055 {
2056 off = fold_convert_loc (loc, itype, off);
2057 orig_off = off;
2058 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2059 }
2060
2061 if (integer_zerop (off))
2062 t = boolean_true_node;
2063 else
2064 {
2065 tree a;
2066 tree co = fold_convert_loc (loc, itype, off);
2067 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2068 {
2069 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2070 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2071 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2072 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2073 co);
2074 }
2075 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2076 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2077 fd->loops[i].v, co);
2078 else
2079 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2080 fd->loops[i].v, co);
2081 if (step)
2082 {
2083 tree t1, t2;
2084 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2085 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2086 fd->loops[i].n1);
2087 else
2088 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2089 fd->loops[i].n2);
2090 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2091 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2092 fd->loops[i].n2);
2093 else
2094 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2095 fd->loops[i].n1);
2096 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2097 step, build_int_cst (TREE_TYPE (step), 0));
2098 if (TREE_CODE (step) != INTEGER_CST)
2099 {
2100 t1 = unshare_expr (t1);
2101 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2102 false, GSI_CONTINUE_LINKING);
2103 t2 = unshare_expr (t2);
2104 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2105 false, GSI_CONTINUE_LINKING);
2106 }
2107 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2108 t, t2, t1);
2109 }
2110 else if (fd->loops[i].cond_code == LT_EXPR)
2111 {
2112 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2113 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2114 fd->loops[i].n1);
2115 else
2116 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2117 fd->loops[i].n2);
2118 }
2119 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2120 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2121 fd->loops[i].n2);
2122 else
2123 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2124 fd->loops[i].n1);
2125 }
2126 if (cond)
2127 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2128 else
2129 cond = t;
2130
2131 off = fold_convert_loc (loc, itype, off);
2132
2133 if (step
2134 || (fd->loops[i].cond_code == LT_EXPR
2135 ? !integer_onep (fd->loops[i].step)
2136 : !integer_minus_onep (fd->loops[i].step)))
2137 {
2138 if (step == NULL_TREE
2139 && TYPE_UNSIGNED (itype)
2140 && fd->loops[i].cond_code == GT_EXPR)
2141 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2142 fold_build1_loc (loc, NEGATE_EXPR, itype,
2143 s));
2144 else
2145 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2146 orig_off ? orig_off : off, s);
2147 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2148 build_int_cst (itype, 0));
2149 if (integer_zerop (t) && !warned_step)
2150 {
2151 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2152 "in the iteration space");
2153 warned_step = true;
2154 }
2155 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2156 cond, t);
2157 }
2158
2159 if (i <= fd->collapse - 1 && fd->collapse > 1)
2160 t = fd->loop.v;
2161 else if (counts[i])
2162 t = counts[i];
2163 else
2164 {
2165 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2166 fd->loops[i].v, fd->loops[i].n1);
2167 t = fold_convert_loc (loc, fd->iter_type, t);
2168 }
2169 if (step)
2170 /* We have already divided off by step above. */;
2171 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2172 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2173 fold_build1_loc (loc, NEGATE_EXPR, itype,
2174 s));
2175 else
2176 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2177 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2178 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2179 off = fold_convert_loc (loc, fd->iter_type, off);
2180 if (i <= fd->collapse - 1 && fd->collapse > 1)
2181 {
2182 if (i)
2183 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2184 off);
2185 if (i < fd->collapse - 1)
2186 {
2187 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2188 counts[i]);
2189 continue;
2190 }
2191 }
2192 off = unshare_expr (off);
2193 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2194 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2195 true, GSI_SAME_STMT);
2196 args.safe_push (t);
2197 }
2198 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2199 gimple_set_location (g, loc);
2200 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2201
2202 cond = unshare_expr (cond);
2203 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2204 GSI_CONTINUE_LINKING);
2205 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2206 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2207 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2208 e1->probability = e3->probability.invert ();
2209 e1->flags = EDGE_TRUE_VALUE;
2210 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2211
2212 *gsi = gsi_after_labels (e2->dest);
2213 }
2214
2215 /* Expand all #pragma omp ordered depend(source) and
2216 #pragma omp ordered depend(sink:...) constructs in the current
2217 #pragma omp for ordered(n) region. */
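/* Illustrative example of the construct being handled (made up, not from
   the testsuite):
     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   work (i, j);
	   #pragma omp ordered depend(source)
	 }
   Each sink clause is lowered via expand_omp_ordered_sink into a guarded
   GOMP_doacross_wait call, each source clause via expand_omp_ordered_source
   into a GOMP_doacross_post call, and the GIMPLE_OMP_ORDERED statements
   themselves are then removed.  */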
2218
2219 static void
2220 expand_omp_ordered_source_sink (struct omp_region *region,
2221 struct omp_for_data *fd, tree *counts,
2222 basic_block cont_bb)
2223 {
2224 struct omp_region *inner;
2225 int i;
2226 for (i = fd->collapse - 1; i < fd->ordered; i++)
2227 if (i == fd->collapse - 1 && fd->collapse > 1)
2228 counts[i] = NULL_TREE;
2229 else if (i >= fd->collapse && !cont_bb)
2230 counts[i] = build_zero_cst (fd->iter_type);
2231 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2232 && integer_onep (fd->loops[i].step))
2233 counts[i] = NULL_TREE;
2234 else
2235 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2236 tree atype
2237 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2238 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2239 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2240
2241 for (inner = region->inner; inner; inner = inner->next)
2242 if (inner->type == GIMPLE_OMP_ORDERED)
2243 {
2244 gomp_ordered *ord_stmt = inner->ord_stmt;
2245 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2246 location_t loc = gimple_location (ord_stmt);
2247 tree c;
2248 for (c = gimple_omp_ordered_clauses (ord_stmt);
2249 c; c = OMP_CLAUSE_CHAIN (c))
2250 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2251 break;
2252 if (c)
2253 expand_omp_ordered_source (&gsi, fd, counts, loc);
2254 for (c = gimple_omp_ordered_clauses (ord_stmt);
2255 c; c = OMP_CLAUSE_CHAIN (c))
2256 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2257 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2258 gsi_remove (&gsi, true);
2259 }
2260 }
2261
2262 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2263 collapsed. */
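/* In other words (an informal sketch): for ordered(2) without collapse,
   only the outermost loop's iterations are distributed among threads; this
   function re-materializes the inner loop sequentially around the body and
   keeps the corresponding slot of the .orditera array up to date so the
   doacross post/wait calls see the current inner iteration number.  */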
2264
2265 static basic_block
2266 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2267 basic_block cont_bb, basic_block body_bb,
2268 bool ordered_lastprivate)
2269 {
2270 if (fd->ordered == fd->collapse)
2271 return cont_bb;
2272
2273 if (!cont_bb)
2274 {
2275 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2276 for (int i = fd->collapse; i < fd->ordered; i++)
2277 {
2278 tree type = TREE_TYPE (fd->loops[i].v);
2279 tree n1 = fold_convert (type, fd->loops[i].n1);
2280 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2281 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2282 size_int (i - fd->collapse + 1),
2283 NULL_TREE, NULL_TREE);
2284 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2285 }
2286 return NULL;
2287 }
2288
2289 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2290 {
2291 tree t, type = TREE_TYPE (fd->loops[i].v);
2292 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2293 expand_omp_build_assign (&gsi, fd->loops[i].v,
2294 fold_convert (type, fd->loops[i].n1));
2295 if (counts[i])
2296 expand_omp_build_assign (&gsi, counts[i],
2297 build_zero_cst (fd->iter_type));
2298 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2299 size_int (i - fd->collapse + 1),
2300 NULL_TREE, NULL_TREE);
2301 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302 if (!gsi_end_p (gsi))
2303 gsi_prev (&gsi);
2304 else
2305 gsi = gsi_last_bb (body_bb);
2306 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2307 basic_block new_body = e1->dest;
2308 if (body_bb == cont_bb)
2309 cont_bb = new_body;
2310 edge e2 = NULL;
2311 basic_block new_header;
2312 if (EDGE_COUNT (cont_bb->preds) > 0)
2313 {
2314 gsi = gsi_last_bb (cont_bb);
2315 if (POINTER_TYPE_P (type))
2316 t = fold_build_pointer_plus (fd->loops[i].v,
2317 fold_convert (sizetype,
2318 fd->loops[i].step));
2319 else
2320 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2321 fold_convert (type, fd->loops[i].step));
2322 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2323 if (counts[i])
2324 {
2325 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2326 build_int_cst (fd->iter_type, 1));
2327 expand_omp_build_assign (&gsi, counts[i], t);
2328 t = counts[i];
2329 }
2330 else
2331 {
2332 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2333 fd->loops[i].v, fd->loops[i].n1);
2334 t = fold_convert (fd->iter_type, t);
2335 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2336 true, GSI_SAME_STMT);
2337 }
2338 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2339 size_int (i - fd->collapse + 1),
2340 NULL_TREE, NULL_TREE);
2341 expand_omp_build_assign (&gsi, aref, t);
2342 gsi_prev (&gsi);
2343 e2 = split_block (cont_bb, gsi_stmt (gsi));
2344 new_header = e2->dest;
2345 }
2346 else
2347 new_header = cont_bb;
2348 gsi = gsi_after_labels (new_header);
2349 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2350 true, GSI_SAME_STMT);
2351 tree n2
2352 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2353 true, NULL_TREE, true, GSI_SAME_STMT);
2354 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2355 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2356 edge e3 = split_block (new_header, gsi_stmt (gsi));
2357 cont_bb = e3->dest;
2358 remove_edge (e1);
2359 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2360 e3->flags = EDGE_FALSE_VALUE;
2361 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2362 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2363 e1->probability = e3->probability.invert ();
2364
2365 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2366 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2367
2368 if (e2)
2369 {
2370 struct loop *loop = alloc_loop ();
2371 loop->header = new_header;
2372 loop->latch = e2->src;
2373 add_loop (loop, body_bb->loop_father);
2374 }
2375 }
2376
2377 /* If there are any lastprivate clauses and it is possible some loops
2378 might have zero iterations, ensure all the decls are initialized,
2379 otherwise we could crash evaluating C++ class iterators with lastprivate
2380 clauses. */
2381 bool need_inits = false;
2382 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2383 if (need_inits)
2384 {
2385 tree type = TREE_TYPE (fd->loops[i].v);
2386 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2387 expand_omp_build_assign (&gsi, fd->loops[i].v,
2388 fold_convert (type, fd->loops[i].n1));
2389 }
2390 else
2391 {
2392 tree type = TREE_TYPE (fd->loops[i].v);
2393 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2394 boolean_type_node,
2395 fold_convert (type, fd->loops[i].n1),
2396 fold_convert (type, fd->loops[i].n2));
2397 if (!integer_onep (this_cond))
2398 need_inits = true;
2399 }
2400
2401 return cont_bb;
2402 }
2403
2404 /* A subroutine of expand_omp_for. Generate code for a parallel
2405 loop with any schedule. Given parameters:
2406
2407 for (V = N1; V cond N2; V += STEP) BODY;
2408
2409 where COND is "<" or ">", we generate pseudocode
2410
2411 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2412 if (more) goto L0; else goto L3;
2413 L0:
2414 V = istart0;
2415 iend = iend0;
2416 L1:
2417 BODY;
2418 V += STEP;
2419 if (V cond iend) goto L1; else goto L2;
2420 L2:
2421 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2422 L3:
2423
2424 If this is a combined omp parallel loop, instead of the call to
2425 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2426 If this is gimple_omp_for_combined_p loop, then instead of assigning
2427 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2428 inner GIMPLE_OMP_FOR and V += STEP; and
2429 if (V cond iend) goto L1; else goto L2; are removed.
2430
2431 For collapsed loops, given parameters:
2432 collapse(3)
2433 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2434 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2435 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2436 BODY;
2437
2438 we generate pseudocode
2439
2440 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2441 if (cond3 is <)
2442 adj = STEP3 - 1;
2443 else
2444 adj = STEP3 + 1;
2445 count3 = (adj + N32 - N31) / STEP3;
2446 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2447 if (cond2 is <)
2448 adj = STEP2 - 1;
2449 else
2450 adj = STEP2 + 1;
2451 count2 = (adj + N22 - N21) / STEP2;
2452 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2453 if (cond1 is <)
2454 adj = STEP1 - 1;
2455 else
2456 adj = STEP1 + 1;
2457 count1 = (adj + N12 - N11) / STEP1;
2458 count = count1 * count2 * count3;
2459 goto Z1;
2460 Z0:
2461 count = 0;
2462 Z1:
2463 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2464 if (more) goto L0; else goto L3;
2465 L0:
2466 V = istart0;
2467 T = V;
2468 V3 = N31 + (T % count3) * STEP3;
2469 T = T / count3;
2470 V2 = N21 + (T % count2) * STEP2;
2471 T = T / count2;
2472 V1 = N11 + T * STEP1;
2473 iend = iend0;
2474 L1:
2475 BODY;
2476 V += 1;
2477 if (V < iend) goto L10; else goto L2;
2478 L10:
2479 V3 += STEP3;
2480 if (V3 cond3 N32) goto L1; else goto L11;
2481 L11:
2482 V3 = N31;
2483 V2 += STEP2;
2484 if (V2 cond2 N22) goto L1; else goto L12;
2485 L12:
2486 V2 = N21;
2487 V1 += STEP1;
2488 goto L1;
2489 L2:
2490 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2491 L3:
2492
2493 */
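/* As a concrete, purely illustrative instance: a user loop such as
     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);
   is expanded with start_fn = GOMP_loop_dynamic_start and
   next_fn = GOMP_loop_dynamic_next; each thread repeatedly obtains an
   [istart0, iend0) chunk from libgomp, runs the sequential loop over it,
   and branches back from L2 until the next call returns false.  */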
2494
2495 static void
2496 expand_omp_for_generic (struct omp_region *region,
2497 struct omp_for_data *fd,
2498 enum built_in_function start_fn,
2499 enum built_in_function next_fn,
2500 gimple *inner_stmt)
2501 {
2502 tree type, istart0, iend0, iend;
2503 tree t, vmain, vback, bias = NULL_TREE;
2504 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2505 basic_block l2_bb = NULL, l3_bb = NULL;
2506 gimple_stmt_iterator gsi;
2507 gassign *assign_stmt;
2508 bool in_combined_parallel = is_combined_parallel (region);
2509 bool broken_loop = region->cont == NULL;
2510 edge e, ne;
2511 tree *counts = NULL;
2512 int i;
2513 bool ordered_lastprivate = false;
2514
2515 gcc_assert (!broken_loop || !in_combined_parallel);
2516 gcc_assert (fd->iter_type == long_integer_type_node
2517 || !in_combined_parallel);
2518
2519 entry_bb = region->entry;
2520 cont_bb = region->cont;
2521 collapse_bb = NULL;
2522 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2523 gcc_assert (broken_loop
2524 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2525 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2526 l1_bb = single_succ (l0_bb);
2527 if (!broken_loop)
2528 {
2529 l2_bb = create_empty_bb (cont_bb);
2530 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2531 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2532 == l1_bb));
2533 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2534 }
2535 else
2536 l2_bb = NULL;
2537 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2538 exit_bb = region->exit;
2539
2540 gsi = gsi_last_nondebug_bb (entry_bb);
2541
2542 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2543 if (fd->ordered
2544 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2545 OMP_CLAUSE_LASTPRIVATE))
2546 ordered_lastprivate = true;
2547 if (fd->collapse > 1 || fd->ordered)
2548 {
2549 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2550 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2551
2552 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2553 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2554 zero_iter1_bb, first_zero_iter1,
2555 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2556
2557 if (zero_iter1_bb)
2558 {
2559 /* Some counts[i] vars might be uninitialized if
2560 some loop has zero iterations. But the body shouldn't
2561 be executed in that case, so just avoid uninit warnings. */
2562 for (i = first_zero_iter1;
2563 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2564 if (SSA_VAR_P (counts[i]))
2565 TREE_NO_WARNING (counts[i]) = 1;
2566 gsi_prev (&gsi);
2567 e = split_block (entry_bb, gsi_stmt (gsi));
2568 entry_bb = e->dest;
2569 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2570 gsi = gsi_last_nondebug_bb (entry_bb);
2571 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2572 get_immediate_dominator (CDI_DOMINATORS,
2573 zero_iter1_bb));
2574 }
2575 if (zero_iter2_bb)
2576 {
2577 /* Some counts[i] vars might be uninitialized if
2578 some loop has zero iterations. But the body shouldn't
2579 be executed in that case, so just avoid uninit warnings. */
2580 for (i = first_zero_iter2; i < fd->ordered; i++)
2581 if (SSA_VAR_P (counts[i]))
2582 TREE_NO_WARNING (counts[i]) = 1;
2583 if (zero_iter1_bb)
2584 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2585 else
2586 {
2587 gsi_prev (&gsi);
2588 e = split_block (entry_bb, gsi_stmt (gsi));
2589 entry_bb = e->dest;
2590 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2591 gsi = gsi_last_nondebug_bb (entry_bb);
2592 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2593 get_immediate_dominator
2594 (CDI_DOMINATORS, zero_iter2_bb));
2595 }
2596 }
2597 if (fd->collapse == 1)
2598 {
2599 counts[0] = fd->loop.n2;
2600 fd->loop = fd->loops[0];
2601 }
2602 }
2603
2604 type = TREE_TYPE (fd->loop.v);
2605 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2606 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2607 TREE_ADDRESSABLE (istart0) = 1;
2608 TREE_ADDRESSABLE (iend0) = 1;
2609
2610 /* See if we need to bias by LLONG_MIN. */
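/* Illustrative rationale (a sketch, not a spec): the GOMP_loop_ull_*
   entry points work on unsigned iteration bounds, so if the signed range
   can straddle zero (say n1 = -5, n2 = 10, a made-up example) both bounds
   are shifted by TYPE_MIN_VALUE of the loop variable's type here, and the
   shift is undone below once istart0/iend0 are read back.  */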
2611 if (fd->iter_type == long_long_unsigned_type_node
2612 && TREE_CODE (type) == INTEGER_TYPE
2613 && !TYPE_UNSIGNED (type)
2614 && fd->ordered == 0)
2615 {
2616 tree n1, n2;
2617
2618 if (fd->loop.cond_code == LT_EXPR)
2619 {
2620 n1 = fd->loop.n1;
2621 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2622 }
2623 else
2624 {
2625 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2626 n2 = fd->loop.n1;
2627 }
2628 if (TREE_CODE (n1) != INTEGER_CST
2629 || TREE_CODE (n2) != INTEGER_CST
2630 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2631 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2632 }
2633
2634 gimple_stmt_iterator gsif = gsi;
2635 gsi_prev (&gsif);
2636
2637 tree arr = NULL_TREE;
2638 if (in_combined_parallel)
2639 {
2640 gcc_assert (fd->ordered == 0);
2641 /* In a combined parallel loop, emit a call to
2642 GOMP_loop_foo_next. */
2643 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2644 build_fold_addr_expr (istart0),
2645 build_fold_addr_expr (iend0));
2646 }
2647 else
2648 {
2649 tree t0, t1, t2, t3, t4;
2650 /* If this is not a combined parallel loop, emit a call to
2651 GOMP_loop_foo_start in ENTRY_BB. */
2652 t4 = build_fold_addr_expr (iend0);
2653 t3 = build_fold_addr_expr (istart0);
2654 if (fd->ordered)
2655 {
2656 t0 = build_int_cst (unsigned_type_node,
2657 fd->ordered - fd->collapse + 1);
2658 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2659 fd->ordered
2660 - fd->collapse + 1),
2661 ".omp_counts");
2662 DECL_NAMELESS (arr) = 1;
2663 TREE_ADDRESSABLE (arr) = 1;
2664 TREE_STATIC (arr) = 1;
2665 vec<constructor_elt, va_gc> *v;
2666 vec_alloc (v, fd->ordered - fd->collapse + 1);
2667 int idx;
2668
2669 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2670 {
2671 tree c;
2672 if (idx == 0 && fd->collapse > 1)
2673 c = fd->loop.n2;
2674 else
2675 c = counts[idx + fd->collapse - 1];
2676 tree purpose = size_int (idx);
2677 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2678 if (TREE_CODE (c) != INTEGER_CST)
2679 TREE_STATIC (arr) = 0;
2680 }
2681
2682 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2683 if (!TREE_STATIC (arr))
2684 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2685 void_type_node, arr),
2686 true, NULL_TREE, true, GSI_SAME_STMT);
2687 t1 = build_fold_addr_expr (arr);
2688 t2 = NULL_TREE;
2689 }
2690 else
2691 {
2692 t2 = fold_convert (fd->iter_type, fd->loop.step);
2693 t1 = fd->loop.n2;
2694 t0 = fd->loop.n1;
2695 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2696 {
2697 tree innerc
2698 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2699 OMP_CLAUSE__LOOPTEMP_);
2700 gcc_assert (innerc);
2701 t0 = OMP_CLAUSE_DECL (innerc);
2702 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2703 OMP_CLAUSE__LOOPTEMP_);
2704 gcc_assert (innerc);
2705 t1 = OMP_CLAUSE_DECL (innerc);
2706 }
2707 if (POINTER_TYPE_P (TREE_TYPE (t0))
2708 && TYPE_PRECISION (TREE_TYPE (t0))
2709 != TYPE_PRECISION (fd->iter_type))
2710 {
2711 /* Avoid casting pointers to integer of a different size. */
2712 tree itype = signed_type_for (type);
2713 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2714 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2715 }
2716 else
2717 {
2718 t1 = fold_convert (fd->iter_type, t1);
2719 t0 = fold_convert (fd->iter_type, t0);
2720 }
2721 if (bias)
2722 {
2723 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2724 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2725 }
2726 }
2727 if (fd->iter_type == long_integer_type_node || fd->ordered)
2728 {
2729 if (fd->chunk_size)
2730 {
2731 t = fold_convert (fd->iter_type, fd->chunk_size);
2732 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2733 if (fd->ordered)
2734 t = build_call_expr (builtin_decl_explicit (start_fn),
2735 5, t0, t1, t, t3, t4);
2736 else
2737 t = build_call_expr (builtin_decl_explicit (start_fn),
2738 6, t0, t1, t2, t, t3, t4);
2739 }
2740 else if (fd->ordered)
2741 t = build_call_expr (builtin_decl_explicit (start_fn),
2742 4, t0, t1, t3, t4);
2743 else
2744 t = build_call_expr (builtin_decl_explicit (start_fn),
2745 5, t0, t1, t2, t3, t4);
2746 }
2747 else
2748 {
2749 tree t5;
2750 tree c_bool_type;
2751 tree bfn_decl;
2752
2753 /* The GOMP_loop_ull_*start functions have additional boolean
2754 argument, true for < loops and false for > loops.
2755 In Fortran, the C bool type can be different from
2756 boolean_type_node. */
2757 bfn_decl = builtin_decl_explicit (start_fn);
2758 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2759 t5 = build_int_cst (c_bool_type,
2760 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2761 if (fd->chunk_size)
2762 {
2763 tree bfn_decl = builtin_decl_explicit (start_fn);
2764 t = fold_convert (fd->iter_type, fd->chunk_size);
2765 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2766 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2767 }
2768 else
2769 t = build_call_expr (builtin_decl_explicit (start_fn),
2770 6, t5, t0, t1, t2, t3, t4);
2771 }
2772 }
2773 if (TREE_TYPE (t) != boolean_type_node)
2774 t = fold_build2 (NE_EXPR, boolean_type_node,
2775 t, build_int_cst (TREE_TYPE (t), 0));
2776 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2777 true, GSI_SAME_STMT);
2778 if (arr && !TREE_STATIC (arr))
2779 {
2780 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2781 TREE_THIS_VOLATILE (clobber) = 1;
2782 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2783 GSI_SAME_STMT);
2784 }
2785 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2786
2787 /* Remove the GIMPLE_OMP_FOR statement. */
2788 gsi_remove (&gsi, true);
2789
2790 if (gsi_end_p (gsif))
2791 gsif = gsi_after_labels (gsi_bb (gsif));
2792 gsi_next (&gsif);
2793
2794 /* Iteration setup for sequential loop goes in L0_BB. */
2795 tree startvar = fd->loop.v;
2796 tree endvar = NULL_TREE;
2797
2798 if (gimple_omp_for_combined_p (fd->for_stmt))
2799 {
2800 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2801 && gimple_omp_for_kind (inner_stmt)
2802 == GF_OMP_FOR_KIND_SIMD);
2803 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2804 OMP_CLAUSE__LOOPTEMP_);
2805 gcc_assert (innerc);
2806 startvar = OMP_CLAUSE_DECL (innerc);
2807 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2808 OMP_CLAUSE__LOOPTEMP_);
2809 gcc_assert (innerc);
2810 endvar = OMP_CLAUSE_DECL (innerc);
2811 }
2812
2813 gsi = gsi_start_bb (l0_bb);
2814 t = istart0;
2815 if (fd->ordered && fd->collapse == 1)
2816 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2817 fold_convert (fd->iter_type, fd->loop.step));
2818 else if (bias)
2819 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2820 if (fd->ordered && fd->collapse == 1)
2821 {
2822 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2823 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2824 fd->loop.n1, fold_convert (sizetype, t));
2825 else
2826 {
2827 t = fold_convert (TREE_TYPE (startvar), t);
2828 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2829 fd->loop.n1, t);
2830 }
2831 }
2832 else
2833 {
2834 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2835 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2836 t = fold_convert (TREE_TYPE (startvar), t);
2837 }
2838 t = force_gimple_operand_gsi (&gsi, t,
2839 DECL_P (startvar)
2840 && TREE_ADDRESSABLE (startvar),
2841 NULL_TREE, false, GSI_CONTINUE_LINKING);
2842 assign_stmt = gimple_build_assign (startvar, t);
2843 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2844
2845 t = iend0;
2846 if (fd->ordered && fd->collapse == 1)
2847 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2848 fold_convert (fd->iter_type, fd->loop.step));
2849 else if (bias)
2850 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2851 if (fd->ordered && fd->collapse == 1)
2852 {
2853 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2854 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2855 fd->loop.n1, fold_convert (sizetype, t));
2856 else
2857 {
2858 t = fold_convert (TREE_TYPE (startvar), t);
2859 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2860 fd->loop.n1, t);
2861 }
2862 }
2863 else
2864 {
2865 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2866 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2867 t = fold_convert (TREE_TYPE (startvar), t);
2868 }
2869 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2870 false, GSI_CONTINUE_LINKING);
2871 if (endvar)
2872 {
2873 assign_stmt = gimple_build_assign (endvar, iend);
2874 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2875 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2876 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2877 else
2878 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2879 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2880 }
2881 /* Handle linear clause adjustments. */
2882 tree itercnt = NULL_TREE;
2883 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2884 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2885 c; c = OMP_CLAUSE_CHAIN (c))
2886 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2887 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2888 {
2889 tree d = OMP_CLAUSE_DECL (c);
2890 bool is_ref = omp_is_reference (d);
2891 tree t = d, a, dest;
2892 if (is_ref)
2893 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2894 tree type = TREE_TYPE (t);
2895 if (POINTER_TYPE_P (type))
2896 type = sizetype;
2897 dest = unshare_expr (t);
2898 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2899 expand_omp_build_assign (&gsif, v, t);
2900 if (itercnt == NULL_TREE)
2901 {
2902 itercnt = startvar;
2903 tree n1 = fd->loop.n1;
2904 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2905 {
2906 itercnt
2907 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2908 itercnt);
2909 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2910 }
2911 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2912 itercnt, n1);
2913 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2914 itercnt, fd->loop.step);
2915 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2916 NULL_TREE, false,
2917 GSI_CONTINUE_LINKING);
2918 }
2919 a = fold_build2 (MULT_EXPR, type,
2920 fold_convert (type, itercnt),
2921 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2922 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2923 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2924 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2925 false, GSI_CONTINUE_LINKING);
2926 assign_stmt = gimple_build_assign (dest, t);
2927 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2928 }
2929 if (fd->collapse > 1)
2930 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2931
2932 if (fd->ordered)
2933 {
2934 /* Until now, the counts array contained the number of iterations
2935 (or a variable holding it) for the i-th loop.  From now on, we need
2936 those counts only for the collapsed loops, and only for the 2nd
2937 till the last collapsed one.  Move those one element earlier;
2938 we'll use counts[fd->collapse - 1] for the first source/sink
2939 iteration counter and so on, and counts[fd->ordered]
2940 as the array holding the current counter values for
2941 depend(source). */
2942 if (fd->collapse > 1)
2943 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2944 if (broken_loop)
2945 {
2946 int i;
2947 for (i = fd->collapse; i < fd->ordered; i++)
2948 {
2949 tree type = TREE_TYPE (fd->loops[i].v);
2950 tree this_cond
2951 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2952 fold_convert (type, fd->loops[i].n1),
2953 fold_convert (type, fd->loops[i].n2));
2954 if (!integer_onep (this_cond))
2955 break;
2956 }
2957 if (i < fd->ordered)
2958 {
2959 cont_bb
2960 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2961 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2962 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2963 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2964 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2965 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2966 make_edge (cont_bb, l1_bb, 0);
2967 l2_bb = create_empty_bb (cont_bb);
2968 broken_loop = false;
2969 }
2970 }
2971 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2972 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2973 ordered_lastprivate);
2974 if (counts[fd->collapse - 1])
2975 {
2976 gcc_assert (fd->collapse == 1);
2977 gsi = gsi_last_bb (l0_bb);
2978 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2979 istart0, true);
2980 gsi = gsi_last_bb (cont_bb);
2981 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2982 build_int_cst (fd->iter_type, 1));
2983 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2984 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2985 size_zero_node, NULL_TREE, NULL_TREE);
2986 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2987 t = counts[fd->collapse - 1];
2988 }
2989 else if (fd->collapse > 1)
2990 t = fd->loop.v;
2991 else
2992 {
2993 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2994 fd->loops[0].v, fd->loops[0].n1);
2995 t = fold_convert (fd->iter_type, t);
2996 }
2997 gsi = gsi_last_bb (l0_bb);
2998 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2999 size_zero_node, NULL_TREE, NULL_TREE);
3000 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3001 false, GSI_CONTINUE_LINKING);
3002 expand_omp_build_assign (&gsi, aref, t, true);
3003 }
3004
3005 if (!broken_loop)
3006 {
3007 /* Code to control the increment and predicate for the sequential
3008 loop goes in the CONT_BB. */
3009 gsi = gsi_last_nondebug_bb (cont_bb);
3010 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3011 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3012 vmain = gimple_omp_continue_control_use (cont_stmt);
3013 vback = gimple_omp_continue_control_def (cont_stmt);
3014
3015 if (!gimple_omp_for_combined_p (fd->for_stmt))
3016 {
3017 if (POINTER_TYPE_P (type))
3018 t = fold_build_pointer_plus (vmain, fd->loop.step);
3019 else
3020 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3021 t = force_gimple_operand_gsi (&gsi, t,
3022 DECL_P (vback)
3023 && TREE_ADDRESSABLE (vback),
3024 NULL_TREE, true, GSI_SAME_STMT);
3025 assign_stmt = gimple_build_assign (vback, t);
3026 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3027
3028 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3029 {
3030 if (fd->collapse > 1)
3031 t = fd->loop.v;
3032 else
3033 {
3034 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3035 fd->loops[0].v, fd->loops[0].n1);
3036 t = fold_convert (fd->iter_type, t);
3037 }
3038 tree aref = build4 (ARRAY_REF, fd->iter_type,
3039 counts[fd->ordered], size_zero_node,
3040 NULL_TREE, NULL_TREE);
3041 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3042 true, GSI_SAME_STMT);
3043 expand_omp_build_assign (&gsi, aref, t);
3044 }
3045
3046 t = build2 (fd->loop.cond_code, boolean_type_node,
3047 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3048 iend);
3049 gcond *cond_stmt = gimple_build_cond_empty (t);
3050 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3051 }
3052
3053 /* Remove GIMPLE_OMP_CONTINUE. */
3054 gsi_remove (&gsi, true);
3055
3056 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3057 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3058
3059 /* Emit code to get the next parallel iteration in L2_BB. */
3060 gsi = gsi_start_bb (l2_bb);
3061
3062 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3063 build_fold_addr_expr (istart0),
3064 build_fold_addr_expr (iend0));
3065 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3066 false, GSI_CONTINUE_LINKING);
3067 if (TREE_TYPE (t) != boolean_type_node)
3068 t = fold_build2 (NE_EXPR, boolean_type_node,
3069 t, build_int_cst (TREE_TYPE (t), 0));
3070 gcond *cond_stmt = gimple_build_cond_empty (t);
3071 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3072 }
3073
3074 /* Add the loop cleanup function. */
3075 gsi = gsi_last_nondebug_bb (exit_bb);
3076 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3077 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3078 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3079 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3080 else
3081 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3082 gcall *call_stmt = gimple_build_call (t, 0);
3083 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3084 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3085 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3086 if (fd->ordered)
3087 {
3088 tree arr = counts[fd->ordered];
3089 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3090 TREE_THIS_VOLATILE (clobber) = 1;
3091 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3092 GSI_SAME_STMT);
3093 }
3094 gsi_remove (&gsi, true);
3095
3096 /* Connect the new blocks. */
3097 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3098 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3099
3100 if (!broken_loop)
3101 {
3102 gimple_seq phis;
3103
3104 e = find_edge (cont_bb, l3_bb);
3105 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3106
3107 phis = phi_nodes (l3_bb);
3108 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3109 {
3110 gimple *phi = gsi_stmt (gsi);
3111 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3112 PHI_ARG_DEF_FROM_EDGE (phi, e));
3113 }
3114 remove_edge (e);
3115
3116 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3117 e = find_edge (cont_bb, l1_bb);
3118 if (e == NULL)
3119 {
3120 e = BRANCH_EDGE (cont_bb);
3121 gcc_assert (single_succ (e->dest) == l1_bb);
3122 }
3123 if (gimple_omp_for_combined_p (fd->for_stmt))
3124 {
3125 remove_edge (e);
3126 e = NULL;
3127 }
3128 else if (fd->collapse > 1)
3129 {
3130 remove_edge (e);
3131 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3132 }
3133 else
3134 e->flags = EDGE_TRUE_VALUE;
3135 if (e)
3136 {
3137 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3138 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3139 }
3140 else
3141 {
3142 e = find_edge (cont_bb, l2_bb);
3143 e->flags = EDGE_FALLTHRU;
3144 }
3145 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3146
3147 if (gimple_in_ssa_p (cfun))
3148 {
3149 /* Add phis to the outer loop that connect to the phis in the inner,
3150 original loop, and move the loop entry value of the inner phi to
3151 the loop entry value of the outer phi. */
3152 gphi_iterator psi;
3153 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3154 {
3155 source_location locus;
3156 gphi *nphi;
3157 gphi *exit_phi = psi.phi ();
3158
3159 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3160 continue;
3161
3162 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3163 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3164
3165 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3166 edge latch_to_l1 = find_edge (latch, l1_bb);
3167 gphi *inner_phi
3168 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3169
3170 tree t = gimple_phi_result (exit_phi);
3171 tree new_res = copy_ssa_name (t, NULL);
3172 nphi = create_phi_node (new_res, l0_bb);
3173
3174 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3175 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3176 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3177 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3178 add_phi_arg (nphi, t, entry_to_l0, locus);
3179
3180 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3181 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3182
3183 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3184 }
3185 }
3186
3187 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3188 recompute_dominator (CDI_DOMINATORS, l2_bb));
3189 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3190 recompute_dominator (CDI_DOMINATORS, l3_bb));
3191 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3192 recompute_dominator (CDI_DOMINATORS, l0_bb));
3193 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3194 recompute_dominator (CDI_DOMINATORS, l1_bb));
3195
3196 /* We enter expand_omp_for_generic with a loop. This original loop may
3197 have its own loop struct, or it may be part of an outer loop struct
3198 (which may be the fake loop). */
3199 struct loop *outer_loop = entry_bb->loop_father;
3200 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3201
3202 add_bb_to_loop (l2_bb, outer_loop);
3203
3204 /* We've added a new loop around the original loop. Allocate the
3205 corresponding loop struct. */
3206 struct loop *new_loop = alloc_loop ();
3207 new_loop->header = l0_bb;
3208 new_loop->latch = l2_bb;
3209 add_loop (new_loop, outer_loop);
3210
3211 /* Allocate a loop structure for the original loop unless we already
3212 had one. */
3213 if (!orig_loop_has_loop_struct
3214 && !gimple_omp_for_combined_p (fd->for_stmt))
3215 {
3216 struct loop *orig_loop = alloc_loop ();
3217 orig_loop->header = l1_bb;
3218 /* The loop may have multiple latches. */
3219 add_loop (orig_loop, new_loop);
3220 }
3221 }
3222 }
3223
3224 /* A subroutine of expand_omp_for. Generate code for a parallel
3225 loop with static schedule and no specified chunk size. Given
3226 parameters:
3227
3228 for (V = N1; V cond N2; V += STEP) BODY;
3229
3230 where COND is "<" or ">", we generate pseudocode
3231
3232 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3233 if (cond is <)
3234 adj = STEP - 1;
3235 else
3236 adj = STEP + 1;
3237 if ((__typeof (V)) -1 > 0 && cond is >)
3238 n = -(adj + N2 - N1) / -STEP;
3239 else
3240 n = (adj + N2 - N1) / STEP;
3241 q = n / nthreads;
3242 tt = n % nthreads;
3243 if (threadid < tt) goto L3; else goto L4;
3244 L3:
3245 tt = 0;
3246 q = q + 1;
3247 L4:
3248 s0 = q * threadid + tt;
3249 e0 = s0 + q;
3250 V = s0 * STEP + N1;
3251 if (s0 >= e0) goto L2; else goto L0;
3252 L0:
3253 e = e0 * STEP + N1;
3254 L1:
3255 BODY;
3256 V += STEP;
3257 if (V cond e) goto L1;
3258 L2:
3259 */
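/* Worked example (illustrative): with n = 10 iterations and nthreads = 4
   the pseudocode above yields q = 2 and tt = 2, so threads 0 and 1 each
   get q + 1 = 3 iterations ([s0,e0) = [0,3) and [3,6)) while threads 2
   and 3 get 2 iterations ([6,8) and [8,10)); the per-thread blocks are
   contiguous and cover [0, n) exactly once.  */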
3260
3261 static void
3262 expand_omp_for_static_nochunk (struct omp_region *region,
3263 struct omp_for_data *fd,
3264 gimple *inner_stmt)
3265 {
3266 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3267 tree type, itype, vmain, vback;
3268 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3269 basic_block body_bb, cont_bb, collapse_bb = NULL;
3270 basic_block fin_bb;
3271 gimple_stmt_iterator gsi;
3272 edge ep;
3273 bool broken_loop = region->cont == NULL;
3274 tree *counts = NULL;
3275 tree n1, n2, step;
3276
3277 itype = type = TREE_TYPE (fd->loop.v);
3278 if (POINTER_TYPE_P (type))
3279 itype = signed_type_for (type);
3280
3281 entry_bb = region->entry;
3282 cont_bb = region->cont;
3283 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3284 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3285 gcc_assert (broken_loop
3286 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3287 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3288 body_bb = single_succ (seq_start_bb);
3289 if (!broken_loop)
3290 {
3291 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3292 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3293 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3294 }
3295 exit_bb = region->exit;
3296
3297 /* Iteration space partitioning goes in ENTRY_BB. */
3298 gsi = gsi_last_nondebug_bb (entry_bb);
3299 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3300
3301 if (fd->collapse > 1)
3302 {
3303 int first_zero_iter = -1, dummy = -1;
3304 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3305
3306 counts = XALLOCAVEC (tree, fd->collapse);
3307 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3308 fin_bb, first_zero_iter,
3309 dummy_bb, dummy, l2_dom_bb);
3310 t = NULL_TREE;
3311 }
3312 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3313 t = integer_one_node;
3314 else
3315 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3316 fold_convert (type, fd->loop.n1),
3317 fold_convert (type, fd->loop.n2));
3318 if (fd->collapse == 1
3319 && TYPE_UNSIGNED (type)
3320 && (t == NULL_TREE || !integer_onep (t)))
3321 {
3322 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3323 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3324 true, GSI_SAME_STMT);
3325 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3326 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3327 true, GSI_SAME_STMT);
3328 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3329 NULL_TREE, NULL_TREE);
3330 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3331 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3332 expand_omp_regimplify_p, NULL, NULL)
3333 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3334 expand_omp_regimplify_p, NULL, NULL))
3335 {
3336 gsi = gsi_for_stmt (cond_stmt);
3337 gimple_regimplify_operands (cond_stmt, &gsi);
3338 }
3339 ep = split_block (entry_bb, cond_stmt);
3340 ep->flags = EDGE_TRUE_VALUE;
3341 entry_bb = ep->dest;
3342 ep->probability = profile_probability::very_likely ();
3343 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3344 ep->probability = profile_probability::very_unlikely ();
3345 if (gimple_in_ssa_p (cfun))
3346 {
3347 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3348 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3349 !gsi_end_p (gpi); gsi_next (&gpi))
3350 {
3351 gphi *phi = gpi.phi ();
3352 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3353 ep, UNKNOWN_LOCATION);
3354 }
3355 }
3356 gsi = gsi_last_bb (entry_bb);
3357 }
3358
3359 switch (gimple_omp_for_kind (fd->for_stmt))
3360 {
3361 case GF_OMP_FOR_KIND_FOR:
3362 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3363 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3364 break;
3365 case GF_OMP_FOR_KIND_DISTRIBUTE:
3366 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3367 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3368 break;
3369 default:
3370 gcc_unreachable ();
3371 }
3372 nthreads = build_call_expr (nthreads, 0);
3373 nthreads = fold_convert (itype, nthreads);
3374 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3375 true, GSI_SAME_STMT);
3376 threadid = build_call_expr (threadid, 0);
3377 threadid = fold_convert (itype, threadid);
3378 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3379 true, GSI_SAME_STMT);
3380
3381 n1 = fd->loop.n1;
3382 n2 = fd->loop.n2;
3383 step = fd->loop.step;
3384 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3385 {
3386 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3387 OMP_CLAUSE__LOOPTEMP_);
3388 gcc_assert (innerc);
3389 n1 = OMP_CLAUSE_DECL (innerc);
3390 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3391 OMP_CLAUSE__LOOPTEMP_);
3392 gcc_assert (innerc);
3393 n2 = OMP_CLAUSE_DECL (innerc);
3394 }
3395 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3396 true, NULL_TREE, true, GSI_SAME_STMT);
3397 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3398 true, NULL_TREE, true, GSI_SAME_STMT);
3399 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3400 true, NULL_TREE, true, GSI_SAME_STMT);
3401
3402 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3403 t = fold_build2 (PLUS_EXPR, itype, step, t);
3404 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3405 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3406 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3407 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3408 fold_build1 (NEGATE_EXPR, itype, t),
3409 fold_build1 (NEGATE_EXPR, itype, step));
3410 else
3411 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3412 t = fold_convert (itype, t);
3413 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3414
3415 q = create_tmp_reg (itype, "q");
3416 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3417 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3418 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3419
3420 tt = create_tmp_reg (itype, "tt");
3421 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3422 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3423 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3424
3425 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3426 gcond *cond_stmt = gimple_build_cond_empty (t);
3427 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3428
3429 second_bb = split_block (entry_bb, cond_stmt)->dest;
3430 gsi = gsi_last_nondebug_bb (second_bb);
3431 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3432
3433 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3434 GSI_SAME_STMT);
3435 gassign *assign_stmt
3436 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3437 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3438
3439 third_bb = split_block (second_bb, assign_stmt)->dest;
3440 gsi = gsi_last_nondebug_bb (third_bb);
3441 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3442
3443 t = build2 (MULT_EXPR, itype, q, threadid);
3444 t = build2 (PLUS_EXPR, itype, t, tt);
3445 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3446
3447 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3448 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3449
3450 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3451 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
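/* As a worked example (concrete values assumed for illustration): for
   for (i = 0; i < 10; i++) run by nthreads == 4, the code built above
   computes n = 10, q = 10 / 4 = 2 and tt = 10 % 4 = 2.  Threads 0 and 1
   satisfy threadid < tt, so they clear tt and take q + 1 == 3 iterations;
   threads 2 and 3 keep q == 2.  The resulting half-open ranges [s0, e0)
   are [0,3), [3,6), [6,8) and [8,10), and a thread with an empty range
   (s0 >= e0) branches straight to FIN_BB.  */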
3452
3453 /* Remove the GIMPLE_OMP_FOR statement. */
3454 gsi_remove (&gsi, true);
3455
3456 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3457 gsi = gsi_start_bb (seq_start_bb);
3458
3459 tree startvar = fd->loop.v;
3460 tree endvar = NULL_TREE;
3461
3462 if (gimple_omp_for_combined_p (fd->for_stmt))
3463 {
3464 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3465 ? gimple_omp_parallel_clauses (inner_stmt)
3466 : gimple_omp_for_clauses (inner_stmt);
3467 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3468 gcc_assert (innerc);
3469 startvar = OMP_CLAUSE_DECL (innerc);
3470 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3471 OMP_CLAUSE__LOOPTEMP_);
3472 gcc_assert (innerc);
3473 endvar = OMP_CLAUSE_DECL (innerc);
3474 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3475 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3476 {
3477 int i;
3478 for (i = 1; i < fd->collapse; i++)
3479 {
3480 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3481 OMP_CLAUSE__LOOPTEMP_);
3482 gcc_assert (innerc);
3483 }
3484 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3485 OMP_CLAUSE__LOOPTEMP_);
3486 if (innerc)
3487 {
3488 /* If needed (distribute parallel for with lastprivate),
3489 propagate down the total number of iterations. */
3490 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3491 fd->loop.n2);
3492 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3493 GSI_CONTINUE_LINKING);
3494 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3495 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3496 }
3497 }
3498 }
3499 t = fold_convert (itype, s0);
3500 t = fold_build2 (MULT_EXPR, itype, t, step);
3501 if (POINTER_TYPE_P (type))
3502 t = fold_build_pointer_plus (n1, t);
3503 else
3504 t = fold_build2 (PLUS_EXPR, type, t, n1);
3505 t = fold_convert (TREE_TYPE (startvar), t);
3506 t = force_gimple_operand_gsi (&gsi, t,
3507 DECL_P (startvar)
3508 && TREE_ADDRESSABLE (startvar),
3509 NULL_TREE, false, GSI_CONTINUE_LINKING);
3510 assign_stmt = gimple_build_assign (startvar, t);
3511 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3512
3513 t = fold_convert (itype, e0);
3514 t = fold_build2 (MULT_EXPR, itype, t, step);
3515 if (POINTER_TYPE_P (type))
3516 t = fold_build_pointer_plus (n1, t);
3517 else
3518 t = fold_build2 (PLUS_EXPR, type, t, n1);
3519 t = fold_convert (TREE_TYPE (startvar), t);
3520 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3521 false, GSI_CONTINUE_LINKING);
3522 if (endvar)
3523 {
3524 assign_stmt = gimple_build_assign (endvar, e);
3525 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3526 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3527 assign_stmt = gimple_build_assign (fd->loop.v, e);
3528 else
3529 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3530 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3531 }
3532 /* Handle linear clause adjustments. */
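/* As an illustrative example (clause and step assumed): for
   #pragma omp for linear(x:2), OMP_CLAUSE_LINEAR_STEP is 2 and the code
   below initializes each thread's copy of X to the incoming value plus
   itercnt * 2, i.e. to the value the sequential loop would have produced
   for X after itercnt iterations.  */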
3533 tree itercnt = NULL_TREE;
3534 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3535 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3536 c; c = OMP_CLAUSE_CHAIN (c))
3537 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3538 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3539 {
3540 tree d = OMP_CLAUSE_DECL (c);
3541 bool is_ref = omp_is_reference (d);
3542 tree t = d, a, dest;
3543 if (is_ref)
3544 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3545 if (itercnt == NULL_TREE)
3546 {
3547 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3548 {
3549 itercnt = fold_build2 (MINUS_EXPR, itype,
3550 fold_convert (itype, n1),
3551 fold_convert (itype, fd->loop.n1));
3552 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3553 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3554 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3555 NULL_TREE, false,
3556 GSI_CONTINUE_LINKING);
3557 }
3558 else
3559 itercnt = s0;
3560 }
3561 tree type = TREE_TYPE (t);
3562 if (POINTER_TYPE_P (type))
3563 type = sizetype;
3564 a = fold_build2 (MULT_EXPR, type,
3565 fold_convert (type, itercnt),
3566 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3567 dest = unshare_expr (t);
3568 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3569 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3570 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3571 false, GSI_CONTINUE_LINKING);
3572 assign_stmt = gimple_build_assign (dest, t);
3573 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3574 }
3575 if (fd->collapse > 1)
3576 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3577
3578 if (!broken_loop)
3579 {
3580 /* The code controlling the sequential loop replaces the
3581 GIMPLE_OMP_CONTINUE. */
3582 gsi = gsi_last_nondebug_bb (cont_bb);
3583 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3584 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3585 vmain = gimple_omp_continue_control_use (cont_stmt);
3586 vback = gimple_omp_continue_control_def (cont_stmt);
3587
3588 if (!gimple_omp_for_combined_p (fd->for_stmt))
3589 {
3590 if (POINTER_TYPE_P (type))
3591 t = fold_build_pointer_plus (vmain, step);
3592 else
3593 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3594 t = force_gimple_operand_gsi (&gsi, t,
3595 DECL_P (vback)
3596 && TREE_ADDRESSABLE (vback),
3597 NULL_TREE, true, GSI_SAME_STMT);
3598 assign_stmt = gimple_build_assign (vback, t);
3599 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3600
3601 t = build2 (fd->loop.cond_code, boolean_type_node,
3602 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3603 ? t : vback, e);
3604 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3605 }
3606
3607 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3608 gsi_remove (&gsi, true);
3609
3610 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3611 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3612 }
3613
3614 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3615 gsi = gsi_last_nondebug_bb (exit_bb);
3616 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3617 {
3618 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3619 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3620 }
3621 gsi_remove (&gsi, true);
3622
3623 /* Connect all the blocks. */
3624 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3625 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3626 ep = find_edge (entry_bb, second_bb);
3627 ep->flags = EDGE_TRUE_VALUE;
3628 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3629 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3630 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3631
3632 if (!broken_loop)
3633 {
3634 ep = find_edge (cont_bb, body_bb);
3635 if (ep == NULL)
3636 {
3637 ep = BRANCH_EDGE (cont_bb);
3638 gcc_assert (single_succ (ep->dest) == body_bb);
3639 }
3640 if (gimple_omp_for_combined_p (fd->for_stmt))
3641 {
3642 remove_edge (ep);
3643 ep = NULL;
3644 }
3645 else if (fd->collapse > 1)
3646 {
3647 remove_edge (ep);
3648 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3649 }
3650 else
3651 ep->flags = EDGE_TRUE_VALUE;
3652 find_edge (cont_bb, fin_bb)->flags
3653 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3654 }
3655
3656 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3657 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3658 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3659
3660 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3661 recompute_dominator (CDI_DOMINATORS, body_bb));
3662 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3663 recompute_dominator (CDI_DOMINATORS, fin_bb));
3664
3665 struct loop *loop = body_bb->loop_father;
3666 if (loop != entry_bb->loop_father)
3667 {
3668 gcc_assert (broken_loop || loop->header == body_bb);
3669 gcc_assert (broken_loop
3670 || loop->latch == region->cont
3671 || single_pred (loop->latch) == region->cont);
3672 return;
3673 }
3674
3675 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3676 {
3677 loop = alloc_loop ();
3678 loop->header = body_bb;
3679 if (collapse_bb == NULL)
3680 loop->latch = cont_bb;
3681 add_loop (loop, body_bb->loop_father);
3682 }
3683 }
3684
3685 /* Return phi in E->DEST with ARG on edge E. */
3686
3687 static gphi *
3688 find_phi_with_arg_on_edge (tree arg, edge e)
3689 {
3690 basic_block bb = e->dest;
3691
3692 for (gphi_iterator gpi = gsi_start_phis (bb);
3693 !gsi_end_p (gpi);
3694 gsi_next (&gpi))
3695 {
3696 gphi *phi = gpi.phi ();
3697 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3698 return phi;
3699 }
3700
3701 return NULL;
3702 }
3703
3704 /* A subroutine of expand_omp_for. Generate code for a parallel
3705 loop with static schedule and a specified chunk size. Given
3706 parameters:
3707
3708 for (V = N1; V cond N2; V += STEP) BODY;
3709
3710 where COND is "<" or ">", we generate pseudocode
3711
3712 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3713 if (cond is <)
3714 adj = STEP - 1;
3715 else
3716 adj = STEP + 1;
3717 if ((__typeof (V)) -1 > 0 && cond is >)
3718 n = -(adj + N2 - N1) / -STEP;
3719 else
3720 n = (adj + N2 - N1) / STEP;
3721 trip = 0;
3722 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3723 here so that V is defined
3724 if the loop is not entered
3725 L0:
3726 s0 = (trip * nthreads + threadid) * CHUNK;
3727 e0 = min (s0 + CHUNK, n);
3728 if (s0 < n) goto L1; else goto L4;
3729 L1:
3730 V = s0 * STEP + N1;
3731 e = e0 * STEP + N1;
3732 L2:
3733 BODY;
3734 V += STEP;
3735 if (V cond e) goto L2; else goto L3;
3736 L3:
3737 trip += 1;
3738 goto L0;
3739 L4:
3740 */
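/* As a concrete illustration (numbers assumed): for
   for (i = 0; i < 100; i++) with CHUNK == 4 and nthreads == 8, n = 100;
   on trip 0, thread 2 gets s0 = (0 * 8 + 2) * 4 = 8 and
   e0 = min (8 + 4, 100) = 12, so it runs i = 8..11; on trip 1 the same
   thread gets s0 = (1 * 8 + 2) * 4 = 40, and so on until s0 >= n.  */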
3741
3742 static void
3743 expand_omp_for_static_chunk (struct omp_region *region,
3744 struct omp_for_data *fd, gimple *inner_stmt)
3745 {
3746 tree n, s0, e0, e, t;
3747 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3748 tree type, itype, vmain, vback, vextra;
3749 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3750 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3751 gimple_stmt_iterator gsi;
3752 edge se;
3753 bool broken_loop = region->cont == NULL;
3754 tree *counts = NULL;
3755 tree n1, n2, step;
3756
3757 itype = type = TREE_TYPE (fd->loop.v);
3758 if (POINTER_TYPE_P (type))
3759 itype = signed_type_for (type);
3760
3761 entry_bb = region->entry;
3762 se = split_block (entry_bb, last_stmt (entry_bb));
3763 entry_bb = se->src;
3764 iter_part_bb = se->dest;
3765 cont_bb = region->cont;
3766 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3767 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3768 gcc_assert (broken_loop
3769 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3770 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3771 body_bb = single_succ (seq_start_bb);
3772 if (!broken_loop)
3773 {
3774 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3775 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3776 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3777 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3778 }
3779 exit_bb = region->exit;
3780
3781 /* Trip and adjustment setup goes in ENTRY_BB. */
3782 gsi = gsi_last_nondebug_bb (entry_bb);
3783 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3784
3785 if (fd->collapse > 1)
3786 {
3787 int first_zero_iter = -1, dummy = -1;
3788 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3789
3790 counts = XALLOCAVEC (tree, fd->collapse);
3791 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3792 fin_bb, first_zero_iter,
3793 dummy_bb, dummy, l2_dom_bb);
3794 t = NULL_TREE;
3795 }
3796 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3797 t = integer_one_node;
3798 else
3799 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3800 fold_convert (type, fd->loop.n1),
3801 fold_convert (type, fd->loop.n2));
3802 if (fd->collapse == 1
3803 && TYPE_UNSIGNED (type)
3804 && (t == NULL_TREE || !integer_onep (t)))
3805 {
3806 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3807 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3808 true, GSI_SAME_STMT);
3809 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3810 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3811 true, GSI_SAME_STMT);
3812 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3813 NULL_TREE, NULL_TREE);
3814 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3815 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3816 expand_omp_regimplify_p, NULL, NULL)
3817 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3818 expand_omp_regimplify_p, NULL, NULL))
3819 {
3820 gsi = gsi_for_stmt (cond_stmt);
3821 gimple_regimplify_operands (cond_stmt, &gsi);
3822 }
3823 se = split_block (entry_bb, cond_stmt);
3824 se->flags = EDGE_TRUE_VALUE;
3825 entry_bb = se->dest;
3826 se->probability = profile_probability::very_likely ();
3827 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3828 se->probability = profile_probability::very_unlikely ();
3829 if (gimple_in_ssa_p (cfun))
3830 {
3831 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3832 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3833 !gsi_end_p (gpi); gsi_next (&gpi))
3834 {
3835 gphi *phi = gpi.phi ();
3836 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3837 se, UNKNOWN_LOCATION);
3838 }
3839 }
3840 gsi = gsi_last_bb (entry_bb);
3841 }
3842
3843 switch (gimple_omp_for_kind (fd->for_stmt))
3844 {
3845 case GF_OMP_FOR_KIND_FOR:
3846 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3847 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3848 break;
3849 case GF_OMP_FOR_KIND_DISTRIBUTE:
3850 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3851 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3852 break;
3853 default:
3854 gcc_unreachable ();
3855 }
3856 nthreads = build_call_expr (nthreads, 0);
3857 nthreads = fold_convert (itype, nthreads);
3858 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3859 true, GSI_SAME_STMT);
3860 threadid = build_call_expr (threadid, 0);
3861 threadid = fold_convert (itype, threadid);
3862 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3863 true, GSI_SAME_STMT);
3864
3865 n1 = fd->loop.n1;
3866 n2 = fd->loop.n2;
3867 step = fd->loop.step;
3868 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3869 {
3870 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3871 OMP_CLAUSE__LOOPTEMP_);
3872 gcc_assert (innerc);
3873 n1 = OMP_CLAUSE_DECL (innerc);
3874 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3875 OMP_CLAUSE__LOOPTEMP_);
3876 gcc_assert (innerc);
3877 n2 = OMP_CLAUSE_DECL (innerc);
3878 }
3879 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3880 true, NULL_TREE, true, GSI_SAME_STMT);
3881 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3882 true, NULL_TREE, true, GSI_SAME_STMT);
3883 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3884 true, NULL_TREE, true, GSI_SAME_STMT);
3885 tree chunk_size = fold_convert (itype, fd->chunk_size);
3886 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3887 chunk_size
3888 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3889 GSI_SAME_STMT);
3890
3891 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3892 t = fold_build2 (PLUS_EXPR, itype, step, t);
3893 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3894 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3895 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3896 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3897 fold_build1 (NEGATE_EXPR, itype, t),
3898 fold_build1 (NEGATE_EXPR, itype, step));
3899 else
3900 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3901 t = fold_convert (itype, t);
3902 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3903 true, GSI_SAME_STMT);
3904
3905 trip_var = create_tmp_reg (itype, ".trip");
3906 if (gimple_in_ssa_p (cfun))
3907 {
3908 trip_init = make_ssa_name (trip_var);
3909 trip_main = make_ssa_name (trip_var);
3910 trip_back = make_ssa_name (trip_var);
3911 }
3912 else
3913 {
3914 trip_init = trip_var;
3915 trip_main = trip_var;
3916 trip_back = trip_var;
3917 }
3918
3919 gassign *assign_stmt
3920 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3921 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3922
3923 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3924 t = fold_build2 (MULT_EXPR, itype, t, step);
3925 if (POINTER_TYPE_P (type))
3926 t = fold_build_pointer_plus (n1, t);
3927 else
3928 t = fold_build2 (PLUS_EXPR, type, t, n1);
3929 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3930 true, GSI_SAME_STMT);
3931
3932 /* Remove the GIMPLE_OMP_FOR. */
3933 gsi_remove (&gsi, true);
3934
3935 gimple_stmt_iterator gsif = gsi;
3936
3937 /* Iteration space partitioning goes in ITER_PART_BB. */
3938 gsi = gsi_last_bb (iter_part_bb);
3939
3940 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3941 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3942 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3943 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3944 false, GSI_CONTINUE_LINKING);
3945
3946 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3947 t = fold_build2 (MIN_EXPR, itype, t, n);
3948 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3949 false, GSI_CONTINUE_LINKING);
3950
3951 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3952 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3953
3954 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3955 gsi = gsi_start_bb (seq_start_bb);
3956
3957 tree startvar = fd->loop.v;
3958 tree endvar = NULL_TREE;
3959
3960 if (gimple_omp_for_combined_p (fd->for_stmt))
3961 {
3962 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3963 ? gimple_omp_parallel_clauses (inner_stmt)
3964 : gimple_omp_for_clauses (inner_stmt);
3965 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3966 gcc_assert (innerc);
3967 startvar = OMP_CLAUSE_DECL (innerc);
3968 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3969 OMP_CLAUSE__LOOPTEMP_);
3970 gcc_assert (innerc);
3971 endvar = OMP_CLAUSE_DECL (innerc);
3972 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3973 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3974 {
3975 int i;
3976 for (i = 1; i < fd->collapse; i++)
3977 {
3978 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3979 OMP_CLAUSE__LOOPTEMP_);
3980 gcc_assert (innerc);
3981 }
3982 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3983 OMP_CLAUSE__LOOPTEMP_);
3984 if (innerc)
3985 {
3986 /* If needed (distribute parallel for with lastprivate),
3987 propagate down the total number of iterations. */
3988 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3989 fd->loop.n2);
3990 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3991 GSI_CONTINUE_LINKING);
3992 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3993 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3994 }
3995 }
3996 }
3997
3998 t = fold_convert (itype, s0);
3999 t = fold_build2 (MULT_EXPR, itype, t, step);
4000 if (POINTER_TYPE_P (type))
4001 t = fold_build_pointer_plus (n1, t);
4002 else
4003 t = fold_build2 (PLUS_EXPR, type, t, n1);
4004 t = fold_convert (TREE_TYPE (startvar), t);
4005 t = force_gimple_operand_gsi (&gsi, t,
4006 DECL_P (startvar)
4007 && TREE_ADDRESSABLE (startvar),
4008 NULL_TREE, false, GSI_CONTINUE_LINKING);
4009 assign_stmt = gimple_build_assign (startvar, t);
4010 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4011
4012 t = fold_convert (itype, e0);
4013 t = fold_build2 (MULT_EXPR, itype, t, step);
4014 if (POINTER_TYPE_P (type))
4015 t = fold_build_pointer_plus (n1, t);
4016 else
4017 t = fold_build2 (PLUS_EXPR, type, t, n1);
4018 t = fold_convert (TREE_TYPE (startvar), t);
4019 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4020 false, GSI_CONTINUE_LINKING);
4021 if (endvar)
4022 {
4023 assign_stmt = gimple_build_assign (endvar, e);
4024 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4025 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4026 assign_stmt = gimple_build_assign (fd->loop.v, e);
4027 else
4028 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4029 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4030 }
4031 /* Handle linear clause adjustments. */
4032 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4033 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4034 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4035 c; c = OMP_CLAUSE_CHAIN (c))
4036 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4037 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4038 {
4039 tree d = OMP_CLAUSE_DECL (c);
4040 bool is_ref = omp_is_reference (d);
4041 tree t = d, a, dest;
4042 if (is_ref)
4043 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4044 tree type = TREE_TYPE (t);
4045 if (POINTER_TYPE_P (type))
4046 type = sizetype;
4047 dest = unshare_expr (t);
4048 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4049 expand_omp_build_assign (&gsif, v, t);
4050 if (itercnt == NULL_TREE)
4051 {
4052 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4053 {
4054 itercntbias
4055 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4056 fold_convert (itype, fd->loop.n1));
4057 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4058 itercntbias, step);
4059 itercntbias
4060 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4061 NULL_TREE, true,
4062 GSI_SAME_STMT);
4063 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4064 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4065 NULL_TREE, false,
4066 GSI_CONTINUE_LINKING);
4067 }
4068 else
4069 itercnt = s0;
4070 }
4071 a = fold_build2 (MULT_EXPR, type,
4072 fold_convert (type, itercnt),
4073 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4074 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4075 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4076 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4077 false, GSI_CONTINUE_LINKING);
4078 assign_stmt = gimple_build_assign (dest, t);
4079 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4080 }
4081 if (fd->collapse > 1)
4082 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4083
4084 if (!broken_loop)
4085 {
4086 /* The code controlling the sequential loop goes in CONT_BB,
4087 replacing the GIMPLE_OMP_CONTINUE. */
4088 gsi = gsi_last_nondebug_bb (cont_bb);
4089 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4090 vmain = gimple_omp_continue_control_use (cont_stmt);
4091 vback = gimple_omp_continue_control_def (cont_stmt);
4092
4093 if (!gimple_omp_for_combined_p (fd->for_stmt))
4094 {
4095 if (POINTER_TYPE_P (type))
4096 t = fold_build_pointer_plus (vmain, step);
4097 else
4098 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4099 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4100 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4101 true, GSI_SAME_STMT);
4102 assign_stmt = gimple_build_assign (vback, t);
4103 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4104
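/* With a constant chunk size of 1, every chunk covers exactly one
   iteration, so the sequential loop can never take its back edge; build
   a constant-false condition and let later folding remove the branch.  */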
4105 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4106 t = build2 (EQ_EXPR, boolean_type_node,
4107 build_int_cst (itype, 0),
4108 build_int_cst (itype, 1));
4109 else
4110 t = build2 (fd->loop.cond_code, boolean_type_node,
4111 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4112 ? t : vback, e);
4113 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4114 }
4115
4116 /* Remove GIMPLE_OMP_CONTINUE. */
4117 gsi_remove (&gsi, true);
4118
4119 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4120 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4121
4122 /* Trip update code goes into TRIP_UPDATE_BB. */
4123 gsi = gsi_start_bb (trip_update_bb);
4124
4125 t = build_int_cst (itype, 1);
4126 t = build2 (PLUS_EXPR, itype, trip_main, t);
4127 assign_stmt = gimple_build_assign (trip_back, t);
4128 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4129 }
4130
4131 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4132 gsi = gsi_last_nondebug_bb (exit_bb);
4133 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4134 {
4135 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4136 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4137 }
4138 gsi_remove (&gsi, true);
4139
4140 /* Connect the new blocks. */
4141 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4142 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4143
4144 if (!broken_loop)
4145 {
4146 se = find_edge (cont_bb, body_bb);
4147 if (se == NULL)
4148 {
4149 se = BRANCH_EDGE (cont_bb);
4150 gcc_assert (single_succ (se->dest) == body_bb);
4151 }
4152 if (gimple_omp_for_combined_p (fd->for_stmt))
4153 {
4154 remove_edge (se);
4155 se = NULL;
4156 }
4157 else if (fd->collapse > 1)
4158 {
4159 remove_edge (se);
4160 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4161 }
4162 else
4163 se->flags = EDGE_TRUE_VALUE;
4164 find_edge (cont_bb, trip_update_bb)->flags
4165 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4166
4167 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4168 iter_part_bb);
4169 }
4170
4171 if (gimple_in_ssa_p (cfun))
4172 {
4173 gphi_iterator psi;
4174 gphi *phi;
4175 edge re, ene;
4176 edge_var_map *vm;
4177 size_t i;
4178
4179 gcc_assert (fd->collapse == 1 && !broken_loop);
4180
4181 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4182 remove arguments of the phi nodes in fin_bb. We need to create
4183 appropriate phi nodes in iter_part_bb instead. */
4184 se = find_edge (iter_part_bb, fin_bb);
4185 re = single_succ_edge (trip_update_bb);
4186 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4187 ene = single_succ_edge (entry_bb);
4188
4189 psi = gsi_start_phis (fin_bb);
4190 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4191 gsi_next (&psi), ++i)
4192 {
4193 gphi *nphi;
4194 source_location locus;
4195
4196 phi = psi.phi ();
4197 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4198 redirect_edge_var_map_def (vm), 0))
4199 continue;
4200
4201 t = gimple_phi_result (phi);
4202 gcc_assert (t == redirect_edge_var_map_result (vm));
4203
4204 if (!single_pred_p (fin_bb))
4205 t = copy_ssa_name (t, phi);
4206
4207 nphi = create_phi_node (t, iter_part_bb);
4208
4209 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4210 locus = gimple_phi_arg_location_from_edge (phi, se);
4211
4212 /* A special case -- fd->loop.v is not yet computed in
4213 iter_part_bb, so we need to use vextra instead. */
4214 if (t == fd->loop.v)
4215 t = vextra;
4216 add_phi_arg (nphi, t, ene, locus);
4217 locus = redirect_edge_var_map_location (vm);
4218 tree back_arg = redirect_edge_var_map_def (vm);
4219 add_phi_arg (nphi, back_arg, re, locus);
4220 edge ce = find_edge (cont_bb, body_bb);
4221 if (ce == NULL)
4222 {
4223 ce = BRANCH_EDGE (cont_bb);
4224 gcc_assert (single_succ (ce->dest) == body_bb);
4225 ce = single_succ_edge (ce->dest);
4226 }
4227 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4228 gcc_assert (inner_loop_phi != NULL);
4229 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4230 find_edge (seq_start_bb, body_bb), locus);
4231
4232 if (!single_pred_p (fin_bb))
4233 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4234 }
4235 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4236 redirect_edge_var_map_clear (re);
4237 if (single_pred_p (fin_bb))
4238 while (1)
4239 {
4240 psi = gsi_start_phis (fin_bb);
4241 if (gsi_end_p (psi))
4242 break;
4243 remove_phi_node (&psi, false);
4244 }
4245
4246 /* Make phi node for trip. */
4247 phi = create_phi_node (trip_main, iter_part_bb);
4248 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4249 UNKNOWN_LOCATION);
4250 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4251 UNKNOWN_LOCATION);
4252 }
4253
4254 if (!broken_loop)
4255 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4256 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4257 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4258 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4259 recompute_dominator (CDI_DOMINATORS, fin_bb));
4260 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4261 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4262 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4263 recompute_dominator (CDI_DOMINATORS, body_bb));
4264
4265 if (!broken_loop)
4266 {
4267 struct loop *loop = body_bb->loop_father;
4268 struct loop *trip_loop = alloc_loop ();
4269 trip_loop->header = iter_part_bb;
4270 trip_loop->latch = trip_update_bb;
4271 add_loop (trip_loop, iter_part_bb->loop_father);
4272
4273 if (loop != entry_bb->loop_father)
4274 {
4275 gcc_assert (loop->header == body_bb);
4276 gcc_assert (loop->latch == region->cont
4277 || single_pred (loop->latch) == region->cont);
4278 trip_loop->inner = loop;
4279 return;
4280 }
4281
4282 if (!gimple_omp_for_combined_p (fd->for_stmt))
4283 {
4284 loop = alloc_loop ();
4285 loop->header = body_bb;
4286 if (collapse_bb == NULL)
4287 loop->latch = cont_bb;
4288 add_loop (loop, trip_loop);
4289 }
4290 }
4291 }
4292
4293 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4294 loop. Given parameters:
4295
4296 for (V = N1; V cond N2; V += STEP) BODY;
4297
4298 where COND is "<" or ">", we generate pseudocode
4299
4300 V = N1;
4301 goto L1;
4302 L0:
4303 BODY;
4304 V += STEP;
4305 L1:
4306 if (V cond N2) goto L0; else goto L2;
4307 L2:
4308
4309 For collapsed loops, given parameters:
4310 collapse(3)
4311 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4312 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4313 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4314 BODY;
4315
4316 we generate pseudocode
4317
4318 if (cond3 is <)
4319 adj = STEP3 - 1;
4320 else
4321 adj = STEP3 + 1;
4322 count3 = (adj + N32 - N31) / STEP3;
4323 if (cond2 is <)
4324 adj = STEP2 - 1;
4325 else
4326 adj = STEP2 + 1;
4327 count2 = (adj + N22 - N21) / STEP2;
4328 if (cond1 is <)
4329 adj = STEP1 - 1;
4330 else
4331 adj = STEP1 + 1;
4332 count1 = (adj + N12 - N11) / STEP1;
4333 count = count1 * count2 * count3;
4334 V = 0;
4335 V1 = N11;
4336 V2 = N21;
4337 V3 = N31;
4338 goto L1;
4339 L0:
4340 BODY;
4341 V += 1;
4342 V3 += STEP3;
4343 V2 += (V3 cond3 N32) ? 0 : STEP2;
4344 V3 = (V3 cond3 N32) ? V3 : N31;
4345 V1 += (V2 cond2 N22) ? 0 : STEP1;
4346 V2 = (V2 cond2 N22) ? V2 : N21;
4347 L1:
4348 if (V < count) goto L0; else goto L2;
4349 L2:
4350
4351 */
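/* As an assumed concrete case: with collapse(2) and
   for (i = 0; i < 4; i++) for (j = 0; j < 3; j++), count1 = 4,
   count2 = 3 and count = 12.  The single control variable V runs from 0
   to 11; J is stepped on every iteration and is reset to its lower bound
   (with I stepped instead) exactly when it would leave its range,
   reproducing the original nesting order.  */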
4352
4353 static void
4354 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4355 {
4356 tree type, t;
4357 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4358 gimple_stmt_iterator gsi;
4359 gimple *stmt;
4360 gcond *cond_stmt;
4361 bool broken_loop = region->cont == NULL;
4362 edge e, ne;
4363 tree *counts = NULL;
4364 int i;
4365 int safelen_int = INT_MAX;
4366 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4367 OMP_CLAUSE_SAFELEN);
4368 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4369 OMP_CLAUSE__SIMDUID_);
4370 tree n1, n2;
4371
4372 if (safelen)
4373 {
4374 poly_uint64 val;
4375 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4376 if (!poly_int_tree_p (safelen, &val))
4377 safelen_int = 0;
4378 else
4379 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4380 if (safelen_int == 1)
4381 safelen_int = 0;
4382 }
4383 type = TREE_TYPE (fd->loop.v);
4384 entry_bb = region->entry;
4385 cont_bb = region->cont;
4386 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4387 gcc_assert (broken_loop
4388 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4389 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4390 if (!broken_loop)
4391 {
4392 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4393 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4394 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4395 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4396 }
4397 else
4398 {
4399 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4400 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4401 l2_bb = single_succ (l1_bb);
4402 }
4403 exit_bb = region->exit;
4404 l2_dom_bb = NULL;
4405
4406 gsi = gsi_last_nondebug_bb (entry_bb);
4407
4408 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4409 /* Not needed in SSA form right now. */
4410 gcc_assert (!gimple_in_ssa_p (cfun));
4411 if (fd->collapse > 1)
4412 {
4413 int first_zero_iter = -1, dummy = -1;
4414 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4415
4416 counts = XALLOCAVEC (tree, fd->collapse);
4417 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4418 zero_iter_bb, first_zero_iter,
4419 dummy_bb, dummy, l2_dom_bb);
4420 }
4421 if (l2_dom_bb == NULL)
4422 l2_dom_bb = l1_bb;
4423
4424 n1 = fd->loop.n1;
4425 n2 = fd->loop.n2;
4426 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4427 {
4428 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4429 OMP_CLAUSE__LOOPTEMP_);
4430 gcc_assert (innerc);
4431 n1 = OMP_CLAUSE_DECL (innerc);
4432 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4433 OMP_CLAUSE__LOOPTEMP_);
4434 gcc_assert (innerc);
4435 n2 = OMP_CLAUSE_DECL (innerc);
4436 }
4437 tree step = fd->loop.step;
4438
4439 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4440 OMP_CLAUSE__SIMT_);
4441 if (is_simt)
4442 {
4443 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4444 is_simt = safelen_int > 1;
4445 }
4446 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4447 if (is_simt)
4448 {
4449 simt_lane = create_tmp_var (unsigned_type_node);
4450 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4451 gimple_call_set_lhs (g, simt_lane);
4452 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4453 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4454 fold_convert (TREE_TYPE (step), simt_lane));
4455 n1 = fold_convert (type, n1);
4456 if (POINTER_TYPE_P (type))
4457 n1 = fold_build_pointer_plus (n1, offset);
4458 else
4459 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4460
4461 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4462 if (fd->collapse > 1)
4463 simt_maxlane = build_one_cst (unsigned_type_node);
4464 else if (safelen_int < omp_max_simt_vf ())
4465 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4466 tree vf
4467 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4468 unsigned_type_node, 0);
4469 if (simt_maxlane)
4470 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4471 vf = fold_convert (TREE_TYPE (step), vf);
4472 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4473 }
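/* Thus under SIMT each lane starts at its own iteration, N1 + lane * STEP,
   and then strides by VF * STEP, so the lanes cover the iteration space
   in an interleaved fashion; e.g. with an assumed VF of 32, lane 3
   executes iterations 3, 35, 67, ...  */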
4474
4475 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4476 if (fd->collapse > 1)
4477 {
4478 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4479 {
4480 gsi_prev (&gsi);
4481 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4482 gsi_next (&gsi);
4483 }
4484 else
4485 for (i = 0; i < fd->collapse; i++)
4486 {
4487 tree itype = TREE_TYPE (fd->loops[i].v);
4488 if (POINTER_TYPE_P (itype))
4489 itype = signed_type_for (itype);
4490 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4491 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4492 }
4493 }
4494
4495 /* Remove the GIMPLE_OMP_FOR statement. */
4496 gsi_remove (&gsi, true);
4497
4498 if (!broken_loop)
4499 {
4500 /* Code to control the increment goes in the CONT_BB. */
4501 gsi = gsi_last_nondebug_bb (cont_bb);
4502 stmt = gsi_stmt (gsi);
4503 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4504
4505 if (POINTER_TYPE_P (type))
4506 t = fold_build_pointer_plus (fd->loop.v, step);
4507 else
4508 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4509 expand_omp_build_assign (&gsi, fd->loop.v, t);
4510
4511 if (fd->collapse > 1)
4512 {
4513 i = fd->collapse - 1;
4514 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4515 {
4516 t = fold_convert (sizetype, fd->loops[i].step);
4517 t = fold_build_pointer_plus (fd->loops[i].v, t);
4518 }
4519 else
4520 {
4521 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4522 fd->loops[i].step);
4523 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4524 fd->loops[i].v, t);
4525 }
4526 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4527
4528 for (i = fd->collapse - 1; i > 0; i--)
4529 {
4530 tree itype = TREE_TYPE (fd->loops[i].v);
4531 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4532 if (POINTER_TYPE_P (itype2))
4533 itype2 = signed_type_for (itype2);
4534 t = fold_convert (itype2, fd->loops[i - 1].step);
4535 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4536 GSI_SAME_STMT);
4537 t = build3 (COND_EXPR, itype2,
4538 build2 (fd->loops[i].cond_code, boolean_type_node,
4539 fd->loops[i].v,
4540 fold_convert (itype, fd->loops[i].n2)),
4541 build_int_cst (itype2, 0), t);
4542 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4543 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4544 else
4545 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4546 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4547
4548 t = fold_convert (itype, fd->loops[i].n1);
4549 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4550 GSI_SAME_STMT);
4551 t = build3 (COND_EXPR, itype,
4552 build2 (fd->loops[i].cond_code, boolean_type_node,
4553 fd->loops[i].v,
4554 fold_convert (itype, fd->loops[i].n2)),
4555 fd->loops[i].v, t);
4556 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4557 }
4558 }
4559
4560 /* Remove GIMPLE_OMP_CONTINUE. */
4561 gsi_remove (&gsi, true);
4562 }
4563
4564 /* Emit the condition in L1_BB. */
4565 gsi = gsi_start_bb (l1_bb);
4566
4567 t = fold_convert (type, n2);
4568 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4569 false, GSI_CONTINUE_LINKING);
4570 tree v = fd->loop.v;
4571 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4572 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4573 false, GSI_CONTINUE_LINKING);
4574 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4575 cond_stmt = gimple_build_cond_empty (t);
4576 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4577 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4578 NULL, NULL)
4579 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4580 NULL, NULL))
4581 {
4582 gsi = gsi_for_stmt (cond_stmt);
4583 gimple_regimplify_operands (cond_stmt, &gsi);
4584 }
4585
4586 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4587 if (is_simt)
4588 {
4589 gsi = gsi_start_bb (l2_bb);
4590 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4591 if (POINTER_TYPE_P (type))
4592 t = fold_build_pointer_plus (fd->loop.v, step);
4593 else
4594 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4595 expand_omp_build_assign (&gsi, fd->loop.v, t);
4596 }
4597
4598 /* Remove GIMPLE_OMP_RETURN. */
4599 gsi = gsi_last_nondebug_bb (exit_bb);
4600 gsi_remove (&gsi, true);
4601
4602 /* Connect the new blocks. */
4603 remove_edge (FALLTHRU_EDGE (entry_bb));
4604
4605 if (!broken_loop)
4606 {
4607 remove_edge (BRANCH_EDGE (entry_bb));
4608 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4609
4610 e = BRANCH_EDGE (l1_bb);
4611 ne = FALLTHRU_EDGE (l1_bb);
4612 e->flags = EDGE_TRUE_VALUE;
4613 }
4614 else
4615 {
4616 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4617
4618 ne = single_succ_edge (l1_bb);
4619 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4620
4621 }
4622 ne->flags = EDGE_FALSE_VALUE;
4623 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4624 ne->probability = e->probability.invert ();
4625
4626 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4627 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4628
4629 if (simt_maxlane)
4630 {
4631 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4632 NULL_TREE, NULL_TREE);
4633 gsi = gsi_last_bb (entry_bb);
4634 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4635 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4636 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4637 FALLTHRU_EDGE (entry_bb)->probability
4638 = profile_probability::guessed_always ().apply_scale (7, 8);
4639 BRANCH_EDGE (entry_bb)->probability
4640 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4641 l2_dom_bb = entry_bb;
4642 }
4643 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4644
4645 if (!broken_loop)
4646 {
4647 struct loop *loop = alloc_loop ();
4648 loop->header = l1_bb;
4649 loop->latch = cont_bb;
4650 add_loop (loop, l1_bb->loop_father);
4651 loop->safelen = safelen_int;
4652 if (simduid)
4653 {
4654 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4655 cfun->has_simduid_loops = true;
4656 }
4657 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4658 the loop. */
4659 if ((flag_tree_loop_vectorize
4660 || !global_options_set.x_flag_tree_loop_vectorize)
4661 && flag_tree_loop_optimize
4662 && loop->safelen > 1)
4663 {
4664 loop->force_vectorize = true;
4665 cfun->has_force_vectorize_loops = true;
4666 }
4667 }
4668 else if (simduid)
4669 cfun->has_simduid_loops = true;
4670 }
4671
4672 /* The taskloop construct is represented after gimplification as
4673 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4674 between them. This routine expands the outer GIMPLE_OMP_FOR,
4675 which should just compute all the needed loop temporaries
4676 for GIMPLE_OMP_TASK. */
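/* As an illustrative sketch (user code assumed), for

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   gimplification yields roughly

     GIMPLE_OMP_FOR            <-- outer, expanded by this routine
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR        <-- inner, expanded separately
           body

   and the outer loop only materializes the _looptemp_ bounds that the
   GOMP_taskloop call hands to each generated task.  */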
4677
4678 static void
4679 expand_omp_taskloop_for_outer (struct omp_region *region,
4680 struct omp_for_data *fd,
4681 gimple *inner_stmt)
4682 {
4683 tree type, bias = NULL_TREE;
4684 basic_block entry_bb, cont_bb, exit_bb;
4685 gimple_stmt_iterator gsi;
4686 gassign *assign_stmt;
4687 tree *counts = NULL;
4688 int i;
4689
4690 gcc_assert (inner_stmt);
4691 gcc_assert (region->cont);
4692 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4693 && gimple_omp_task_taskloop_p (inner_stmt));
4694 type = TREE_TYPE (fd->loop.v);
4695
4696 /* See if we need to bias by LLONG_MIN. */
4697 if (fd->iter_type == long_long_unsigned_type_node
4698 && TREE_CODE (type) == INTEGER_TYPE
4699 && !TYPE_UNSIGNED (type))
4700 {
4701 tree n1, n2;
4702
4703 if (fd->loop.cond_code == LT_EXPR)
4704 {
4705 n1 = fd->loop.n1;
4706 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4707 }
4708 else
4709 {
4710 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4711 n2 = fd->loop.n1;
4712 }
4713 if (TREE_CODE (n1) != INTEGER_CST
4714 || TREE_CODE (n2) != INTEGER_CST
4715 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4716 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4717 }
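/* For example (types assumed): with a signed long long loop variable and
   an unsigned long long iter_type, biasing by LLONG_MIN maps the signed
   range [LLONG_MIN, LLONG_MAX] onto [0, ULLONG_MAX] while preserving
   order, so GOMP_taskloop_ull can compare the biased bounds with plain
   unsigned arithmetic.  */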
4718
4719 entry_bb = region->entry;
4720 cont_bb = region->cont;
4721 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4722 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4723 exit_bb = region->exit;
4724
4725 gsi = gsi_last_nondebug_bb (entry_bb);
4726 gimple *for_stmt = gsi_stmt (gsi);
4727 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4728 if (fd->collapse > 1)
4729 {
4730 int first_zero_iter = -1, dummy = -1;
4731 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4732
4733 counts = XALLOCAVEC (tree, fd->collapse);
4734 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4735 zero_iter_bb, first_zero_iter,
4736 dummy_bb, dummy, l2_dom_bb);
4737
4738 if (zero_iter_bb)
4739 {
4740 /* Some counts[i] vars might be uninitialized if
4741 some loop has zero iterations. But the body shouldn't
4742 be executed in that case, so just avoid uninit warnings. */
4743 for (i = first_zero_iter; i < fd->collapse; i++)
4744 if (SSA_VAR_P (counts[i]))
4745 TREE_NO_WARNING (counts[i]) = 1;
4746 gsi_prev (&gsi);
4747 edge e = split_block (entry_bb, gsi_stmt (gsi));
4748 entry_bb = e->dest;
4749 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4750 gsi = gsi_last_bb (entry_bb);
4751 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4752 get_immediate_dominator (CDI_DOMINATORS,
4753 zero_iter_bb));
4754 }
4755 }
4756
4757 tree t0, t1;
4758 t1 = fd->loop.n2;
4759 t0 = fd->loop.n1;
4760 if (POINTER_TYPE_P (TREE_TYPE (t0))
4761 && TYPE_PRECISION (TREE_TYPE (t0))
4762 != TYPE_PRECISION (fd->iter_type))
4763 {
4764 /* Avoid casting pointers to integer of a different size. */
4765 tree itype = signed_type_for (type);
4766 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4767 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4768 }
4769 else
4770 {
4771 t1 = fold_convert (fd->iter_type, t1);
4772 t0 = fold_convert (fd->iter_type, t0);
4773 }
4774 if (bias)
4775 {
4776 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4777 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4778 }
4779
4780 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4781 OMP_CLAUSE__LOOPTEMP_);
4782 gcc_assert (innerc);
4783 tree startvar = OMP_CLAUSE_DECL (innerc);
4784 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4785 gcc_assert (innerc);
4786 tree endvar = OMP_CLAUSE_DECL (innerc);
4787 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4788 {
4789 gcc_assert (innerc);
4790 for (i = 1; i < fd->collapse; i++)
4791 {
4792 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4793 OMP_CLAUSE__LOOPTEMP_);
4794 gcc_assert (innerc);
4795 }
4796 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4797 OMP_CLAUSE__LOOPTEMP_);
4798 if (innerc)
4799 {
4800 /* If needed (inner taskloop has lastprivate clause), propagate
4801 down the total number of iterations. */
4802 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4803 NULL_TREE, false,
4804 GSI_CONTINUE_LINKING);
4805 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4806 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4807 }
4808 }
4809
4810 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4811 GSI_CONTINUE_LINKING);
4812 assign_stmt = gimple_build_assign (startvar, t0);
4813 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4814
4815 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4816 GSI_CONTINUE_LINKING);
4817 assign_stmt = gimple_build_assign (endvar, t1);
4818 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4819 if (fd->collapse > 1)
4820 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4821
4822 /* Remove the GIMPLE_OMP_FOR statement. */
4823 gsi = gsi_for_stmt (for_stmt);
4824 gsi_remove (&gsi, true);
4825
4826 gsi = gsi_last_nondebug_bb (cont_bb);
4827 gsi_remove (&gsi, true);
4828
4829 gsi = gsi_last_nondebug_bb (exit_bb);
4830 gsi_remove (&gsi, true);
4831
4832 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4833 remove_edge (BRANCH_EDGE (entry_bb));
4834 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4835 remove_edge (BRANCH_EDGE (cont_bb));
4836 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4837 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4838 recompute_dominator (CDI_DOMINATORS, region->entry));
4839 }
4840
4841 /* The taskloop construct is represented after gimplification as
4842 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4843 between them. This routine expands the inner GIMPLE_OMP_FOR.
4844 The GOMP_taskloop{,_ull} function arranges for each task to be given
4845 just a single range of iterations. */
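/* Illustratively (range assumed), a task created for iterations
   [40, 48) simply receives start = 40 and end = 48 through its
   _looptemp_ variables, and the code generated below runs the body as an
   ordinary sequential loop from that start to that end.  */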
4846
4847 static void
4848 expand_omp_taskloop_for_inner (struct omp_region *region,
4849 struct omp_for_data *fd,
4850 gimple *inner_stmt)
4851 {
4852 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4853 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4854 basic_block fin_bb;
4855 gimple_stmt_iterator gsi;
4856 edge ep;
4857 bool broken_loop = region->cont == NULL;
4858 tree *counts = NULL;
4859 tree n1, n2, step;
4860
4861 itype = type = TREE_TYPE (fd->loop.v);
4862 if (POINTER_TYPE_P (type))
4863 itype = signed_type_for (type);
4864
4865 /* See if we need to bias by LLONG_MIN. */
4866 if (fd->iter_type == long_long_unsigned_type_node
4867 && TREE_CODE (type) == INTEGER_TYPE
4868 && !TYPE_UNSIGNED (type))
4869 {
4870 tree n1, n2;
4871
4872 if (fd->loop.cond_code == LT_EXPR)
4873 {
4874 n1 = fd->loop.n1;
4875 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4876 }
4877 else
4878 {
4879 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4880 n2 = fd->loop.n1;
4881 }
4882 if (TREE_CODE (n1) != INTEGER_CST
4883 || TREE_CODE (n2) != INTEGER_CST
4884 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4885 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4886 }
4887
4888 entry_bb = region->entry;
4889 cont_bb = region->cont;
4890 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4891 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4892 gcc_assert (broken_loop
4893 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4894 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4895 if (!broken_loop)
4896 {
4897 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4898 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4899 }
4900 exit_bb = region->exit;
4901
4902 /* Iteration space partitioning goes in ENTRY_BB. */
4903 gsi = gsi_last_nondebug_bb (entry_bb);
4904 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4905
4906 if (fd->collapse > 1)
4907 {
4908 int first_zero_iter = -1, dummy = -1;
4909 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4910
4911 counts = XALLOCAVEC (tree, fd->collapse);
4912 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4913 fin_bb, first_zero_iter,
4914 dummy_bb, dummy, l2_dom_bb);
4915 t = NULL_TREE;
4916 }
4917 else
4918 t = integer_one_node;
4919
4920 step = fd->loop.step;
4921 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4922 OMP_CLAUSE__LOOPTEMP_);
4923 gcc_assert (innerc);
4924 n1 = OMP_CLAUSE_DECL (innerc);
4925 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4926 gcc_assert (innerc);
4927 n2 = OMP_CLAUSE_DECL (innerc);
4928 if (bias)
4929 {
4930 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4931 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4932 }
4933 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4934 true, NULL_TREE, true, GSI_SAME_STMT);
4935 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4936 true, NULL_TREE, true, GSI_SAME_STMT);
4937 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4938 true, NULL_TREE, true, GSI_SAME_STMT);
4939
4940 tree startvar = fd->loop.v;
4941 tree endvar = NULL_TREE;
4942
4943 if (gimple_omp_for_combined_p (fd->for_stmt))
4944 {
4945 tree clauses = gimple_omp_for_clauses (inner_stmt);
4946 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4947 gcc_assert (innerc);
4948 startvar = OMP_CLAUSE_DECL (innerc);
4949 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4950 OMP_CLAUSE__LOOPTEMP_);
4951 gcc_assert (innerc);
4952 endvar = OMP_CLAUSE_DECL (innerc);
4953 }
4954 t = fold_convert (TREE_TYPE (startvar), n1);
4955 t = force_gimple_operand_gsi (&gsi, t,
4956 DECL_P (startvar)
4957 && TREE_ADDRESSABLE (startvar),
4958 NULL_TREE, false, GSI_CONTINUE_LINKING);
4959 gimple *assign_stmt = gimple_build_assign (startvar, t);
4960 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4961
4962 t = fold_convert (TREE_TYPE (startvar), n2);
4963 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4964 false, GSI_CONTINUE_LINKING);
4965 if (endvar)
4966 {
4967 assign_stmt = gimple_build_assign (endvar, e);
4968 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4969 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4970 assign_stmt = gimple_build_assign (fd->loop.v, e);
4971 else
4972 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4973 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4974 }
4975 if (fd->collapse > 1)
4976 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4977
4978 if (!broken_loop)
4979 {
4980 /* The code controlling the sequential loop replaces the
4981 GIMPLE_OMP_CONTINUE. */
4982 gsi = gsi_last_nondebug_bb (cont_bb);
4983 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4984 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4985 vmain = gimple_omp_continue_control_use (cont_stmt);
4986 vback = gimple_omp_continue_control_def (cont_stmt);
4987
4988 if (!gimple_omp_for_combined_p (fd->for_stmt))
4989 {
4990 if (POINTER_TYPE_P (type))
4991 t = fold_build_pointer_plus (vmain, step);
4992 else
4993 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4994 t = force_gimple_operand_gsi (&gsi, t,
4995 DECL_P (vback)
4996 && TREE_ADDRESSABLE (vback),
4997 NULL_TREE, true, GSI_SAME_STMT);
4998 assign_stmt = gimple_build_assign (vback, t);
4999 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5000
5001 t = build2 (fd->loop.cond_code, boolean_type_node,
5002 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5003 ? t : vback, e);
5004 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5005 }
5006
5007 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5008 gsi_remove (&gsi, true);
5009
5010 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5011 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5012 }
5013
5014 /* Remove the GIMPLE_OMP_FOR statement. */
5015 gsi = gsi_for_stmt (fd->for_stmt);
5016 gsi_remove (&gsi, true);
5017
5018 /* Remove the GIMPLE_OMP_RETURN statement. */
5019 gsi = gsi_last_nondebug_bb (exit_bb);
5020 gsi_remove (&gsi, true);
5021
5022 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5023 if (!broken_loop)
5024 remove_edge (BRANCH_EDGE (entry_bb));
5025 else
5026 {
5027 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5028 region->outer->cont = NULL;
5029 }
5030
5031 /* Connect all the blocks. */
5032 if (!broken_loop)
5033 {
5034 ep = find_edge (cont_bb, body_bb);
5035 if (gimple_omp_for_combined_p (fd->for_stmt))
5036 {
5037 remove_edge (ep);
5038 ep = NULL;
5039 }
5040 else if (fd->collapse > 1)
5041 {
5042 remove_edge (ep);
5043 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5044 }
5045 else
5046 ep->flags = EDGE_TRUE_VALUE;
5047 find_edge (cont_bb, fin_bb)->flags
5048 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5049 }
5050
5051 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5052 recompute_dominator (CDI_DOMINATORS, body_bb));
5053 if (!broken_loop)
5054 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5055 recompute_dominator (CDI_DOMINATORS, fin_bb));
5056
5057 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5058 {
5059 struct loop *loop = alloc_loop ();
5060 loop->header = body_bb;
5061 if (collapse_bb == NULL)
5062 loop->latch = cont_bb;
5063 add_loop (loop, body_bb->loop_father);
5064 }
5065 }
5066
5067 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5068 partitioned loop. The lowering here is abstracted, in that the
5069 loop parameters are passed through internal functions, which are
5070 further lowered by oacc_device_lower, once we get to the target
5071 compiler. The loop is of the form:
5072
5073 for (V = B; V LTGT E; V += S) {BODY}
5074
5075 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5076 (constant 0 for no chunking) and we will have a GWV partitioning
5077 mask, specifying dimensions over which the loop is to be
5078 partitioned (see note below). We generate code that looks like
5079 (this ignores tiling):
5080
5081 <entry_bb> [incoming FALL->body, BRANCH->exit]
5082 typedef signedintify (typeof (V)) T; // underlying signed integral type
5083 T range = E - B;
5084 T chunk_no = 0;
5085 T dir = LTGT == '<' ? +1 : -1;
5086 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5087 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5088
5089 <head_bb> [created by splitting end of entry_bb]
5090 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5091 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5092 if (!(offset LTGT bound)) goto bottom_bb;
5093
5094 <body_bb> [incoming]
5095 V = B + offset;
5096 {BODY}
5097
5098 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5099 offset += step;
5100 if (offset LTGT bound) goto body_bb; [*]
5101
5102 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5103 chunk_no++;
5104 if (chunk_no < chunk_max) goto head_bb;
5105
5106 <exit_bb> [incoming]
5107 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5108
5109 [*] Needed if V live at end of loop. */
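
/* As an illustration only (hypothetical user code, not taken from this
   file), a loop such as

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++)
       a[i] = b[i];

   reaches this function with B == 0, E == n, S == 1 and LTGT == '<';
   the GOACC_LOOP_* calls sketched above are emitted as IFN_GOACC_LOOP
   internal calls and only resolved for the selected offload target
   later, in oacc_device_lower.  */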
5110
5111 static void
5112 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5113 {
5114 tree v = fd->loop.v;
5115 enum tree_code cond_code = fd->loop.cond_code;
5116 enum tree_code plus_code = PLUS_EXPR;
5117
5118 tree chunk_size = integer_minus_one_node;
5119 tree gwv = integer_zero_node;
5120 tree iter_type = TREE_TYPE (v);
5121 tree diff_type = iter_type;
5122 tree plus_type = iter_type;
5123 struct oacc_collapse *counts = NULL;
5124
5125 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5126 == GF_OMP_FOR_KIND_OACC_LOOP);
5127 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5128 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5129
5130 if (POINTER_TYPE_P (iter_type))
5131 {
5132 plus_code = POINTER_PLUS_EXPR;
5133 plus_type = sizetype;
5134 }
5135 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5136 diff_type = signed_type_for (diff_type);
5137 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5138 diff_type = integer_type_node;
5139
5140 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5141 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5142 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5143 basic_block bottom_bb = NULL;
5144
5145 /* entry_bb has two successors; the branch edge goes to the exit
5146 block, the fallthrough edge to the body. */
5147 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5148 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5149
5150 /* If cont_bb is non-NULL, it has two successors. The branch successor
5151 is body_bb, or a block whose only successor is body_bb. Its
5152 fallthrough successor is the final block (the same as the branch
5153 successor of entry_bb). */
5154 if (cont_bb)
5155 {
5156 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5157 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5158
5159 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5160 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5161 }
5162 else
5163 gcc_assert (!gimple_in_ssa_p (cfun));
5164
5165 /* The exit block only has entry_bb and cont_bb as predecessors. */
5166 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5167
5168 tree chunk_no;
5169 tree chunk_max = NULL_TREE;
5170 tree bound, offset;
5171 tree step = create_tmp_var (diff_type, ".step");
5172 bool up = cond_code == LT_EXPR;
5173 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5174 bool chunking = !gimple_in_ssa_p (cfun);
5175 bool negating;
5176
5177 /* Tiling vars. */
5178 tree tile_size = NULL_TREE;
5179 tree element_s = NULL_TREE;
5180 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5181 basic_block elem_body_bb = NULL;
5182 basic_block elem_cont_bb = NULL;
5183
5184 /* SSA instances. */
5185 tree offset_incr = NULL_TREE;
5186 tree offset_init = NULL_TREE;
5187
5188 gimple_stmt_iterator gsi;
5189 gassign *ass;
5190 gcall *call;
5191 gimple *stmt;
5192 tree expr;
5193 location_t loc;
5194 edge split, be, fte;
5195
5196 /* Split the end of entry_bb to create head_bb. */
5197 split = split_block (entry_bb, last_stmt (entry_bb));
5198 basic_block head_bb = split->dest;
5199 entry_bb = split->src;
5200
5201 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5202 gsi = gsi_last_nondebug_bb (entry_bb);
5203 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5204 loc = gimple_location (for_stmt);
5205
5206 if (gimple_in_ssa_p (cfun))
5207 {
5208 offset_init = gimple_omp_for_index (for_stmt, 0);
5209 gcc_assert (integer_zerop (fd->loop.n1));
5210 /* The SSA parallelizer does gang parallelism. */
5211 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5212 }
5213
5214 if (fd->collapse > 1 || fd->tiling)
5215 {
5216 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5217 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5218 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5219 TREE_TYPE (fd->loop.n2), loc);
5220
5221 if (SSA_VAR_P (fd->loop.n2))
5222 {
5223 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5224 true, GSI_SAME_STMT);
5225 ass = gimple_build_assign (fd->loop.n2, total);
5226 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5227 }
5228 }
5229
5230 tree b = fd->loop.n1;
5231 tree e = fd->loop.n2;
5232 tree s = fd->loop.step;
5233
5234 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5235 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5236
5237 /* Convert the step, avoiding possible unsigned->signed overflow. */
5238 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5239 if (negating)
5240 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5241 s = fold_convert (diff_type, s);
5242 if (negating)
5243 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5244 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5245
5246 if (!chunking)
5247 chunk_size = integer_zero_node;
5248 expr = fold_convert (diff_type, chunk_size);
5249 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5250 NULL_TREE, true, GSI_SAME_STMT);
5251
5252 if (fd->tiling)
5253 {
5254 /* Determine the tile size and element step,
5255 modify the outer loop step size. */
5256 tile_size = create_tmp_var (diff_type, ".tile_size");
5257 expr = build_int_cst (diff_type, 1);
5258 for (int ix = 0; ix < fd->collapse; ix++)
5259 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5260 expr = force_gimple_operand_gsi (&gsi, expr, true,
5261 NULL_TREE, true, GSI_SAME_STMT);
5262 ass = gimple_build_assign (tile_size, expr);
5263 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5264
5265 element_s = create_tmp_var (diff_type, ".element_s");
5266 ass = gimple_build_assign (element_s, s);
5267 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5268
5269 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5270 s = force_gimple_operand_gsi (&gsi, expr, true,
5271 NULL_TREE, true, GSI_SAME_STMT);
5272 }
5273
5274 /* Determine the range, avoiding possible unsigned->signed overflow. */
5275 negating = !up && TYPE_UNSIGNED (iter_type);
5276 expr = fold_build2 (MINUS_EXPR, plus_type,
5277 fold_convert (plus_type, negating ? b : e),
5278 fold_convert (plus_type, negating ? e : b));
5279 expr = fold_convert (diff_type, expr);
5280 if (negating)
5281 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5282 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5283 NULL_TREE, true, GSI_SAME_STMT);
5284
5285 chunk_no = build_int_cst (diff_type, 0);
5286 if (chunking)
5287 {
5288 gcc_assert (!gimple_in_ssa_p (cfun));
5289
5290 expr = chunk_no;
5291 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5292 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5293
5294 ass = gimple_build_assign (chunk_no, expr);
5295 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5296
5297 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5298 build_int_cst (integer_type_node,
5299 IFN_GOACC_LOOP_CHUNKS),
5300 dir, range, s, chunk_size, gwv);
5301 gimple_call_set_lhs (call, chunk_max);
5302 gimple_set_location (call, loc);
5303 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5304 }
5305 else
5306 chunk_size = chunk_no;
5307
5308 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5309 build_int_cst (integer_type_node,
5310 IFN_GOACC_LOOP_STEP),
5311 dir, range, s, chunk_size, gwv);
5312 gimple_call_set_lhs (call, step);
5313 gimple_set_location (call, loc);
5314 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5315
5316 /* Remove the GIMPLE_OMP_FOR. */
5317 gsi_remove (&gsi, true);
5318
5319 /* Fixup edges from head_bb. */
5320 be = BRANCH_EDGE (head_bb);
5321 fte = FALLTHRU_EDGE (head_bb);
5322 be->flags |= EDGE_FALSE_VALUE;
5323 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5324
5325 basic_block body_bb = fte->dest;
5326
5327 if (gimple_in_ssa_p (cfun))
5328 {
5329 gsi = gsi_last_nondebug_bb (cont_bb);
5330 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5331
5332 offset = gimple_omp_continue_control_use (cont_stmt);
5333 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5334 }
5335 else
5336 {
5337 offset = create_tmp_var (diff_type, ".offset");
5338 offset_init = offset_incr = offset;
5339 }
5340 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5341
5342 /* Loop offset & bound go into head_bb. */
5343 gsi = gsi_start_bb (head_bb);
5344
5345 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5346 build_int_cst (integer_type_node,
5347 IFN_GOACC_LOOP_OFFSET),
5348 dir, range, s,
5349 chunk_size, gwv, chunk_no);
5350 gimple_call_set_lhs (call, offset_init);
5351 gimple_set_location (call, loc);
5352 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5353
5354 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5355 build_int_cst (integer_type_node,
5356 IFN_GOACC_LOOP_BOUND),
5357 dir, range, s,
5358 chunk_size, gwv, offset_init);
5359 gimple_call_set_lhs (call, bound);
5360 gimple_set_location (call, loc);
5361 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5362
5363 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5364 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5365 GSI_CONTINUE_LINKING);
5366
5367 /* V assignment goes into body_bb. */
5368 if (!gimple_in_ssa_p (cfun))
5369 {
5370 gsi = gsi_start_bb (body_bb);
5371
5372 expr = build2 (plus_code, iter_type, b,
5373 fold_convert (plus_type, offset));
5374 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5375 true, GSI_SAME_STMT);
5376 ass = gimple_build_assign (v, expr);
5377 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5378
5379 if (fd->collapse > 1 || fd->tiling)
5380 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5381
5382 if (fd->tiling)
5383 {
5384 /* Determine the range of the element loop -- usually simply
5385 the tile_size, but could be smaller if the final
5386 iteration of the outer loop is a partial tile. */
5387 tree e_range = create_tmp_var (diff_type, ".e_range");
5388
5389 expr = build2 (MIN_EXPR, diff_type,
5390 build2 (MINUS_EXPR, diff_type, bound, offset),
5391 build2 (MULT_EXPR, diff_type, tile_size,
5392 element_s));
5393 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5394 true, GSI_SAME_STMT);
5395 ass = gimple_build_assign (e_range, expr);
5396 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5397
5398 /* Determine bound, offset & step of inner loop. */
5399 e_bound = create_tmp_var (diff_type, ".e_bound");
5400 e_offset = create_tmp_var (diff_type, ".e_offset");
5401 e_step = create_tmp_var (diff_type, ".e_step");
5402
5403 /* Mark these as element loops. */
5404 tree t, e_gwv = integer_minus_one_node;
5405 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5406
5407 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5408 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5409 element_s, chunk, e_gwv, chunk);
5410 gimple_call_set_lhs (call, e_offset);
5411 gimple_set_location (call, loc);
5412 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5413
5414 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5415 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5416 element_s, chunk, e_gwv, e_offset);
5417 gimple_call_set_lhs (call, e_bound);
5418 gimple_set_location (call, loc);
5419 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5420
5421 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5422 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5423 element_s, chunk, e_gwv);
5424 gimple_call_set_lhs (call, e_step);
5425 gimple_set_location (call, loc);
5426 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5427
5428 /* Add test and split block. */
5429 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5430 stmt = gimple_build_cond_empty (expr);
5431 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5432 split = split_block (body_bb, stmt);
5433 elem_body_bb = split->dest;
5434 if (cont_bb == body_bb)
5435 cont_bb = elem_body_bb;
5436 body_bb = split->src;
5437
5438 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5439
5440 /* Initialize the user's loop vars. */
5441 gsi = gsi_start_bb (elem_body_bb);
5442 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5443 }
5444 }
5445
5446 /* Loop increment goes into cont_bb. If this is not a loop, we
5447 will have spawned threads as if it were, and each one will
5448 execute one iteration. The specification is not explicit about
5449 whether such constructs are ill-formed or not, and they can
5450 occur, especially when noreturn routines are involved. */
5451 if (cont_bb)
5452 {
5453 gsi = gsi_last_nondebug_bb (cont_bb);
5454 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5455 loc = gimple_location (cont_stmt);
5456
5457 if (fd->tiling)
5458 {
5459 /* Insert element loop increment and test. */
5460 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5461 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5462 true, GSI_SAME_STMT);
5463 ass = gimple_build_assign (e_offset, expr);
5464 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5465 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5466
5467 stmt = gimple_build_cond_empty (expr);
5468 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5469 split = split_block (cont_bb, stmt);
5470 elem_cont_bb = split->src;
5471 cont_bb = split->dest;
5472
5473 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5474 split->probability = profile_probability::unlikely ().guessed ();
5475 edge latch_edge
5476 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5477 latch_edge->probability = profile_probability::likely ().guessed ();
5478
5479 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5480 skip_edge->probability = profile_probability::unlikely ().guessed ();
5481 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5482 loop_entry_edge->probability
5483 = profile_probability::likely ().guessed ();
5484
5485 gsi = gsi_for_stmt (cont_stmt);
5486 }
5487
5488 /* Increment offset. */
5489 if (gimple_in_ssa_p (cfun))
5490 expr = build2 (plus_code, iter_type, offset,
5491 fold_convert (plus_type, step));
5492 else
5493 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5494 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5495 true, GSI_SAME_STMT);
5496 ass = gimple_build_assign (offset_incr, expr);
5497 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5498 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5499 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5500
5501 /* Remove the GIMPLE_OMP_CONTINUE. */
5502 gsi_remove (&gsi, true);
5503
5504 /* Fixup edges from cont_bb. */
5505 be = BRANCH_EDGE (cont_bb);
5506 fte = FALLTHRU_EDGE (cont_bb);
5507 be->flags |= EDGE_TRUE_VALUE;
5508 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5509
5510 if (chunking)
5511 {
5512 /* Split the beginning of exit_bb to make bottom_bb. We
5513 need to insert a nop at the start, because splitting is
5514 after a stmt, not before. */
5515 gsi = gsi_start_bb (exit_bb);
5516 stmt = gimple_build_nop ();
5517 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5518 split = split_block (exit_bb, stmt);
5519 bottom_bb = split->src;
5520 exit_bb = split->dest;
5521 gsi = gsi_last_bb (bottom_bb);
5522
5523 /* Chunk increment and test goes into bottom_bb. */
5524 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5525 build_int_cst (diff_type, 1));
5526 ass = gimple_build_assign (chunk_no, expr);
5527 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5528
5529 /* Chunk test at end of bottom_bb. */
5530 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5531 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5532 GSI_CONTINUE_LINKING);
5533
5534 /* Fixup edges from bottom_bb. */
5535 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5536 split->probability = profile_probability::unlikely ().guessed ();
5537 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5538 latch_edge->probability = profile_probability::likely ().guessed ();
5539 }
5540 }
5541
5542 gsi = gsi_last_nondebug_bb (exit_bb);
5543 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5544 loc = gimple_location (gsi_stmt (gsi));
5545
5546 if (!gimple_in_ssa_p (cfun))
5547 {
5548 /* Insert the final value of V, in case it is live. This is the
5549 value for the only thread that survives past the join. */
5550 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5551 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5552 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5553 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5554 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5555 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5556 true, GSI_SAME_STMT);
5557 ass = gimple_build_assign (v, expr);
5558 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5559 }
5560
5561 /* Remove the OMP_RETURN. */
5562 gsi_remove (&gsi, true);
5563
5564 if (cont_bb)
5565 {
5566 /* We now have one, two or three nested loops. Update the loop
5567 structures. */
5568 struct loop *parent = entry_bb->loop_father;
5569 struct loop *body = body_bb->loop_father;
5570
5571 if (chunking)
5572 {
5573 struct loop *chunk_loop = alloc_loop ();
5574 chunk_loop->header = head_bb;
5575 chunk_loop->latch = bottom_bb;
5576 add_loop (chunk_loop, parent);
5577 parent = chunk_loop;
5578 }
5579 else if (parent != body)
5580 {
5581 gcc_assert (body->header == body_bb);
5582 gcc_assert (body->latch == cont_bb
5583 || single_pred (body->latch) == cont_bb);
5584 parent = NULL;
5585 }
5586
5587 if (parent)
5588 {
5589 struct loop *body_loop = alloc_loop ();
5590 body_loop->header = body_bb;
5591 body_loop->latch = cont_bb;
5592 add_loop (body_loop, parent);
5593
5594 if (fd->tiling)
5595 {
5596 /* Insert tiling's element loop. */
5597 struct loop *inner_loop = alloc_loop ();
5598 inner_loop->header = elem_body_bb;
5599 inner_loop->latch = elem_cont_bb;
5600 add_loop (inner_loop, body_loop);
5601 }
5602 }
5603 }
5604 }
5605
5606 /* Expand the OMP loop defined by REGION. */
5607
5608 static void
5609 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5610 {
5611 struct omp_for_data fd;
5612 struct omp_for_data_loop *loops;
5613
5614 loops
5615 = (struct omp_for_data_loop *)
5616 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5617 * sizeof (struct omp_for_data_loop));
5618 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5619 &fd, loops);
5620 region->sched_kind = fd.sched_kind;
5621 region->sched_modifiers = fd.sched_modifiers;
5622
5623 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5624 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5625 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5626 if (region->cont)
5627 {
5628 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5629 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5630 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5631 }
5632 else
5633 /* If there isn't a continue, then this is a degenerate case where
5634 the introduction of abnormal edges during lowering will prevent
5635 original loops from being detected. Fix that up. */
5636 loops_state_set (LOOPS_NEED_FIXUP);
5637
5638 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5639 expand_omp_simd (region, &fd);
5640 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5641 {
5642 gcc_assert (!inner_stmt);
5643 expand_oacc_for (region, &fd);
5644 }
5645 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5646 {
5647 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5648 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5649 else
5650 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5651 }
5652 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5653 && !fd.have_ordered)
5654 {
5655 if (fd.chunk_size == NULL)
5656 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5657 else
5658 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5659 }
5660 else
5661 {
5662 int fn_index, start_ix, next_ix;
5663
5664 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5665 == GF_OMP_FOR_KIND_FOR);
5666 if (fd.chunk_size == NULL
5667 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5668 fd.chunk_size = integer_zero_node;
5669 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5670 switch (fd.sched_kind)
5671 {
5672 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5673 fn_index = 3;
5674 break;
5675 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5676 case OMP_CLAUSE_SCHEDULE_GUIDED:
5677 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5678 && !fd.ordered
5679 && !fd.have_ordered)
5680 {
5681 fn_index = 3 + fd.sched_kind;
5682 break;
5683 }
5684 /* FALLTHRU */
5685 default:
5686 fn_index = fd.sched_kind;
5687 break;
5688 }
5689 if (!fd.ordered)
5690 fn_index += fd.have_ordered * 6;
5691 if (fd.ordered)
5692 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5693 else
5694 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5695 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5696 if (fd.iter_type == long_long_unsigned_type_node)
5697 {
5698 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5699 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5700 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5701 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5702 }
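/* For example (assuming the usual ordering of the schedule-kind enum
   and of the GOMP_loop_* builtins), a monotonic schedule(dynamic) loop
   ends up with fn_index == 1 and therefore calls
   GOMP_loop_dynamic_start/_next, while a loop with an ordered(n) clause
   uses the GOMP_loop_doacross_* entry points selected above.  */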
5703 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5704 (enum built_in_function) next_ix, inner_stmt);
5705 }
5706
5707 if (gimple_in_ssa_p (cfun))
5708 update_ssa (TODO_update_ssa_only_virtuals);
5709 }
5710
5711 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5712
5713 v = GOMP_sections_start (n);
5714 L0:
5715 switch (v)
5716 {
5717 case 0:
5718 goto L2;
5719 case 1:
5720 section 1;
5721 goto L1;
5722 case 2:
5723 ...
5724 case n:
5725 ...
5726 default:
5727 abort ();
5728 }
5729 L1:
5730 v = GOMP_sections_next ();
5731 goto L0;
5732 L2:
5733 reduction;
5734
5735 If this is a combined parallel sections, replace the call to
5736 GOMP_sections_start with call to GOMP_sections_next. */
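
/* As a purely illustrative example (hypothetical user code),

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   yields a switch over the value returned by GOMP_sections_start (2):
   case 1 runs foo, case 2 runs bar, and case 0 means there is no more
   work for this thread.  */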
5737
5738 static void
5739 expand_omp_sections (struct omp_region *region)
5740 {
5741 tree t, u, vin = NULL, vmain, vnext, l2;
5742 unsigned len;
5743 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5744 gimple_stmt_iterator si, switch_si;
5745 gomp_sections *sections_stmt;
5746 gimple *stmt;
5747 gomp_continue *cont;
5748 edge_iterator ei;
5749 edge e;
5750 struct omp_region *inner;
5751 unsigned i, casei;
5752 bool exit_reachable = region->cont != NULL;
5753
5754 gcc_assert (region->exit != NULL);
5755 entry_bb = region->entry;
5756 l0_bb = single_succ (entry_bb);
5757 l1_bb = region->cont;
5758 l2_bb = region->exit;
5759 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5760 l2 = gimple_block_label (l2_bb);
5761 else
5762 {
5763 /* This can happen if there are reductions. */
5764 len = EDGE_COUNT (l0_bb->succs);
5765 gcc_assert (len > 0);
5766 e = EDGE_SUCC (l0_bb, len - 1);
5767 si = gsi_last_nondebug_bb (e->dest);
5768 l2 = NULL_TREE;
5769 if (gsi_end_p (si)
5770 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5771 l2 = gimple_block_label (e->dest);
5772 else
5773 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5774 {
5775 si = gsi_last_nondebug_bb (e->dest);
5776 if (gsi_end_p (si)
5777 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5778 {
5779 l2 = gimple_block_label (e->dest);
5780 break;
5781 }
5782 }
5783 }
5784 if (exit_reachable)
5785 default_bb = create_empty_bb (l1_bb->prev_bb);
5786 else
5787 default_bb = create_empty_bb (l0_bb);
5788
5789 /* We will build a switch() with enough cases for all the
5790 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
5791 and a default case to abort if something goes wrong. */
5792 len = EDGE_COUNT (l0_bb->succs);
5793
5794 /* Use vec::quick_push on label_vec throughout, since we know the size
5795 in advance. */
5796 auto_vec<tree> label_vec (len);
5797
5798 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5799 GIMPLE_OMP_SECTIONS statement. */
5800 si = gsi_last_nondebug_bb (entry_bb);
5801 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5802 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5803 vin = gimple_omp_sections_control (sections_stmt);
5804 if (!is_combined_parallel (region))
5805 {
5806 /* If we are not inside a combined parallel+sections region,
5807 call GOMP_sections_start. */
5808 t = build_int_cst (unsigned_type_node, len - 1);
5809 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5810 stmt = gimple_build_call (u, 1, t);
5811 }
5812 else
5813 {
5814 /* Otherwise, call GOMP_sections_next. */
5815 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5816 stmt = gimple_build_call (u, 0);
5817 }
5818 gimple_call_set_lhs (stmt, vin);
5819 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5820 gsi_remove (&si, true);
5821
5822 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5823 L0_BB. */
5824 switch_si = gsi_last_nondebug_bb (l0_bb);
5825 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5826 if (exit_reachable)
5827 {
5828 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5829 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5830 vmain = gimple_omp_continue_control_use (cont);
5831 vnext = gimple_omp_continue_control_def (cont);
5832 }
5833 else
5834 {
5835 vmain = vin;
5836 vnext = NULL_TREE;
5837 }
5838
5839 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5840 label_vec.quick_push (t);
5841 i = 1;
5842
5843 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5844 for (inner = region->inner, casei = 1;
5845 inner;
5846 inner = inner->next, i++, casei++)
5847 {
5848 basic_block s_entry_bb, s_exit_bb;
5849
5850 /* Skip optional reduction region. */
5851 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5852 {
5853 --i;
5854 --casei;
5855 continue;
5856 }
5857
5858 s_entry_bb = inner->entry;
5859 s_exit_bb = inner->exit;
5860
5861 t = gimple_block_label (s_entry_bb);
5862 u = build_int_cst (unsigned_type_node, casei);
5863 u = build_case_label (u, NULL, t);
5864 label_vec.quick_push (u);
5865
5866 si = gsi_last_nondebug_bb (s_entry_bb);
5867 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5868 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5869 gsi_remove (&si, true);
5870 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5871
5872 if (s_exit_bb == NULL)
5873 continue;
5874
5875 si = gsi_last_nondebug_bb (s_exit_bb);
5876 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5877 gsi_remove (&si, true);
5878
5879 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5880 }
5881
5882 /* Error handling code goes in DEFAULT_BB. */
5883 t = gimple_block_label (default_bb);
5884 u = build_case_label (NULL, NULL, t);
5885 make_edge (l0_bb, default_bb, 0);
5886 add_bb_to_loop (default_bb, current_loops->tree_root);
5887
5888 stmt = gimple_build_switch (vmain, u, label_vec);
5889 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5890 gsi_remove (&switch_si, true);
5891
5892 si = gsi_start_bb (default_bb);
5893 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5894 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5895
5896 if (exit_reachable)
5897 {
5898 tree bfn_decl;
5899
5900 /* Code to get the next section goes in L1_BB. */
5901 si = gsi_last_nondebug_bb (l1_bb);
5902 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5903
5904 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5905 stmt = gimple_build_call (bfn_decl, 0);
5906 gimple_call_set_lhs (stmt, vnext);
5907 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5908 gsi_remove (&si, true);
5909
5910 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5911 }
5912
5913 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5914 si = gsi_last_nondebug_bb (l2_bb);
5915 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5916 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5917 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5918 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5919 else
5920 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5921 stmt = gimple_build_call (t, 0);
5922 if (gimple_omp_return_lhs (gsi_stmt (si)))
5923 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5924 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5925 gsi_remove (&si, true);
5926
5927 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5928 }
5929
5930 /* Expand code for an OpenMP single directive. We've already expanded
5931 much of the code; here we simply place the GOMP_barrier call. */
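
/* For instance (illustration only), '#pragma omp single' without a
   nowait clause keeps its implicit barrier: the GIMPLE_OMP_RETURN at
   the region exit is replaced by the call built by omp_build_barrier,
   i.e. GOMP_barrier (), or GOMP_barrier_cancel () when the return
   carries an lhs (the cancellable case).  */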
5932
5933 static void
5934 expand_omp_single (struct omp_region *region)
5935 {
5936 basic_block entry_bb, exit_bb;
5937 gimple_stmt_iterator si;
5938
5939 entry_bb = region->entry;
5940 exit_bb = region->exit;
5941
5942 si = gsi_last_nondebug_bb (entry_bb);
5943 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5944 gsi_remove (&si, true);
5945 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5946
5947 si = gsi_last_nondebug_bb (exit_bb);
5948 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5949 {
5950 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5951 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5952 }
5953 gsi_remove (&si, true);
5954 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5955 }
5956
5957 /* Generic expansion for OpenMP synchronization directives: master,
5958 ordered and critical. All we need to do here is remove the entry
5959 and exit markers for REGION. */
5960
5961 static void
5962 expand_omp_synch (struct omp_region *region)
5963 {
5964 basic_block entry_bb, exit_bb;
5965 gimple_stmt_iterator si;
5966
5967 entry_bb = region->entry;
5968 exit_bb = region->exit;
5969
5970 si = gsi_last_nondebug_bb (entry_bb);
5971 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5972 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5973 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5976 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5977 gsi_remove (&si, true);
5978 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5979
5980 if (exit_bb)
5981 {
5982 si = gsi_last_nondebug_bb (exit_bb);
5983 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5984 gsi_remove (&si, true);
5985 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5986 }
5987 }
5988
5989 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
5990 operation as a normal volatile load. */
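
/* Illustration (hypothetical user code): for a 4-byte int x,

     #pragma omp atomic read
     v = x;

   becomes roughly v = __atomic_load_4 (&x, MEMMODEL_RELAXED), or
   MEMMODEL_SEQ_CST if the directive carried a seq_cst clause.  */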
5991
5992 static bool
5993 expand_omp_atomic_load (basic_block load_bb, tree addr,
5994 tree loaded_val, int index)
5995 {
5996 enum built_in_function tmpbase;
5997 gimple_stmt_iterator gsi;
5998 basic_block store_bb;
5999 location_t loc;
6000 gimple *stmt;
6001 tree decl, call, type, itype;
6002
6003 gsi = gsi_last_nondebug_bb (load_bb);
6004 stmt = gsi_stmt (gsi);
6005 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6006 loc = gimple_location (stmt);
6007
6008 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6009 is smaller than word size, then expand_atomic_load assumes that the load
6010 is atomic. We could avoid the builtin entirely in this case. */
6011
6012 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6013 decl = builtin_decl_explicit (tmpbase);
6014 if (decl == NULL_TREE)
6015 return false;
6016
6017 type = TREE_TYPE (loaded_val);
6018 itype = TREE_TYPE (TREE_TYPE (decl));
6019
6020 call = build_call_expr_loc (loc, decl, 2, addr,
6021 build_int_cst (NULL,
6022 gimple_omp_atomic_seq_cst_p (stmt)
6023 ? MEMMODEL_SEQ_CST
6024 : MEMMODEL_RELAXED));
6025 if (!useless_type_conversion_p (type, itype))
6026 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6027 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6028
6029 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6030 gsi_remove (&gsi, true);
6031
6032 store_bb = single_succ (load_bb);
6033 gsi = gsi_last_nondebug_bb (store_bb);
6034 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6035 gsi_remove (&gsi, true);
6036
6037 if (gimple_in_ssa_p (cfun))
6038 update_ssa (TODO_update_ssa_no_phi);
6039
6040 return true;
6041 }
6042
6043 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6044 operation as a normal volatile store. */
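
/* Illustration (hypothetical user code): for a 4-byte int x,

     #pragma omp atomic write
     x = expr;

   becomes roughly __atomic_store_4 (&x, expr, MEMMODEL_RELAXED); if the
   old value is needed as well (e.g. an atomic capture), the exchange
   builtin __atomic_exchange_4 is used instead.  */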
6045
6046 static bool
6047 expand_omp_atomic_store (basic_block load_bb, tree addr,
6048 tree loaded_val, tree stored_val, int index)
6049 {
6050 enum built_in_function tmpbase;
6051 gimple_stmt_iterator gsi;
6052 basic_block store_bb = single_succ (load_bb);
6053 location_t loc;
6054 gimple *stmt;
6055 tree decl, call, type, itype;
6056 machine_mode imode;
6057 bool exchange;
6058
6059 gsi = gsi_last_nondebug_bb (load_bb);
6060 stmt = gsi_stmt (gsi);
6061 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6062
6063 /* If the load value is needed, then this isn't a store but an exchange. */
6064 exchange = gimple_omp_atomic_need_value_p (stmt);
6065
6066 gsi = gsi_last_nondebug_bb (store_bb);
6067 stmt = gsi_stmt (gsi);
6068 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6069 loc = gimple_location (stmt);
6070
6071 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6072 is smaller than word size, then expand_atomic_store assumes that the store
6073 is atomic. We could avoid the builtin entirely in this case. */
6074
6075 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6076 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6077 decl = builtin_decl_explicit (tmpbase);
6078 if (decl == NULL_TREE)
6079 return false;
6080
6081 type = TREE_TYPE (stored_val);
6082
6083 /* Dig out the type of the function's second argument. */
6084 itype = TREE_TYPE (decl);
6085 itype = TYPE_ARG_TYPES (itype);
6086 itype = TREE_CHAIN (itype);
6087 itype = TREE_VALUE (itype);
6088 imode = TYPE_MODE (itype);
6089
6090 if (exchange && !can_atomic_exchange_p (imode, true))
6091 return false;
6092
6093 if (!useless_type_conversion_p (itype, type))
6094 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6095 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6096 build_int_cst (NULL,
6097 gimple_omp_atomic_seq_cst_p (stmt)
6098 ? MEMMODEL_SEQ_CST
6099 : MEMMODEL_RELAXED));
6100 if (exchange)
6101 {
6102 if (!useless_type_conversion_p (type, itype))
6103 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6104 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6105 }
6106
6107 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6108 gsi_remove (&gsi, true);
6109
6110 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6111 gsi = gsi_last_nondebug_bb (load_bb);
6112 gsi_remove (&gsi, true);
6113
6114 if (gimple_in_ssa_p (cfun))
6115 update_ssa (TODO_update_ssa_no_phi);
6116
6117 return true;
6118 }
6119
6120 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6121 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6122 size of the data type, and thus usable to find the index of the builtin
6123 decl. Returns false if the expression is not of the proper form. */
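
/* Illustration (hypothetical user code): for a 4-byte int x,

     #pragma omp atomic
     x += n;

   matches the PLUS_EXPR case below and becomes roughly
   __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED); when the updated
   value is needed, the __atomic_add_fetch_4 variant is selected
   instead.  */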
6124
6125 static bool
6126 expand_omp_atomic_fetch_op (basic_block load_bb,
6127 tree addr, tree loaded_val,
6128 tree stored_val, int index)
6129 {
6130 enum built_in_function oldbase, newbase, tmpbase;
6131 tree decl, itype, call;
6132 tree lhs, rhs;
6133 basic_block store_bb = single_succ (load_bb);
6134 gimple_stmt_iterator gsi;
6135 gimple *stmt;
6136 location_t loc;
6137 enum tree_code code;
6138 bool need_old, need_new;
6139 machine_mode imode;
6140 bool seq_cst;
6141
6142 /* We expect to find the following sequences:
6143
6144 load_bb:
6145 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6146
6147 store_bb:
6148 val = tmp OP something; (or: something OP tmp)
6149 GIMPLE_OMP_STORE (val)
6150
6151 ???FIXME: Allow a more flexible sequence.
6152 Perhaps use data flow to pick the statements.
6153
6154 */
6155
6156 gsi = gsi_after_labels (store_bb);
6157 stmt = gsi_stmt (gsi);
6158 if (is_gimple_debug (stmt))
6159 {
6160 gsi_next_nondebug (&gsi);
6161 if (gsi_end_p (gsi))
6162 return false;
6163 stmt = gsi_stmt (gsi);
6164 }
6165 loc = gimple_location (stmt);
6166 if (!is_gimple_assign (stmt))
6167 return false;
6168 gsi_next_nondebug (&gsi);
6169 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6170 return false;
6171 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6172 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6173 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6174 gcc_checking_assert (!need_old || !need_new);
6175
6176 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6177 return false;
6178
6179 /* Check for one of the supported fetch-op operations. */
6180 code = gimple_assign_rhs_code (stmt);
6181 switch (code)
6182 {
6183 case PLUS_EXPR:
6184 case POINTER_PLUS_EXPR:
6185 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6186 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6187 break;
6188 case MINUS_EXPR:
6189 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6190 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6191 break;
6192 case BIT_AND_EXPR:
6193 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6194 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6195 break;
6196 case BIT_IOR_EXPR:
6197 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6198 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6199 break;
6200 case BIT_XOR_EXPR:
6201 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6202 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6203 break;
6204 default:
6205 return false;
6206 }
6207
6208 /* Make sure the expression is of the proper form. */
6209 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6210 rhs = gimple_assign_rhs2 (stmt);
6211 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6212 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6213 rhs = gimple_assign_rhs1 (stmt);
6214 else
6215 return false;
6216
6217 tmpbase = ((enum built_in_function)
6218 ((need_new ? newbase : oldbase) + index + 1));
6219 decl = builtin_decl_explicit (tmpbase);
6220 if (decl == NULL_TREE)
6221 return false;
6222 itype = TREE_TYPE (TREE_TYPE (decl));
6223 imode = TYPE_MODE (itype);
6224
6225 /* We could test all of the various optabs involved, but the fact of the
6226 matter is that (with the exception of i486 vs i586 and xadd) all targets
6227 that support any atomic operation optab also implement compare-and-swap.
6228 Let optabs.c take care of expanding any compare-and-swap loop. */
6229 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6230 return false;
6231
6232 gsi = gsi_last_nondebug_bb (load_bb);
6233 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6234
6235 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6236 It only requires that the operation happen atomically. Thus we can
6237 use the RELAXED memory model. */
6238 call = build_call_expr_loc (loc, decl, 3, addr,
6239 fold_convert_loc (loc, itype, rhs),
6240 build_int_cst (NULL,
6241 seq_cst ? MEMMODEL_SEQ_CST
6242 : MEMMODEL_RELAXED));
6243
6244 if (need_old || need_new)
6245 {
6246 lhs = need_old ? loaded_val : stored_val;
6247 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6248 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6249 }
6250 else
6251 call = fold_convert_loc (loc, void_type_node, call);
6252 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6253 gsi_remove (&gsi, true);
6254
6255 gsi = gsi_last_nondebug_bb (store_bb);
6256 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6257 gsi_remove (&gsi, true);
6258 gsi = gsi_last_nondebug_bb (store_bb);
6259 stmt = gsi_stmt (gsi);
6260 gsi_remove (&gsi, true);
6261
6262 if (gimple_in_ssa_p (cfun))
6263 {
6264 release_defs (stmt);
6265 update_ssa (TODO_update_ssa_no_phi);
6266 }
6267
6268 return true;
6269 }
6270
6271 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6272
6273 oldval = *addr;
6274 repeat:
6275 newval = rhs; // with oldval replacing *addr in rhs
6276 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6277 if (oldval != newval)
6278 goto repeat;
6279
6280 INDEX is log2 of the size of the data type, and thus usable to find the
6281 index of the builtin decl. */
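
/* For example (illustration only), a 4-byte float x updated by
   '#pragma omp atomic' with 'x += 1.0f' is not handled by
   expand_omp_atomic_fetch_op (which is only tried for integral and
   pointer types), so the value is view-converted to a 4-byte integer
   and the loop above is emitted around __sync_val_compare_and_swap_4.  */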
6282
6283 static bool
6284 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6285 tree addr, tree loaded_val, tree stored_val,
6286 int index)
6287 {
6288 tree loadedi, storedi, initial, new_storedi, old_vali;
6289 tree type, itype, cmpxchg, iaddr, atype;
6290 gimple_stmt_iterator si;
6291 basic_block loop_header = single_succ (load_bb);
6292 gimple *phi, *stmt;
6293 edge e;
6294 enum built_in_function fncode;
6295
6296 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6297 order to use the RELAXED memory model effectively. */
6298 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6299 + index + 1);
6300 cmpxchg = builtin_decl_explicit (fncode);
6301 if (cmpxchg == NULL_TREE)
6302 return false;
6303 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6304 atype = type;
6305 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6306
6307 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6308 || !can_atomic_load_p (TYPE_MODE (itype)))
6309 return false;
6310
6311 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6312 si = gsi_last_nondebug_bb (load_bb);
6313 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6314
6315 /* For floating-point values, we'll need to view-convert them to integers
6316 so that we can perform the atomic compare and swap. Simplify the
6317 following code by always setting up the "i"ntegral variables. */
6318 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6319 {
6320 tree iaddr_val;
6321
6322 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6323 true));
6324 atype = itype;
6325 iaddr_val
6326 = force_gimple_operand_gsi (&si,
6327 fold_convert (TREE_TYPE (iaddr), addr),
6328 false, NULL_TREE, true, GSI_SAME_STMT);
6329 stmt = gimple_build_assign (iaddr, iaddr_val);
6330 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6331 loadedi = create_tmp_var (itype);
6332 if (gimple_in_ssa_p (cfun))
6333 loadedi = make_ssa_name (loadedi);
6334 }
6335 else
6336 {
6337 iaddr = addr;
6338 loadedi = loaded_val;
6339 }
6340
6341 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6342 tree loaddecl = builtin_decl_explicit (fncode);
6343 if (loaddecl)
6344 initial
6345 = fold_convert (atype,
6346 build_call_expr (loaddecl, 2, iaddr,
6347 build_int_cst (NULL_TREE,
6348 MEMMODEL_RELAXED)));
6349 else
6350 {
6351 tree off
6352 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6353 true), 0);
6354 initial = build2 (MEM_REF, atype, iaddr, off);
6355 }
6356
6357 initial
6358 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6359 GSI_SAME_STMT);
6360
6361 /* Move the value to the LOADEDI temporary. */
6362 if (gimple_in_ssa_p (cfun))
6363 {
6364 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6365 phi = create_phi_node (loadedi, loop_header);
6366 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6367 initial);
6368 }
6369 else
6370 gsi_insert_before (&si,
6371 gimple_build_assign (loadedi, initial),
6372 GSI_SAME_STMT);
6373 if (loadedi != loaded_val)
6374 {
6375 gimple_stmt_iterator gsi2;
6376 tree x;
6377
6378 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6379 gsi2 = gsi_start_bb (loop_header);
6380 if (gimple_in_ssa_p (cfun))
6381 {
6382 gassign *stmt;
6383 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6384 true, GSI_SAME_STMT);
6385 stmt = gimple_build_assign (loaded_val, x);
6386 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6387 }
6388 else
6389 {
6390 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6391 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6392 true, GSI_SAME_STMT);
6393 }
6394 }
6395 gsi_remove (&si, true);
6396
6397 si = gsi_last_nondebug_bb (store_bb);
6398 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6399
6400 if (iaddr == addr)
6401 storedi = stored_val;
6402 else
6403 storedi
6404 = force_gimple_operand_gsi (&si,
6405 build1 (VIEW_CONVERT_EXPR, itype,
6406 stored_val), true, NULL_TREE, true,
6407 GSI_SAME_STMT);
6408
6409 /* Build the compare&swap statement. */
6410 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6411 new_storedi = force_gimple_operand_gsi (&si,
6412 fold_convert (TREE_TYPE (loadedi),
6413 new_storedi),
6414 true, NULL_TREE,
6415 true, GSI_SAME_STMT);
6416
6417 if (gimple_in_ssa_p (cfun))
6418 old_vali = loadedi;
6419 else
6420 {
6421 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6422 stmt = gimple_build_assign (old_vali, loadedi);
6423 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6424
6425 stmt = gimple_build_assign (loadedi, new_storedi);
6426 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6427 }
6428
6429 /* Note that we always perform the comparison as an integer, even for
6430 floating point. This allows the atomic operation to properly
6431 succeed even with NaNs and -0.0. */
6432 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6433 stmt = gimple_build_cond_empty (ne);
6434 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6435
6436 /* Update cfg. */
6437 e = single_succ_edge (store_bb);
6438 e->flags &= ~EDGE_FALLTHRU;
6439 e->flags |= EDGE_FALSE_VALUE;
6440 /* Expect no looping. */
6441 e->probability = profile_probability::guessed_always ();
6442
6443 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6444 e->probability = profile_probability::guessed_never ();
6445
6446 /* Copy the new value to loadedi (we already did that before the condition
6447 if we are not in SSA). */
6448 if (gimple_in_ssa_p (cfun))
6449 {
6450 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6451 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6452 }
6453
6454 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6455 gsi_remove (&si, true);
6456
6457 struct loop *loop = alloc_loop ();
6458 loop->header = loop_header;
6459 loop->latch = store_bb;
6460 add_loop (loop, loop_header->loop_father);
6461
6462 if (gimple_in_ssa_p (cfun))
6463 update_ssa (TODO_update_ssa_no_phi);
6464
6465 return true;
6466 }
6467
6468 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6469
6470 GOMP_atomic_start ();
6471 *addr = rhs;
6472 GOMP_atomic_end ();
6473
6474 The result is not globally atomic, but works so long as all parallel
6475 references are within #pragma omp atomic directives. According to
6476 responses received from omp@openmp.org, this appears to be within spec,
6477 which makes sense, since that's how several other compilers handle
6478 this situation as well.
6479 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6480 expanding. STORED_VAL is the operand of the matching
6481 GIMPLE_OMP_ATOMIC_STORE.
6482
6483 We replace
6484 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6485 loaded_val = *addr;
6486
6487 and replace
6488 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6489 *addr = stored_val;
6490 */
6491
6492 static bool
6493 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6494 tree addr, tree loaded_val, tree stored_val)
6495 {
6496 gimple_stmt_iterator si;
6497 gassign *stmt;
6498 tree t;
6499
6500 si = gsi_last_nondebug_bb (load_bb);
6501 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6502
6503 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6504 t = build_call_expr (t, 0);
6505 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6506
6507 tree mem = build_simple_mem_ref (addr);
6508 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6509 TREE_OPERAND (mem, 1)
6510 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6511 true),
6512 TREE_OPERAND (mem, 1));
6513 stmt = gimple_build_assign (loaded_val, mem);
6514 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6515 gsi_remove (&si, true);
6516
6517 si = gsi_last_nondebug_bb (store_bb);
6518 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6519
6520 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6521 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6522
6523 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6524 t = build_call_expr (t, 0);
6525 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6526 gsi_remove (&si, true);
6527
6528 if (gimple_in_ssa_p (cfun))
6529 update_ssa (TODO_update_ssa_no_phi);
6530 return true;
6531 }
6532
6533 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
6534 using expand_omp_atomic_fetch_op. If that fails, we try
6535 expand_omp_atomic_pipeline, and if that fails too, the
6536 ultimate fallback is wrapping the operation in a mutex
6537 (expand_omp_atomic_mutex). REGION is the atomic region built
6538 by build_omp_regions_1(). */
6539
6540 static void
6541 expand_omp_atomic (struct omp_region *region)
6542 {
6543 basic_block load_bb = region->entry, store_bb = region->exit;
6544 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6545 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6546 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6547 tree addr = gimple_omp_atomic_load_rhs (load);
6548 tree stored_val = gimple_omp_atomic_store_val (store);
6549 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6550 HOST_WIDE_INT index;
6551
6552 /* Make sure the type is one of the supported sizes. */
6553 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6554 index = exact_log2 (index);
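/* E.g. a 4-byte int has TYPE_SIZE_UNIT 4, so index == 2 and the *_4
   variants of the __atomic/__sync builtins are selected below;
   index <= 4 allows operand sizes of up to 16 bytes.  */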
6555 if (index >= 0 && index <= 4)
6556 {
6557 unsigned int align = TYPE_ALIGN_UNIT (type);
6558
6559 /* __sync builtins require strict data alignment. */
6560 if (exact_log2 (align) >= index)
6561 {
6562 /* Atomic load. */
6563 scalar_mode smode;
6564 if (loaded_val == stored_val
6565 && (is_int_mode (TYPE_MODE (type), &smode)
6566 || is_float_mode (TYPE_MODE (type), &smode))
6567 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6568 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6569 return;
6570
6571 /* Atomic store. */
6572 if ((is_int_mode (TYPE_MODE (type), &smode)
6573 || is_float_mode (TYPE_MODE (type), &smode))
6574 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6575 && store_bb == single_succ (load_bb)
6576 && first_stmt (store_bb) == store
6577 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6578 stored_val, index))
6579 return;
6580
6581 /* When possible, use specialized atomic update functions. */
6582 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6583 && store_bb == single_succ (load_bb)
6584 && expand_omp_atomic_fetch_op (load_bb, addr,
6585 loaded_val, stored_val, index))
6586 return;
6587
6588 /* If we don't have specialized __sync builtins, try and implement
6589 as a compare and swap loop. */
6590 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6591 loaded_val, stored_val, index))
6592 return;
6593 }
6594 }
6595
6596 /* The ultimate fallback is wrapping the operation in a mutex. */
6597 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6598 }
6599
6600 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6601 at REGION_EXIT. */
6602
6603 static void
6604 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6605 basic_block region_exit)
6606 {
6607 struct loop *outer = region_entry->loop_father;
6608 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6609
6610 /* Don't parallelize the kernels region if it contains more than one outer
6611 loop. */
6612 unsigned int nr_outer_loops = 0;
6613 struct loop *single_outer = NULL;
6614 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6615 {
6616 gcc_assert (loop_outer (loop) == outer);
6617
6618 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6619 continue;
6620
6621 if (region_exit != NULL
6622 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6623 continue;
6624
6625 nr_outer_loops++;
6626 single_outer = loop;
6627 }
6628 if (nr_outer_loops != 1)
6629 return;
6630
6631 for (struct loop *loop = single_outer->inner;
6632 loop != NULL;
6633 loop = loop->inner)
6634 if (loop->next)
6635 return;
6636
6637 /* Mark the loops in the region. */
6638 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6639 loop->in_oacc_kernels_region = true;
6640 }
6641
6642 /* Types used to pass grid and workgroup sizes to kernel invocations. */
6643
6644 struct GTY(()) grid_launch_attributes_trees
6645 {
6646 tree kernel_dim_array_type;
6647 tree kernel_lattrs_dimnum_decl;
6648 tree kernel_lattrs_grid_decl;
6649 tree kernel_lattrs_group_decl;
6650 tree kernel_launch_attributes_type;
6651 };
6652
6653 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6654
6655 /* Create types used to pass kernel launch attributes to target. */
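
/* A sketch, for illustration only, of the C equivalent of the record
   type built below (the authoritative layout is whatever the trees
   describe):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };  */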
6656
6657 static void
6658 grid_create_kernel_launch_attr_types (void)
6659 {
6660 if (grid_attr_trees)
6661 return;
6662 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6663
6664 tree dim_arr_index_type
6665 = build_index_type (build_int_cst (integer_type_node, 2));
6666 grid_attr_trees->kernel_dim_array_type
6667 = build_array_type (uint32_type_node, dim_arr_index_type);
6668
6669 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6670 grid_attr_trees->kernel_lattrs_dimnum_decl
6671 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6672 uint32_type_node);
6673 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6674
6675 grid_attr_trees->kernel_lattrs_grid_decl
6676 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6677 grid_attr_trees->kernel_dim_array_type);
6678 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6679 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6680 grid_attr_trees->kernel_lattrs_group_decl
6681 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6682 grid_attr_trees->kernel_dim_array_type);
6683 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6684 = grid_attr_trees->kernel_lattrs_grid_decl;
6685 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6686 "__gomp_kernel_launch_attributes",
6687 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6688 }
6689
6690 /* Insert, before the current statement in GSI, a store of VALUE into element
6691 INDEX of the array field FLD_DECL (of type kernel_dim_array_type) of
6692 RANGE_VAR. VALUE must be of type uint32_type_node. */
6693
6694 static void
6695 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6696 tree fld_decl, int index, tree value)
6697 {
6698 tree ref = build4 (ARRAY_REF, uint32_type_node,
6699 build3 (COMPONENT_REF,
6700 grid_attr_trees->kernel_dim_array_type,
6701 range_var, fld_decl, NULL_TREE),
6702 build_int_cst (integer_type_node, index),
6703 NULL_TREE, NULL_TREE);
6704 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6705 }
6706
6707 /* Return a tree representation of a pointer to a structure with grid and
6708 work-group size information. Statements filling that information will be
6709 inserted before GSI. TGT_STMT is the target statement which carries the
6710 necessary information. */
6711
6712 static tree
6713 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6714 gomp_target *tgt_stmt)
6715 {
6716 grid_create_kernel_launch_attr_types ();
6717 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6718 "__kernel_launch_attrs");
6719
6720 unsigned max_dim = 0;
6721 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6722 clause;
6723 clause = OMP_CLAUSE_CHAIN (clause))
6724 {
6725 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6726 continue;
6727
6728 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6729 max_dim = MAX (dim, max_dim);
6730
6731 grid_insert_store_range_dim (gsi, lattrs,
6732 grid_attr_trees->kernel_lattrs_grid_decl,
6733 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6734 grid_insert_store_range_dim (gsi, lattrs,
6735 grid_attr_trees->kernel_lattrs_group_decl,
6736 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6737 }
6738
6739 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6740 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6741 gcc_checking_assert (max_dim <= 2);
6742 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6743 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6744 GSI_SAME_STMT);
6745 TREE_ADDRESSABLE (lattrs) = 1;
6746 return build_fold_addr_expr (lattrs);
6747 }
6748
6749 /* Build a target argument identifier from the DEVICE identifier, the value
6750 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6751
6752 static tree
6753 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6754 {
6755 tree t = build_int_cst (integer_type_node, device);
6756 if (subseqent_param)
6757 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6758 build_int_cst (integer_type_node,
6759 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6760 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6761 build_int_cst (integer_type_node, id));
6762 return t;
6763 }
6764
6765 /* Like above, but return it in a type that can be directly stored as an
6766 element of the argument array. */
6767
6768 static tree
6769 get_target_argument_identifier (int device, bool subseqent_param, int id)
6770 {
6771 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6772 return fold_convert (ptr_type_node, t);
6773 }
6774
6775 /* Return a target argument consisting of DEVICE identifier, value identifier
6776 ID, and the actual VALUE. */
6777
6778 static tree
6779 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6780 tree value)
6781 {
6782 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6783 fold_convert (integer_type_node, value),
6784 build_int_cst (unsigned_type_node,
6785 GOMP_TARGET_ARG_VALUE_SHIFT));
6786 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6787 get_target_argument_identifier_1 (device, false, id));
6788 t = fold_convert (ptr_type_node, t);
6789 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6790 }
6791
6792 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6793 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6794 otherwise push an identifier (with DEVICE and ID) and the VALUE as two
6795 separate arguments. */
6796
6797 static void
6798 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6799 int id, tree value, vec <tree> *args)
6800 {
6801 if (tree_fits_shwi_p (value)
6802 && tree_to_shwi (value) > -(1 << 15)
6803 && tree_to_shwi (value) < (1 << 15))
6804 args->quick_push (get_target_argument_value (gsi, device, id, value));
6805 else
6806 {
6807 args->quick_push (get_target_argument_identifier (device, true, id));
6808 value = fold_convert (ptr_type_node, value);
6809 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6810 GSI_SAME_STMT);
6811 args->quick_push (value);
6812 }
6813 }
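
/* A sketch of the encoding used above: an identifier is the bitwise OR of
   the DEVICE, an optional GOMP_TARGET_ARG_SUBSEQUENT_PARAM flag and the ID,
   while an embedded VALUE is shifted left by GOMP_TARGET_ARG_VALUE_SHIFT
   before being OR'd in; presumably that is why only values strictly between
   -2^15 and 2^15 may be packed into a single array element. For example
   (hypothetical clause values, see gomp-constants.h for the real bit layout):

       num_teams (4)     -> one element:  id | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       num_teams (1<<20) -> two elements: id with SUBSEQUENT_PARAM, then the value. */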
6814
6815 /* Create an array of arguments that is then passed to GOMP_target. */
6816
6817 static tree
6818 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6819 {
6820 auto_vec <tree, 6> args;
6821 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6822 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6823 if (c)
6824 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6825 else
6826 t = integer_minus_one_node;
6827 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6828 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6829
6830 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6831 if (c)
6832 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6833 else
6834 t = integer_minus_one_node;
6835 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6836 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6837 &args);
6838
6839 /* Add HSA-specific grid sizes, if available. */
6840 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6841 OMP_CLAUSE__GRIDDIM_))
6842 {
6843 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6844 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6845 args.quick_push (t);
6846 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6847 }
6848
6849 /* Produce more, perhaps device specific, arguments here. */
6850
6851 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6852 args.length () + 1),
6853 ".omp_target_args");
6854 for (unsigned i = 0; i < args.length (); i++)
6855 {
6856 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6857 build_int_cst (integer_type_node, i),
6858 NULL_TREE, NULL_TREE);
6859 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6860 GSI_SAME_STMT);
6861 }
6862 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6863 build_int_cst (integer_type_node, args.length ()),
6864 NULL_TREE, NULL_TREE);
6865 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6866 GSI_SAME_STMT);
6867 TREE_ADDRESSABLE (argarray) = 1;
6868 return build_fold_addr_expr (argarray);
6869 }
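
/* For example (a sketch of the result, not literal GIMPLE), a target region
   with num_teams(4) and thread_limit(32) and no HSA grid sizes yields a
   NULL-terminated array along the lines of

       .omp_target_args[0] = <NUM_TEAMS id  | 4  << GOMP_TARGET_ARG_VALUE_SHIFT>;
       .omp_target_args[1] = <THREAD_LIMIT id | 32 << GOMP_TARGET_ARG_VALUE_SHIFT>;
       .omp_target_args[2] = NULL;

   whose address becomes the final argument of the GOMP_target call built in
   expand_omp_target below. */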
6870
6871 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6872
6873 static void
6874 expand_omp_target (struct omp_region *region)
6875 {
6876 basic_block entry_bb, exit_bb, new_bb;
6877 struct function *child_cfun;
6878 tree child_fn, block, t;
6879 gimple_stmt_iterator gsi;
6880 gomp_target *entry_stmt;
6881 gimple *stmt;
6882 edge e;
6883 bool offloaded, data_region;
6884
6885 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6886 new_bb = region->entry;
6887
6888 offloaded = is_gimple_omp_offloaded (entry_stmt);
6889 switch (gimple_omp_target_kind (entry_stmt))
6890 {
6891 case GF_OMP_TARGET_KIND_REGION:
6892 case GF_OMP_TARGET_KIND_UPDATE:
6893 case GF_OMP_TARGET_KIND_ENTER_DATA:
6894 case GF_OMP_TARGET_KIND_EXIT_DATA:
6895 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6896 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6897 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6898 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6899 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6900 data_region = false;
6901 break;
6902 case GF_OMP_TARGET_KIND_DATA:
6903 case GF_OMP_TARGET_KIND_OACC_DATA:
6904 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6905 data_region = true;
6906 break;
6907 default:
6908 gcc_unreachable ();
6909 }
6910
6911 child_fn = NULL_TREE;
6912 child_cfun = NULL;
6913 if (offloaded)
6914 {
6915 child_fn = gimple_omp_target_child_fn (entry_stmt);
6916 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6917 }
6918
6919 /* Supported by expand_omp_taskreg, but not here. */
6920 if (child_cfun != NULL)
6921 gcc_checking_assert (!child_cfun->cfg);
6922 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6923
6924 entry_bb = region->entry;
6925 exit_bb = region->exit;
6926
6927 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6928 {
6929 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6930
6931 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6932 will be mapped to BUILT_IN_GOACC_PARALLEL; to distinguish the
6933 two, an "oacc kernels" attribute is set for OpenACC kernels. */
6934 DECL_ATTRIBUTES (child_fn)
6935 = tree_cons (get_identifier ("oacc kernels"),
6936 NULL_TREE, DECL_ATTRIBUTES (child_fn));
6937 }
6938
6939 if (offloaded)
6940 {
6941 unsigned srcidx, dstidx, num;
6942
6943 /* If the offloading region needs data sent from the parent
6944 function, then the very first statement (except possible
6945 tree profile counter updates) of the offloading body
6946 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6947 &.OMP_DATA_O is passed as an argument to the child function,
6948 we need to replace it with the argument as seen by the child
6949 function.
6950
6951 In most cases, this will end up being the identity assignment
6952 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6953 a function call that has been inlined, the original PARM_DECL
6954 .OMP_DATA_I may have been converted into a different local
6955 variable, in which case we need to keep the assignment. */
6956 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6957 if (data_arg)
6958 {
6959 basic_block entry_succ_bb = single_succ (entry_bb);
6960 gimple_stmt_iterator gsi;
6961 tree arg;
6962 gimple *tgtcopy_stmt = NULL;
6963 tree sender = TREE_VEC_ELT (data_arg, 0);
6964
6965 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6966 {
6967 gcc_assert (!gsi_end_p (gsi));
6968 stmt = gsi_stmt (gsi);
6969 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6970 continue;
6971
6972 if (gimple_num_ops (stmt) == 2)
6973 {
6974 tree arg = gimple_assign_rhs1 (stmt);
6975
6976 /* We're ignoring the subcode because we're
6977 effectively doing a STRIP_NOPS. */
6978
6979 if (TREE_CODE (arg) == ADDR_EXPR
6980 && TREE_OPERAND (arg, 0) == sender)
6981 {
6982 tgtcopy_stmt = stmt;
6983 break;
6984 }
6985 }
6986 }
6987
6988 gcc_assert (tgtcopy_stmt != NULL);
6989 arg = DECL_ARGUMENTS (child_fn);
6990
6991 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6992 gsi_remove (&gsi, true);
6993 }
6994
6995 /* Declare local variables needed in CHILD_CFUN. */
6996 block = DECL_INITIAL (child_fn);
6997 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6998 /* The gimplifier could record temporaries in the offloading block
6999 rather than in the containing function's local_decls chain,
7000 which would mean cgraph missed finalizing them. Do it now. */
7001 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7002 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7003 varpool_node::finalize_decl (t);
7004 DECL_SAVED_TREE (child_fn) = NULL;
7005 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7006 gimple_set_body (child_fn, NULL);
7007 TREE_USED (block) = 1;
7008
7009 /* Reset DECL_CONTEXT on function arguments. */
7010 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7011 DECL_CONTEXT (t) = child_fn;
7012
7013 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7014 so that it can be moved to the child function. */
7015 gsi = gsi_last_nondebug_bb (entry_bb);
7016 stmt = gsi_stmt (gsi);
7017 gcc_assert (stmt
7018 && gimple_code (stmt) == gimple_code (entry_stmt));
7019 e = split_block (entry_bb, stmt);
7020 gsi_remove (&gsi, true);
7021 entry_bb = e->dest;
7022 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7023
7024 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7025 if (exit_bb)
7026 {
7027 gsi = gsi_last_nondebug_bb (exit_bb);
7028 gcc_assert (!gsi_end_p (gsi)
7029 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7030 stmt = gimple_build_return (NULL);
7031 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7032 gsi_remove (&gsi, true);
7033 }
7034
7035 /* Make sure to generate early debug for the function before
7036 outlining anything. */
7037 if (! gimple_in_ssa_p (cfun))
7038 (*debug_hooks->early_global_decl) (cfun->decl);
7039
7040 /* Move the offloading region into CHILD_CFUN. */
7041
7042 block = gimple_block (entry_stmt);
7043
7044 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7045 if (exit_bb)
7046 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7047 /* When the OMP expansion process cannot guarantee an up-to-date
7048 loop tree, arrange for the child function to fix up its loops. */
7049 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7050 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7051
7052 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7053 num = vec_safe_length (child_cfun->local_decls);
7054 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7055 {
7056 t = (*child_cfun->local_decls)[srcidx];
7057 if (DECL_CONTEXT (t) == cfun->decl)
7058 continue;
7059 if (srcidx != dstidx)
7060 (*child_cfun->local_decls)[dstidx] = t;
7061 dstidx++;
7062 }
7063 if (dstidx != num)
7064 vec_safe_truncate (child_cfun->local_decls, dstidx);
7065
7066 /* Inform the callgraph about the new function. */
7067 child_cfun->curr_properties = cfun->curr_properties;
7068 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7069 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7070 cgraph_node *node = cgraph_node::get_create (child_fn);
7071 node->parallelized_function = 1;
7072 cgraph_node::add_new_function (child_fn, true);
7073
7074 /* Add the new function to the offload table. */
7075 if (ENABLE_OFFLOADING)
7076 {
7077 if (in_lto_p)
7078 DECL_PRESERVE_P (child_fn) = 1;
7079 vec_safe_push (offload_funcs, child_fn);
7080 }
7081
7082 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7083 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7084
7085 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7086 fixed in a following pass. */
7087 push_cfun (child_cfun);
7088 if (need_asm)
7089 assign_assembler_name_if_needed (child_fn);
7090 cgraph_edge::rebuild_edges ();
7091
7092 /* Some EH regions might become dead, see PR34608. If
7093 pass_cleanup_cfg isn't the first pass to happen with the
7094 new child, these dead EH edges might cause problems.
7095 Clean them up now. */
7096 if (flag_exceptions)
7097 {
7098 basic_block bb;
7099 bool changed = false;
7100
7101 FOR_EACH_BB_FN (bb, cfun)
7102 changed |= gimple_purge_dead_eh_edges (bb);
7103 if (changed)
7104 cleanup_tree_cfg ();
7105 }
7106 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7107 verify_loop_structure ();
7108 pop_cfun ();
7109
7110 if (dump_file && !gimple_in_ssa_p (cfun))
7111 {
7112 omp_any_child_fn_dumped = true;
7113 dump_function_header (dump_file, child_fn, dump_flags);
7114 dump_function_to_file (child_fn, dump_file, dump_flags);
7115 }
7116 }
7117
7118 /* Emit a library call to launch the offloading region, or do data
7119 transfers. */
7120 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7121 enum built_in_function start_ix;
7122 location_t clause_loc;
7123 unsigned int flags_i = 0;
7124
7125 switch (gimple_omp_target_kind (entry_stmt))
7126 {
7127 case GF_OMP_TARGET_KIND_REGION:
7128 start_ix = BUILT_IN_GOMP_TARGET;
7129 break;
7130 case GF_OMP_TARGET_KIND_DATA:
7131 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7132 break;
7133 case GF_OMP_TARGET_KIND_UPDATE:
7134 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7135 break;
7136 case GF_OMP_TARGET_KIND_ENTER_DATA:
7137 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7138 break;
7139 case GF_OMP_TARGET_KIND_EXIT_DATA:
7140 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7141 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7142 break;
7143 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7144 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7145 start_ix = BUILT_IN_GOACC_PARALLEL;
7146 break;
7147 case GF_OMP_TARGET_KIND_OACC_DATA:
7148 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7149 start_ix = BUILT_IN_GOACC_DATA_START;
7150 break;
7151 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7152 start_ix = BUILT_IN_GOACC_UPDATE;
7153 break;
7154 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7155 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7156 break;
7157 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7158 start_ix = BUILT_IN_GOACC_DECLARE;
7159 break;
7160 default:
7161 gcc_unreachable ();
7162 }
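
/* For illustration, a plain "#pragma omp target" region thus becomes a call
   to the GOMP_target entry point, assembled below from the argument vector
   (device, child_fn, map count, host addresses, sizes, kinds, flags, depend,
   extra args). This is a sketch only; the exact libgomp signature is the one
   of the builtin selected above. */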
7163
7164 clauses = gimple_omp_target_clauses (entry_stmt);
7165
7166 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7167 library choose) and there is no conditional. */
7168 cond = NULL_TREE;
7169 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7170
7171 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7172 if (c)
7173 cond = OMP_CLAUSE_IF_EXPR (c);
7174
7175 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7176 if (c)
7177 {
7178 /* Even if we pass it to all library function calls, it is currently only
7179 defined/used for the OpenMP target ones. */
7180 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7181 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7182 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7183 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7184
7185 device = OMP_CLAUSE_DEVICE_ID (c);
7186 clause_loc = OMP_CLAUSE_LOCATION (c);
7187 }
7188 else
7189 clause_loc = gimple_location (entry_stmt);
7190
7191 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7192 if (c)
7193 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7194
7195 /* Ensure 'device' is of the correct type. */
7196 device = fold_convert_loc (clause_loc, integer_type_node, device);
7197
7198 /* If we found the clause 'if (cond)', build
7199 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7200 if (cond)
7201 {
7202 cond = gimple_boolify (cond);
7203
7204 basic_block cond_bb, then_bb, else_bb;
7205 edge e;
7206 tree tmp_var;
7207
7208 tmp_var = create_tmp_var (TREE_TYPE (device));
7209 if (offloaded)
7210 e = split_block_after_labels (new_bb);
7211 else
7212 {
7213 gsi = gsi_last_nondebug_bb (new_bb);
7214 gsi_prev (&gsi);
7215 e = split_block (new_bb, gsi_stmt (gsi));
7216 }
7217 cond_bb = e->src;
7218 new_bb = e->dest;
7219 remove_edge (e);
7220
7221 then_bb = create_empty_bb (cond_bb);
7222 else_bb = create_empty_bb (then_bb);
7223 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7224 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7225
7226 stmt = gimple_build_cond_empty (cond);
7227 gsi = gsi_last_bb (cond_bb);
7228 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7229
7230 gsi = gsi_start_bb (then_bb);
7231 stmt = gimple_build_assign (tmp_var, device);
7232 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7233
7234 gsi = gsi_start_bb (else_bb);
7235 stmt = gimple_build_assign (tmp_var,
7236 build_int_cst (integer_type_node,
7237 GOMP_DEVICE_HOST_FALLBACK));
7238 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7239
7240 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7241 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7242 add_bb_to_loop (then_bb, cond_bb->loop_father);
7243 add_bb_to_loop (else_bb, cond_bb->loop_father);
7244 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7245 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7246
7247 device = tmp_var;
7248 gsi = gsi_last_nondebug_bb (new_bb);
7249 }
7250 else
7251 {
7252 gsi = gsi_last_nondebug_bb (new_bb);
7253 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7254 true, GSI_SAME_STMT);
7255 }
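
/* At this point DEVICE is either the plain clause/ICV value or, when an 'if'
   clause was present, the temporary computed by the diamond built above,
   roughly equivalent to

       device = cond ? <device clause value> : GOMP_DEVICE_HOST_FALLBACK;

   which lets the runtime fall back to host execution when the condition is
   false. */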
7256
7257 t = gimple_omp_target_data_arg (entry_stmt);
7258 if (t == NULL)
7259 {
7260 t1 = size_zero_node;
7261 t2 = build_zero_cst (ptr_type_node);
7262 t3 = t2;
7263 t4 = t2;
7264 }
7265 else
7266 {
7267 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7268 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7269 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7270 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7271 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7272 }
7273
7274 gimple *g;
7275 bool tagging = false;
7276 /* The maximum number of arguments used by any start_ix, not counting varargs. */
7277 auto_vec<tree, 11> args;
7278 args.quick_push (device);
7279 if (offloaded)
7280 args.quick_push (build_fold_addr_expr (child_fn));
7281 args.quick_push (t1);
7282 args.quick_push (t2);
7283 args.quick_push (t3);
7284 args.quick_push (t4);
7285 switch (start_ix)
7286 {
7287 case BUILT_IN_GOACC_DATA_START:
7288 case BUILT_IN_GOACC_DECLARE:
7289 case BUILT_IN_GOMP_TARGET_DATA:
7290 break;
7291 case BUILT_IN_GOMP_TARGET:
7292 case BUILT_IN_GOMP_TARGET_UPDATE:
7293 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7294 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7295 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7296 if (c)
7297 depend = OMP_CLAUSE_DECL (c);
7298 else
7299 depend = build_int_cst (ptr_type_node, 0);
7300 args.quick_push (depend);
7301 if (start_ix == BUILT_IN_GOMP_TARGET)
7302 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7303 break;
7304 case BUILT_IN_GOACC_PARALLEL:
7305 oacc_set_fn_attrib (child_fn, clauses, &args);
7306 tagging = true;
7307 /* FALLTHRU */
7308 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7309 case BUILT_IN_GOACC_UPDATE:
7310 {
7311 tree t_async = NULL_TREE;
7312
7313 /* If present, use the value specified by the respective
7314 clause, making sure that is of the correct type. */
7315 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7316 if (c)
7317 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7318 integer_type_node,
7319 OMP_CLAUSE_ASYNC_EXPR (c));
7320 else if (!tagging)
7321 /* Default values for t_async. */
7322 t_async = fold_convert_loc (gimple_location (entry_stmt),
7323 integer_type_node,
7324 build_int_cst (integer_type_node,
7325 GOMP_ASYNC_SYNC));
7326 if (tagging && t_async)
7327 {
7328 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7329
7330 if (TREE_CODE (t_async) == INTEGER_CST)
7331 {
7332 /* See if we can pack the async arg into the tag's
7333 operand. */
7334 i_async = TREE_INT_CST_LOW (t_async);
7335 if (i_async < GOMP_LAUNCH_OP_MAX)
7336 t_async = NULL_TREE;
7337 else
7338 i_async = GOMP_LAUNCH_OP_MAX;
7339 }
7340 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7341 i_async));
7342 }
7343 if (t_async)
7344 args.safe_push (t_async);
7345
7346 /* Save the argument index, and ... */
7347 unsigned t_wait_idx = args.length ();
7348 unsigned num_waits = 0;
7349 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7350 if (!tagging || c)
7351 /* ... push a placeholder. */
7352 args.safe_push (integer_zero_node);
7353
7354 for (; c; c = OMP_CLAUSE_CHAIN (c))
7355 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7356 {
7357 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7358 integer_type_node,
7359 OMP_CLAUSE_WAIT_EXPR (c)));
7360 num_waits++;
7361 }
7362
7363 if (!tagging || num_waits)
7364 {
7365 tree len;
7366
7367 /* Now that we know the number, update the placeholder. */
7368 if (tagging)
7369 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7370 else
7371 len = build_int_cst (integer_type_node, num_waits);
7372 len = fold_convert_loc (gimple_location (entry_stmt),
7373 unsigned_type_node, len);
7374 args[t_wait_idx] = len;
7375 }
7376 }
7377 break;
7378 default:
7379 gcc_unreachable ();
7380 }
7381 if (tagging)
7382 /* Push terminal marker - zero. */
7383 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7384
7385 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7386 gimple_set_location (g, gimple_location (entry_stmt));
7387 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7388 if (!offloaded)
7389 {
7390 g = gsi_stmt (gsi);
7391 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7392 gsi_remove (&gsi, true);
7393 }
7394 if (data_region && region->exit)
7395 {
7396 gsi = gsi_last_nondebug_bb (region->exit);
7397 g = gsi_stmt (gsi);
7398 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7399 gsi_remove (&gsi, true);
7400 }
7401 }
7402
7403 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7404 the iteration variable derived from the thread number. INTRA_GROUP means this
7405 is an expansion of a loop iterating over work-items within a separate
7406 iteration over groups. */
7407
7408 static void
7409 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7410 {
7411 gimple_stmt_iterator gsi;
7412 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7413 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7414 == GF_OMP_FOR_KIND_GRID_LOOP);
7415 size_t collapse = gimple_omp_for_collapse (for_stmt);
7416 struct omp_for_data_loop *loops
7417 = XALLOCAVEC (struct omp_for_data_loop,
7418 gimple_omp_for_collapse (for_stmt));
7419 struct omp_for_data fd;
7420
7421 remove_edge (BRANCH_EDGE (kfor->entry));
7422 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7423
7424 gcc_assert (kfor->cont);
7425 omp_extract_for_data (for_stmt, &fd, loops);
7426
7427 gsi = gsi_start_bb (body_bb);
7428
7429 for (size_t dim = 0; dim < collapse; dim++)
7430 {
7431 tree type, itype;
7432 itype = type = TREE_TYPE (fd.loops[dim].v);
7433 if (POINTER_TYPE_P (type))
7434 itype = signed_type_for (type);
7435
7436 tree n1 = fd.loops[dim].n1;
7437 tree step = fd.loops[dim].step;
7438 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7439 true, NULL_TREE, true, GSI_SAME_STMT);
7440 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7441 true, NULL_TREE, true, GSI_SAME_STMT);
7442 tree threadid;
7443 if (gimple_omp_for_grid_group_iter (for_stmt))
7444 {
7445 gcc_checking_assert (!intra_group);
7446 threadid = build_call_expr (builtin_decl_explicit
7447 (BUILT_IN_HSA_WORKGROUPID), 1,
7448 build_int_cstu (unsigned_type_node, dim));
7449 }
7450 else if (intra_group)
7451 threadid = build_call_expr (builtin_decl_explicit
7452 (BUILT_IN_HSA_WORKITEMID), 1,
7453 build_int_cstu (unsigned_type_node, dim));
7454 else
7455 threadid = build_call_expr (builtin_decl_explicit
7456 (BUILT_IN_HSA_WORKITEMABSID), 1,
7457 build_int_cstu (unsigned_type_node, dim));
7458 threadid = fold_convert (itype, threadid);
7459 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7460 true, GSI_SAME_STMT);
7461
7462 tree startvar = fd.loops[dim].v;
7463 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7464 if (POINTER_TYPE_P (type))
7465 t = fold_build_pointer_plus (n1, t);
7466 else
7467 t = fold_build2 (PLUS_EXPR, type, t, n1);
7468 t = fold_convert (type, t);
7469 t = force_gimple_operand_gsi (&gsi, t,
7470 DECL_P (startvar)
7471 && TREE_ADDRESSABLE (startvar),
7472 NULL_TREE, true, GSI_SAME_STMT);
7473 gassign *assign_stmt = gimple_build_assign (startvar, t);
7474 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7475 }
7476 /* Remove the omp for statement. */
7477 gsi = gsi_last_nondebug_bb (kfor->entry);
7478 gsi_remove (&gsi, true);
7479
7480 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7481 gsi = gsi_last_nondebug_bb (kfor->cont);
7482 gcc_assert (!gsi_end_p (gsi)
7483 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7484 gsi_remove (&gsi, true);
7485
7486 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7487 gsi = gsi_last_nondebug_bb (kfor->exit);
7488 gcc_assert (!gsi_end_p (gsi)
7489 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7490 if (intra_group)
7491 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7492 gsi_remove (&gsi, true);
7493
7494 /* Fix up the much simpler CFG. */
7495 remove_edge (find_edge (kfor->cont, body_bb));
7496
7497 if (kfor->cont != body_bb)
7498 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7499 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7500 }
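
/* A sketch of the effect of the expansion above: a gridified loop

       for (i = N1; i < N2; i += STEP)
         body;

   loses its control flow entirely; what remains is the straight-line body
   preceded by

       i = N1 + <HSA work-item/group id for this dimension> * STEP;

   with the iteration count instead encoded in the kernel launch grid and
   group sizes. */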
7501
7502 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7503 argument_decls. */
7504
7505 struct grid_arg_decl_map
7506 {
7507 tree old_arg;
7508 tree new_arg;
7509 };
7510
7511 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7512 pertaining to the kernel function. */
7513
7514 static tree
7515 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7516 {
7517 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7518 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7519 tree t = *tp;
7520
7521 if (t == adm->old_arg)
7522 *tp = adm->new_arg;
7523 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7524 return NULL_TREE;
7525 }
7526
7527 /* If the TARGET region contains a gridified kernel body (GIMPLE_OMP_GRID_BODY),
7528 remove that region from the TARGET and expand it in HSA gridified kernel fashion. */
7529
7530 static void
7531 grid_expand_target_grid_body (struct omp_region *target)
7532 {
7533 if (!hsa_gen_requested_p ())
7534 return;
7535
7536 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7537 struct omp_region **pp;
7538
7539 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7540 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7541 break;
7542
7543 struct omp_region *gpukernel = *pp;
7544
7545 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7546 if (!gpukernel)
7547 {
7548 /* HSA cannot handle OACC stuff. */
7549 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7550 return;
7551 gcc_checking_assert (orig_child_fndecl);
7552 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7553 OMP_CLAUSE__GRIDDIM_));
7554 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7555
7556 hsa_register_kernel (n);
7557 return;
7558 }
7559
7560 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7561 OMP_CLAUSE__GRIDDIM_));
7562 tree inside_block
7563 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7564 *pp = gpukernel->next;
7565 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7566 if ((*pp)->type == GIMPLE_OMP_FOR)
7567 break;
7568
7569 struct omp_region *kfor = *pp;
7570 gcc_assert (kfor);
7571 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7572 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7573 *pp = kfor->next;
7574 if (kfor->inner)
7575 {
7576 if (gimple_omp_for_grid_group_iter (for_stmt))
7577 {
7578 struct omp_region **next_pp;
7579 for (pp = &kfor->inner; *pp; pp = next_pp)
7580 {
7581 next_pp = &(*pp)->next;
7582 if ((*pp)->type != GIMPLE_OMP_FOR)
7583 continue;
7584 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7585 gcc_assert (gimple_omp_for_kind (inner)
7586 == GF_OMP_FOR_KIND_GRID_LOOP);
7587 grid_expand_omp_for_loop (*pp, true);
7588 *pp = (*pp)->next;
7589 next_pp = pp;
7590 }
7591 }
7592 expand_omp (kfor->inner);
7593 }
7594 if (gpukernel->inner)
7595 expand_omp (gpukernel->inner);
7596
7597 tree kern_fndecl = copy_node (orig_child_fndecl);
7598 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7599 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7600 tree tgtblock = gimple_block (tgt_stmt);
7601 tree fniniblock = make_node (BLOCK);
7602 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7603 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7604 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7605 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7606 DECL_INITIAL (kern_fndecl) = fniniblock;
7607 push_struct_function (kern_fndecl);
7608 cfun->function_end_locus = gimple_location (tgt_stmt);
7609 init_tree_ssa (cfun);
7610 pop_cfun ();
7611
7612 /* Make sure to generate early debug for the function before
7613 outlining anything. */
7614 if (! gimple_in_ssa_p (cfun))
7615 (*debug_hooks->early_global_decl) (cfun->decl);
7616
7617 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7618 gcc_assert (!DECL_CHAIN (old_parm_decl));
7619 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7620 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7621 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7622 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7623 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7624 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7625 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7626 kern_cfun->curr_properties = cfun->curr_properties;
7627
7628 grid_expand_omp_for_loop (kfor, false);
7629
7630 /* Remove the GIMPLE_OMP_GRID_BODY statement marking the kernel region entry. */
7631 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7632 gsi_remove (&gsi, true);
7633 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7634 return. */
7635 gsi = gsi_last_nondebug_bb (gpukernel->exit);
7636 gcc_assert (!gsi_end_p (gsi)
7637 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7638 gimple *ret_stmt = gimple_build_return (NULL);
7639 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7640 gsi_remove (&gsi, true);
7641
7642 /* Statements in the first BB in the target construct have been produced by
7643 target lowering and must be copied inside the GPUKERNEL, with the two
7644 exceptions of the first OMP statement and the OMP_DATA assignment
7645 statement. */
7646 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7647 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7648 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7649 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7650 !gsi_end_p (tsi); gsi_next (&tsi))
7651 {
7652 gimple *stmt = gsi_stmt (tsi);
7653 if (is_gimple_omp (stmt))
7654 break;
7655 if (sender
7656 && is_gimple_assign (stmt)
7657 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7658 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7659 continue;
7660 gimple *copy = gimple_copy (stmt);
7661 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7662 gimple_set_block (copy, fniniblock);
7663 }
7664
7665 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7666 gpukernel->exit, inside_block);
7667
7668 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7669 kcn->mark_force_output ();
7670 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7671
7672 hsa_register_kernel (kcn, orig_child);
7673
7674 cgraph_node::add_new_function (kern_fndecl, true);
7675 push_cfun (kern_cfun);
7676 cgraph_edge::rebuild_edges ();
7677
7678 /* Re-map any mention of the PARM_DECL of the original function to the
7679 PARM_DECL of the new one.
7680
7681 TODO: It would be great if lowering produced references into the GPU
7682 kernel decl straight away and we did not have to do this. */
7683 struct grid_arg_decl_map adm;
7684 adm.old_arg = old_parm_decl;
7685 adm.new_arg = new_parm_decl;
7686 basic_block bb;
7687 FOR_EACH_BB_FN (bb, kern_cfun)
7688 {
7689 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7690 {
7691 gimple *stmt = gsi_stmt (gsi);
7692 struct walk_stmt_info wi;
7693 memset (&wi, 0, sizeof (wi));
7694 wi.info = &adm;
7695 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7696 }
7697 }
7698 pop_cfun ();
7699
7700 return;
7701 }
7702
7703 /* Expand the parallel region tree rooted at REGION. Expansion
7704 proceeds in depth-first order. Innermost regions are expanded
7705 first. This way, parallel regions that require a new function to
7706 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7707 internal dependencies in their body. */
7708
7709 static void
7710 expand_omp (struct omp_region *region)
7711 {
7712 omp_any_child_fn_dumped = false;
7713 while (region)
7714 {
7715 location_t saved_location;
7716 gimple *inner_stmt = NULL;
7717
7718 /* First, determine whether this is a combined parallel+workshare
7719 region. */
7720 if (region->type == GIMPLE_OMP_PARALLEL)
7721 determine_parallel_type (region);
7722 else if (region->type == GIMPLE_OMP_TARGET)
7723 grid_expand_target_grid_body (region);
7724
7725 if (region->type == GIMPLE_OMP_FOR
7726 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7727 inner_stmt = last_stmt (region->inner->entry);
7728
7729 if (region->inner)
7730 expand_omp (region->inner);
7731
7732 saved_location = input_location;
7733 if (gimple_has_location (last_stmt (region->entry)))
7734 input_location = gimple_location (last_stmt (region->entry));
7735
7736 switch (region->type)
7737 {
7738 case GIMPLE_OMP_PARALLEL:
7739 case GIMPLE_OMP_TASK:
7740 expand_omp_taskreg (region);
7741 break;
7742
7743 case GIMPLE_OMP_FOR:
7744 expand_omp_for (region, inner_stmt);
7745 break;
7746
7747 case GIMPLE_OMP_SECTIONS:
7748 expand_omp_sections (region);
7749 break;
7750
7751 case GIMPLE_OMP_SECTION:
7752 /* Individual omp sections are handled together with their
7753 parent GIMPLE_OMP_SECTIONS region. */
7754 break;
7755
7756 case GIMPLE_OMP_SINGLE:
7757 expand_omp_single (region);
7758 break;
7759
7760 case GIMPLE_OMP_ORDERED:
7761 {
7762 gomp_ordered *ord_stmt
7763 = as_a <gomp_ordered *> (last_stmt (region->entry));
7764 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7765 OMP_CLAUSE_DEPEND))
7766 {
7767 /* We'll expand these when expanding the corresponding
7768 worksharing region with an ordered(n) clause. */
7769 gcc_assert (region->outer
7770 && region->outer->type == GIMPLE_OMP_FOR);
7771 region->ord_stmt = ord_stmt;
7772 break;
7773 }
7774 }
7775 /* FALLTHRU */
7776 case GIMPLE_OMP_MASTER:
7777 case GIMPLE_OMP_TASKGROUP:
7778 case GIMPLE_OMP_CRITICAL:
7779 case GIMPLE_OMP_TEAMS:
7780 expand_omp_synch (region);
7781 break;
7782
7783 case GIMPLE_OMP_ATOMIC_LOAD:
7784 expand_omp_atomic (region);
7785 break;
7786
7787 case GIMPLE_OMP_TARGET:
7788 expand_omp_target (region);
7789 break;
7790
7791 default:
7792 gcc_unreachable ();
7793 }
7794
7795 input_location = saved_location;
7796 region = region->next;
7797 }
7798 if (omp_any_child_fn_dumped)
7799 {
7800 if (dump_file)
7801 dump_function_header (dump_file, current_function_decl, dump_flags);
7802 omp_any_child_fn_dumped = false;
7803 }
7804 }
7805
7806 /* Helper for build_omp_regions. Scan the dominator tree starting at
7807 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7808 true, the function ends once a single tree is built (otherwise, a whole
7809 forest of OMP constructs may be built). */
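
/* For illustration, a nest such as

       #pragma omp parallel
       {
         #pragma omp for
         for (i = 0; i < n; i++)
           ...
       }

   produces a region tree in which the GIMPLE_OMP_PARALLEL region is the
   parent and the GIMPLE_OMP_FOR region its inner child, each closed off by
   its matching GIMPLE_OMP_RETURN; stand-alone directives such as "omp target
   update" do not open a region at all (a sketch, see the cases below). */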
7810
7811 static void
7812 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7813 bool single_tree)
7814 {
7815 gimple_stmt_iterator gsi;
7816 gimple *stmt;
7817 basic_block son;
7818
7819 gsi = gsi_last_nondebug_bb (bb);
7820 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7821 {
7822 struct omp_region *region;
7823 enum gimple_code code;
7824
7825 stmt = gsi_stmt (gsi);
7826 code = gimple_code (stmt);
7827 if (code == GIMPLE_OMP_RETURN)
7828 {
7829 /* STMT is the return point out of region PARENT. Mark it
7830 as the exit point and make PARENT the immediately
7831 enclosing region. */
7832 gcc_assert (parent);
7833 region = parent;
7834 region->exit = bb;
7835 parent = parent->outer;
7836 }
7837 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7838 {
7839 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7840 GIMPLE_OMP_RETURN, but matches with
7841 GIMPLE_OMP_ATOMIC_LOAD. */
7842 gcc_assert (parent);
7843 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7844 region = parent;
7845 region->exit = bb;
7846 parent = parent->outer;
7847 }
7848 else if (code == GIMPLE_OMP_CONTINUE)
7849 {
7850 gcc_assert (parent);
7851 parent->cont = bb;
7852 }
7853 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7854 {
7855 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7856 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7857 }
7858 else
7859 {
7860 region = new_omp_region (bb, code, parent);
7861 /* Otherwise... */
7862 if (code == GIMPLE_OMP_TARGET)
7863 {
7864 switch (gimple_omp_target_kind (stmt))
7865 {
7866 case GF_OMP_TARGET_KIND_REGION:
7867 case GF_OMP_TARGET_KIND_DATA:
7868 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7869 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7870 case GF_OMP_TARGET_KIND_OACC_DATA:
7871 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7872 break;
7873 case GF_OMP_TARGET_KIND_UPDATE:
7874 case GF_OMP_TARGET_KIND_ENTER_DATA:
7875 case GF_OMP_TARGET_KIND_EXIT_DATA:
7876 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7877 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7878 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7879 /* ..., other than for those stand-alone directives... */
7880 region = NULL;
7881 break;
7882 default:
7883 gcc_unreachable ();
7884 }
7885 }
7886 else if (code == GIMPLE_OMP_ORDERED
7887 && omp_find_clause (gimple_omp_ordered_clauses
7888 (as_a <gomp_ordered *> (stmt)),
7889 OMP_CLAUSE_DEPEND))
7890 /* #pragma omp ordered depend is also just a stand-alone
7891 directive. */
7892 region = NULL;
7893 /* ..., this directive becomes the parent for a new region. */
7894 if (region)
7895 parent = region;
7896 }
7897 }
7898
7899 if (single_tree && !parent)
7900 return;
7901
7902 for (son = first_dom_son (CDI_DOMINATORS, bb);
7903 son;
7904 son = next_dom_son (CDI_DOMINATORS, son))
7905 build_omp_regions_1 (son, parent, single_tree);
7906 }
7907
7908 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7909 root_omp_region. */
7910
7911 static void
7912 build_omp_regions_root (basic_block root)
7913 {
7914 gcc_assert (root_omp_region == NULL);
7915 build_omp_regions_1 (root, NULL, true);
7916 gcc_assert (root_omp_region != NULL);
7917 }
7918
7919 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7920
7921 void
7922 omp_expand_local (basic_block head)
7923 {
7924 build_omp_regions_root (head);
7925 if (dump_file && (dump_flags & TDF_DETAILS))
7926 {
7927 fprintf (dump_file, "\nOMP region tree\n\n");
7928 dump_omp_region (dump_file, root_omp_region, 0);
7929 fprintf (dump_file, "\n");
7930 }
7931
7932 remove_exit_barriers (root_omp_region);
7933 expand_omp (root_omp_region);
7934
7935 omp_free_regions ();
7936 }
7937
7938 /* Scan the CFG and build a tree of OMP regions, storing the root in
7939 root_omp_region. */
7940
7941 static void
7942 build_omp_regions (void)
7943 {
7944 gcc_assert (root_omp_region == NULL);
7945 calculate_dominance_info (CDI_DOMINATORS);
7946 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7947 }
7948
7949 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7950
7951 static unsigned int
7952 execute_expand_omp (void)
7953 {
7954 build_omp_regions ();
7955
7956 if (!root_omp_region)
7957 return 0;
7958
7959 if (dump_file)
7960 {
7961 fprintf (dump_file, "\nOMP region tree\n\n");
7962 dump_omp_region (dump_file, root_omp_region, 0);
7963 fprintf (dump_file, "\n");
7964 }
7965
7966 remove_exit_barriers (root_omp_region);
7967
7968 expand_omp (root_omp_region);
7969
7970 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7971 verify_loop_structure ();
7972 cleanup_tree_cfg ();
7973
7974 omp_free_regions ();
7975
7976 return 0;
7977 }
7978
7979 /* OMP expansion -- the default pass, run before creation of SSA form. */
7980
7981 namespace {
7982
7983 const pass_data pass_data_expand_omp =
7984 {
7985 GIMPLE_PASS, /* type */
7986 "ompexp", /* name */
7987 OPTGROUP_OMP, /* optinfo_flags */
7988 TV_NONE, /* tv_id */
7989 PROP_gimple_any, /* properties_required */
7990 PROP_gimple_eomp, /* properties_provided */
7991 0, /* properties_destroyed */
7992 0, /* todo_flags_start */
7993 0, /* todo_flags_finish */
7994 };
7995
7996 class pass_expand_omp : public gimple_opt_pass
7997 {
7998 public:
7999 pass_expand_omp (gcc::context *ctxt)
8000 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8001 {}
8002
8003 /* opt_pass methods: */
8004 virtual unsigned int execute (function *)
8005 {
8006 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8007 || flag_openmp_simd != 0)
8008 && !seen_error ());
8009
8010 /* This pass always runs, to provide PROP_gimple_eomp.
8011 But often, there is nothing to do. */
8012 if (!gate)
8013 return 0;
8014
8015 return execute_expand_omp ();
8016 }
8017
8018 }; // class pass_expand_omp
8019
8020 } // anon namespace
8021
8022 gimple_opt_pass *
8023 make_pass_expand_omp (gcc::context *ctxt)
8024 {
8025 return new pass_expand_omp (ctxt);
8026 }
8027
8028 namespace {
8029
8030 const pass_data pass_data_expand_omp_ssa =
8031 {
8032 GIMPLE_PASS, /* type */
8033 "ompexpssa", /* name */
8034 OPTGROUP_OMP, /* optinfo_flags */
8035 TV_NONE, /* tv_id */
8036 PROP_cfg | PROP_ssa, /* properties_required */
8037 PROP_gimple_eomp, /* properties_provided */
8038 0, /* properties_destroyed */
8039 0, /* todo_flags_start */
8040 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8041 };
8042
8043 class pass_expand_omp_ssa : public gimple_opt_pass
8044 {
8045 public:
8046 pass_expand_omp_ssa (gcc::context *ctxt)
8047 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8048 {}
8049
8050 /* opt_pass methods: */
8051 virtual bool gate (function *fun)
8052 {
8053 return !(fun->curr_properties & PROP_gimple_eomp);
8054 }
8055 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8056 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8057
8058 }; // class pass_expand_omp_ssa
8059
8060 } // anon namespace
8061
8062 gimple_opt_pass *
8063 make_pass_expand_omp_ssa (gcc::context *ctxt)
8064 {
8065 return new pass_expand_omp_ssa (ctxt);
8066 }
8067
8068 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8069 GIMPLE_* codes. */
8070
8071 bool
8072 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8073 int *region_idx)
8074 {
8075 gimple *last = last_stmt (bb);
8076 enum gimple_code code = gimple_code (last);
8077 struct omp_region *cur_region = *region;
8078 bool fallthru = false;
8079
8080 switch (code)
8081 {
8082 case GIMPLE_OMP_PARALLEL:
8083 case GIMPLE_OMP_TASK:
8084 case GIMPLE_OMP_FOR:
8085 case GIMPLE_OMP_SINGLE:
8086 case GIMPLE_OMP_TEAMS:
8087 case GIMPLE_OMP_MASTER:
8088 case GIMPLE_OMP_TASKGROUP:
8089 case GIMPLE_OMP_CRITICAL:
8090 case GIMPLE_OMP_SECTION:
8091 case GIMPLE_OMP_GRID_BODY:
8092 cur_region = new_omp_region (bb, code, cur_region);
8093 fallthru = true;
8094 break;
8095
8096 case GIMPLE_OMP_ORDERED:
8097 cur_region = new_omp_region (bb, code, cur_region);
8098 fallthru = true;
8099 if (omp_find_clause (gimple_omp_ordered_clauses
8100 (as_a <gomp_ordered *> (last)),
8101 OMP_CLAUSE_DEPEND))
8102 cur_region = cur_region->outer;
8103 break;
8104
8105 case GIMPLE_OMP_TARGET:
8106 cur_region = new_omp_region (bb, code, cur_region);
8107 fallthru = true;
8108 switch (gimple_omp_target_kind (last))
8109 {
8110 case GF_OMP_TARGET_KIND_REGION:
8111 case GF_OMP_TARGET_KIND_DATA:
8112 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8113 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8114 case GF_OMP_TARGET_KIND_OACC_DATA:
8115 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8116 break;
8117 case GF_OMP_TARGET_KIND_UPDATE:
8118 case GF_OMP_TARGET_KIND_ENTER_DATA:
8119 case GF_OMP_TARGET_KIND_EXIT_DATA:
8120 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8121 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8122 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8123 cur_region = cur_region->outer;
8124 break;
8125 default:
8126 gcc_unreachable ();
8127 }
8128 break;
8129
8130 case GIMPLE_OMP_SECTIONS:
8131 cur_region = new_omp_region (bb, code, cur_region);
8132 fallthru = true;
8133 break;
8134
8135 case GIMPLE_OMP_SECTIONS_SWITCH:
8136 fallthru = false;
8137 break;
8138
8139 case GIMPLE_OMP_ATOMIC_LOAD:
8140 case GIMPLE_OMP_ATOMIC_STORE:
8141 fallthru = true;
8142 break;
8143
8144 case GIMPLE_OMP_RETURN:
8145 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8146 somewhere other than the next block. This will be
8147 created later. */
8148 cur_region->exit = bb;
8149 if (cur_region->type == GIMPLE_OMP_TASK)
8150 /* Add an edge corresponding to not scheduling the task
8151 immediately. */
8152 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8153 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8154 cur_region = cur_region->outer;
8155 break;
8156
8157 case GIMPLE_OMP_CONTINUE:
8158 cur_region->cont = bb;
8159 switch (cur_region->type)
8160 {
8161 case GIMPLE_OMP_FOR:
8162 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8163 succs edges as abnormal to prevent splitting
8164 them. */
8165 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8166 /* Make the loopback edge. */
8167 make_edge (bb, single_succ (cur_region->entry),
8168 EDGE_ABNORMAL);
8169
8170 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8171 corresponds to the case that the body of the loop
8172 is not executed at all. */
8173 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8174 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8175 fallthru = false;
8176 break;
8177
8178 case GIMPLE_OMP_SECTIONS:
8179 /* Wire up the edges into and out of the nested sections. */
8180 {
8181 basic_block switch_bb = single_succ (cur_region->entry);
8182
8183 struct omp_region *i;
8184 for (i = cur_region->inner; i ; i = i->next)
8185 {
8186 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8187 make_edge (switch_bb, i->entry, 0);
8188 make_edge (i->exit, bb, EDGE_FALLTHRU);
8189 }
8190
8191 /* Make the loopback edge to the block with
8192 GIMPLE_OMP_SECTIONS_SWITCH. */
8193 make_edge (bb, switch_bb, 0);
8194
8195 /* Make the edge from the switch to exit. */
8196 make_edge (switch_bb, bb->next_bb, 0);
8197 fallthru = false;
8198 }
8199 break;
8200
8201 case GIMPLE_OMP_TASK:
8202 fallthru = true;
8203 break;
8204
8205 default:
8206 gcc_unreachable ();
8207 }
8208 break;
8209
8210 default:
8211 gcc_unreachable ();
8212 }
8213
8214 if (*region != cur_region)
8215 {
8216 *region = cur_region;
8217 if (cur_region)
8218 *region_idx = cur_region->entry->index;
8219 else
8220 *region_idx = 0;
8221 }
8222
8223 return fallthru;
8224 }
8225
8226 #include "gt-omp-expand.h"