gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
62
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67 struct omp_region
68 {
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
107 };
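/* For illustration (added by the editor, not part of the original
   sources):  a nest such as

       #pragma omp parallel
       {
         #pragma omp for
         for (...) ...
         #pragma omp single
         ...
       }

   yields one GIMPLE_OMP_PARALLEL region whose INNER field heads the list
   of the two workshare regions, linked through their NEXT fields, each of
   which points back to the parallel region via OUTER.  */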
108
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
111
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
116
117 /* Return true if REGION is a combined parallel+workshare region. */
118
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
121 {
122 return region->is_combined_parallel;
123 }
124
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
135
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
138
139 Is lowered into:
140
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
144
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
150
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
155
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
161
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
166
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 {
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
172
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
175
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
184
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
195
196 return true;
197 }
198
199 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
200 presence (SIMD_SCHEDULE). */
201
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 {
205 if (!simd_schedule)
206 return chunk_size;
207
208 poly_uint64 vf = omp_max_vf ();
209 if (known_eq (vf, 1U))
210 return chunk_size;
211
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
217 }
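/* Worked example (illustration only):  with a SIMD schedule and a maximum
   vectorization factor of 8, a chunk size of 10 becomes
   (10 + 7) & -8 == 16, i.e. it is rounded up to the next multiple of the
   vectorization factor; a chunk size that is already a multiple of 8 is
   left unchanged.  */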
218
219 /* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
222
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 {
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
229
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 {
232 struct omp_for_data fd;
233 tree n1, n2;
234
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
238
239 if (gimple_omp_for_combined_into_p (for_stmt))
240 {
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
250 }
251
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
256
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
259
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
262
263 if (fd.chunk_size)
264 {
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
268 }
269
270 return ws_args;
271 }
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 {
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
282 }
283
284 gcc_unreachable ();
285 }
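/* Example of the collected arguments (illustration only; the values shown
   are hypothetical):  for a combined

       #pragma omp parallel for schedule (guided, 16)

   WS_ARGS holds { n1, n2, step, 16 } converted to long, which are later
   spliced into the combined library call as the start, end, incr and
   chunk_size arguments of GOMP_parallel_loop_guided.  For a combined
   parallel sections region WS_ARGS holds a single element, the number of
   sections.  */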
286
287 /* Discover whether REGION is a combined parallel+workshare region. */
288
289 static void
290 determine_parallel_type (struct omp_region *region)
291 {
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
294
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
299
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
305
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
312
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
319 {
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
322
323 if (region->inner->type == GIMPLE_OMP_FOR)
324 {
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 {
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
344 }
345 }
346
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350 }
351 }
352
353 /* Debugging dumps for parallel regions. */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
357
358 /* Dump the parallel region tree rooted at REGION. */
359
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 {
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
365
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
368
369 if (region->cont)
370 {
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
373 }
374
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
380
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
383 }
384
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
387 {
388 dump_omp_region (stderr, region, 0);
389 }
390
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
393 {
394 dump_omp_region (stderr, root_omp_region, 0);
395 }
396
397 /* Create a new parallel region starting at STMT inside region PARENT. */
398
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
402 {
403 struct omp_region *region = XCNEW (struct omp_region);
404
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
408
409 if (parent)
410 {
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
415 }
416 else
417 {
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
422 }
423
424 return region;
425 }
426
427 /* Release the memory associated with the region tree rooted at REGION. */
428
429 static void
430 free_omp_region_1 (struct omp_region *region)
431 {
432 struct omp_region *i, *n;
433
434 for (i = region->inner; i ; i = n)
435 {
436 n = i->next;
437 free_omp_region_1 (i);
438 }
439
440 free (region);
441 }
442
443 /* Release the memory for the entire omp region tree. */
444
445 void
446 omp_free_regions (void)
447 {
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
450 {
451 n = r->next;
452 free_omp_region_1 (r);
453 }
454 root_omp_region = NULL;
455 }
456
457 /* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
459
460 static gcond *
461 gimple_build_cond_empty (tree cond)
462 {
463 enum tree_code pred_code;
464 tree lhs, rhs;
465
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468 }
469
470 /* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
472
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 {
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
478 {
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
482 {
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
485
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
491 }
492 }
493
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
497
498 return false;
499 }
500
501 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
502 Add CHILD_FNDECL to decl chain of the supercontext of the block
503 ENTRY_BLOCK - this is the block which originally contained the
504 code from which CHILD_FNDECL was created.
505
506 Together, these actions ensure that the debug info for the outlined
507 function will be emitted with the correct lexical scope. */
508
509 static void
510 adjust_context_and_scope (tree entry_block, tree child_fndecl)
511 {
512 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
513 {
514 tree b = BLOCK_SUPERCONTEXT (entry_block);
515
516 if (TREE_CODE (b) == BLOCK)
517 {
518 tree parent_fndecl;
519
520 /* Follow supercontext chain until the parent fndecl
521 is found. */
522 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
523 TREE_CODE (parent_fndecl) == BLOCK;
524 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
525 ;
526
527 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
528
529 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530
531 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
532 BLOCK_VARS (b) = child_fndecl;
533 }
534 }
535 }
536
537 /* Build the function calls to GOMP_parallel_start etc to actually
538 generate the parallel operation. REGION is the parallel region
539 being expanded. BB is the block where to insert the code. WS_ARGS
540 will be set if this is a call to a combined parallel+workshare
541 construct, it contains the list of additional arguments needed by
542 the workshare construct. */
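/* Illustrative sketch (not from the original sources; the child function
   name is hypothetical):  a plain

       #pragma omp parallel num_threads (4)

   is expanded into a single runtime call of the form

       __builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 4, 0);

   i.e. the outlined child function, the address of the shared data block
   (or NULL), the NUM_THREADS value and the flags word derived from the
   proc_bind clause.  */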
543
544 static void
545 expand_parallel_call (struct omp_region *region, basic_block bb,
546 gomp_parallel *entry_stmt,
547 vec<tree, va_gc> *ws_args)
548 {
549 tree t, t1, t2, val, cond, c, clauses, flags;
550 gimple_stmt_iterator gsi;
551 gimple *stmt;
552 enum built_in_function start_ix;
553 int start_ix2;
554 location_t clause_loc;
555 vec<tree, va_gc> *args;
556
557 clauses = gimple_omp_parallel_clauses (entry_stmt);
558
559 /* Determine what flavor of GOMP_parallel we will be
560 emitting. */
561 start_ix = BUILT_IN_GOMP_PARALLEL;
562 if (is_combined_parallel (region))
563 {
564 switch (region->inner->type)
565 {
566 case GIMPLE_OMP_FOR:
567 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
568 switch (region->inner->sched_kind)
569 {
570 case OMP_CLAUSE_SCHEDULE_RUNTIME:
571 start_ix2 = 3;
572 break;
573 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
574 case OMP_CLAUSE_SCHEDULE_GUIDED:
575 if (region->inner->sched_modifiers
576 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
577 {
578 start_ix2 = 3 + region->inner->sched_kind;
579 break;
580 }
581 /* FALLTHRU */
582 default:
583 start_ix2 = region->inner->sched_kind;
584 break;
585 }
586 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
587 start_ix = (enum built_in_function) start_ix2;
588 break;
589 case GIMPLE_OMP_SECTIONS:
590 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
591 break;
592 default:
593 gcc_unreachable ();
594 }
595 }
596
597 /* By default, the value of NUM_THREADS is zero (selected at run time)
598 and there is no conditional. */
599 cond = NULL_TREE;
600 val = build_int_cst (unsigned_type_node, 0);
601 flags = build_int_cst (unsigned_type_node, 0);
602
603 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
604 if (c)
605 cond = OMP_CLAUSE_IF_EXPR (c);
606
607 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
608 if (c)
609 {
610 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
611 clause_loc = OMP_CLAUSE_LOCATION (c);
612 }
613 else
614 clause_loc = gimple_location (entry_stmt);
615
616 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
617 if (c)
618 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
619
620 /* Ensure 'val' is of the correct type. */
621 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
622
623 /* If we found the clause 'if (cond)', build either
624 (cond != 0) or (cond ? val : 1u). */
625 if (cond)
626 {
627 cond = gimple_boolify (cond);
628
629 if (integer_zerop (val))
630 val = fold_build2_loc (clause_loc,
631 EQ_EXPR, unsigned_type_node, cond,
632 build_int_cst (TREE_TYPE (cond), 0));
633 else
634 {
635 basic_block cond_bb, then_bb, else_bb;
636 edge e, e_then, e_else;
637 tree tmp_then, tmp_else, tmp_join, tmp_var;
638
639 tmp_var = create_tmp_var (TREE_TYPE (val));
640 if (gimple_in_ssa_p (cfun))
641 {
642 tmp_then = make_ssa_name (tmp_var);
643 tmp_else = make_ssa_name (tmp_var);
644 tmp_join = make_ssa_name (tmp_var);
645 }
646 else
647 {
648 tmp_then = tmp_var;
649 tmp_else = tmp_var;
650 tmp_join = tmp_var;
651 }
652
653 e = split_block_after_labels (bb);
654 cond_bb = e->src;
655 bb = e->dest;
656 remove_edge (e);
657
658 then_bb = create_empty_bb (cond_bb);
659 else_bb = create_empty_bb (then_bb);
660 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
661 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
662
663 stmt = gimple_build_cond_empty (cond);
664 gsi = gsi_start_bb (cond_bb);
665 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
666
667 gsi = gsi_start_bb (then_bb);
668 expand_omp_build_assign (&gsi, tmp_then, val, true);
669
670 gsi = gsi_start_bb (else_bb);
671 expand_omp_build_assign (&gsi, tmp_else,
672 build_int_cst (unsigned_type_node, 1),
673 true);
674
675 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
676 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
677 add_bb_to_loop (then_bb, cond_bb->loop_father);
678 add_bb_to_loop (else_bb, cond_bb->loop_father);
679 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
680 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
681
682 if (gimple_in_ssa_p (cfun))
683 {
684 gphi *phi = create_phi_node (tmp_join, bb);
685 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
686 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
687 }
688
689 val = tmp_join;
690 }
691
692 gsi = gsi_start_bb (bb);
693 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
694 false, GSI_CONTINUE_LINKING);
695 }
696
697 gsi = gsi_last_nondebug_bb (bb);
698 t = gimple_omp_parallel_data_arg (entry_stmt);
699 if (t == NULL)
700 t1 = null_pointer_node;
701 else
702 t1 = build_fold_addr_expr (t);
703 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
704 t2 = build_fold_addr_expr (child_fndecl);
705
706 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
707
708 vec_alloc (args, 4 + vec_safe_length (ws_args));
709 args->quick_push (t2);
710 args->quick_push (t1);
711 args->quick_push (val);
712 if (ws_args)
713 args->splice (*ws_args);
714 args->quick_push (flags);
715
716 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
717 builtin_decl_explicit (start_ix), args);
718
719 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
720 false, GSI_CONTINUE_LINKING);
721
722 if (hsa_gen_requested_p ()
723 && parallel_needs_hsa_kernel_p (region))
724 {
725 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
726 hsa_register_kernel (child_cnode);
727 }
728 }
729
730 /* Build the function call to GOMP_task to actually
731 generate the task operation. BB is the block where to insert the code. */
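/* Illustrative sketch (not from the original sources; names are
   hypothetical):  an untied

       #pragma omp task untied

   becomes a call along the lines of

       GOMP_task (foo._omp_fn.1, &.omp_data_o.2, NULL, arg_size, arg_align,
                  cond, GOMP_TASK_FLAG_UNTIED, depend, priority);

   while a taskloop is expanded to GOMP_taskloop{,_ull}, which additionally
   receives the num_tasks/grainsize value and the start, end and step of
   the loop.  */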
732
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
736 {
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
740
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
742
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
749
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
754
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
760 {
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
779 {
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
782 {
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
785 }
786 else
787 num_tasks = integer_zero_node;
788 }
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
795 }
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
798
799 tree flags = build_int_cst (unsigned_type_node, iflags);
800
801 tree cond = boolean_true_node;
802 if (ifc)
803 {
804 if (taskloop_p)
805 {
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
813 }
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
816 }
817
818 if (finalc)
819 {
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
826 }
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
836
837 gsi = gsi_last_nondebug_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
849
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
864
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
867 }
868
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
870
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
873 {
874 tree chain = NULL_TREE, t;
875 unsigned ix;
876
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
878 {
879 DECL_CHAIN (t) = chain;
880 chain = t;
881 }
882
883 return chain;
884 }
885
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
889 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
890 removed. */
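/* For instance (illustration only):  in

       #pragma omp parallel
       {
         #pragma omp for
         for (i = 0; i < n; i++) ...
       }

   the barrier implied at the end of the worksharing loop is immediately
   followed by the join barrier of the parallel region, so the inner
   GIMPLE_OMP_RETURN can be marked nowait -- unless queued tasks might
   still reference addressable locals, which the code below checks for.  */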
891
892 static void
893 remove_exit_barrier (struct omp_region *region)
894 {
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
901
902 exit_bb = region->exit;
903
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
908
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_nondebug_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev_nondebug (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
919
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
921 {
922 gsi = gsi_last_nondebug_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
928 {
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934 from within current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
938 {
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
944
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
948 {
949 any_addressable_vars = 1;
950 break;
951 }
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
957 {
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
962 {
963 any_addressable_vars = 1;
964 break;
965 }
966 if (block == gimple_block (parallel_stmt))
967 break;
968 }
969 }
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
972 }
973 }
974 }
975
976 static void
977 remove_exit_barriers (struct omp_region *region)
978 {
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
981
982 if (region->inner)
983 {
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
987 {
988 region = region->next;
989 remove_exit_barriers (region);
990 }
991 }
992 }
993
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for task body, except
999 that in untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
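/* For example (illustration only):  inside a parallel body

       tid = omp_get_thread_num ();

   is redirected to the const builtin

       tid = __builtin_omp_get_thread_num ();

   so that repeated calls can be CSEd; in an untied task the call is left
   untouched because the executing thread may change at task scheduling
   points.  */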
1001
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1004 {
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1014
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1017 {
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1020
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1026 {
1027 tree built_in;
1028
1029 if (DECL_NAME (decl) == thr_num_id)
1030 {
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1036 }
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1041
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1045
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1048
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1053
1054 gimple_call_set_fndecl (call, built_in);
1055 }
1056 }
1057 }
1058
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1061
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1064 {
1065 tree t = *tp;
1066
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1070
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1073
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1076 }
1077
1078 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1079
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1083 {
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1095 {
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1098 }
1099 }
1100
1101 /* Expand the OpenMP parallel or task directive starting at REGION. */
1102
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1105 {
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1113
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1117
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1123
1124 if (is_combined_parallel (region))
1125 ws_args = region->ws_args;
1126 else
1127 ws_args = NULL;
1128
1129 if (child_cfun->cfg)
1130 {
1131 /* Due to inlining, it may happen that we have already outlined
1132 the region, in which case all we need to do is make the
1133 sub-graph unreachable and emit the parallel call. */
1134 edge entry_succ_e, exit_succ_e;
1135
1136 entry_succ_e = single_succ_edge (entry_bb);
1137
1138 gsi = gsi_last_nondebug_bb (entry_bb);
1139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1140 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1141 gsi_remove (&gsi, true);
1142
1143 new_bb = entry_bb;
1144 if (exit_bb)
1145 {
1146 exit_succ_e = single_succ_edge (exit_bb);
1147 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1148 }
1149 remove_edge_and_dominated_blocks (entry_succ_e);
1150 }
1151 else
1152 {
1153 unsigned srcidx, dstidx, num;
1154
1155 /* If the parallel region needs data sent from the parent
1156 function, then the very first statement (except possible
1157 tree profile counter updates) of the parallel body
1158 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1159 &.OMP_DATA_O is passed as an argument to the child function,
1160 we need to replace it with the argument as seen by the child
1161 function.
1162
1163 In most cases, this will end up being the identity assignment
1164 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1165 a function call that has been inlined, the original PARM_DECL
1166 .OMP_DATA_I may have been converted into a different local
1167 variable, in which case we need to keep the assignment. */
1168 if (gimple_omp_taskreg_data_arg (entry_stmt))
1169 {
1170 basic_block entry_succ_bb
1171 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1172 : FALLTHRU_EDGE (entry_bb)->dest;
1173 tree arg;
1174 gimple *parcopy_stmt = NULL;
1175
1176 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1177 {
1178 gimple *stmt;
1179
1180 gcc_assert (!gsi_end_p (gsi));
1181 stmt = gsi_stmt (gsi);
1182 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1183 continue;
1184
1185 if (gimple_num_ops (stmt) == 2)
1186 {
1187 tree arg = gimple_assign_rhs1 (stmt);
1188
1189 /* We're ignoring the subcode because we're
1190 effectively doing a STRIP_NOPS. */
1191
1192 if (TREE_CODE (arg) == ADDR_EXPR
1193 && TREE_OPERAND (arg, 0)
1194 == gimple_omp_taskreg_data_arg (entry_stmt))
1195 {
1196 parcopy_stmt = stmt;
1197 break;
1198 }
1199 }
1200 }
1201
1202 gcc_assert (parcopy_stmt != NULL);
1203 arg = DECL_ARGUMENTS (child_fn);
1204
1205 if (!gimple_in_ssa_p (cfun))
1206 {
1207 if (gimple_assign_lhs (parcopy_stmt) == arg)
1208 gsi_remove (&gsi, true);
1209 else
1210 {
1211 /* ?? Is setting the subcode really necessary ?? */
1212 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1213 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1214 }
1215 }
1216 else
1217 {
1218 tree lhs = gimple_assign_lhs (parcopy_stmt);
1219 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1220 /* We'd like to set the rhs to the default def in the child_fn,
1221 but it's too early to create ssa names in the child_fn.
1222 Instead, we set the rhs to the parm. In
1223 move_sese_region_to_fn, we introduce a default def for the
1224 parm, map the parm to its default def, and once we encounter
1225 this stmt, replace the parm with the default def. */
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 update_stmt (parcopy_stmt);
1228 }
1229 }
1230
1231 /* Declare local variables needed in CHILD_CFUN. */
1232 block = DECL_INITIAL (child_fn);
1233 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1234 /* The gimplifier could record temporaries in parallel/task block
1235 rather than in containing function's local_decls chain,
1236 which would mean cgraph missed finalizing them. Do it now. */
1237 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1238 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1239 varpool_node::finalize_decl (t);
1240 DECL_SAVED_TREE (child_fn) = NULL;
1241 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1242 gimple_set_body (child_fn, NULL);
1243 TREE_USED (block) = 1;
1244
1245 /* Reset DECL_CONTEXT on function arguments. */
1246 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1247 DECL_CONTEXT (t) = child_fn;
1248
1249 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1250 so that it can be moved to the child function. */
1251 gsi = gsi_last_nondebug_bb (entry_bb);
1252 stmt = gsi_stmt (gsi);
1253 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1255 e = split_block (entry_bb, stmt);
1256 gsi_remove (&gsi, true);
1257 entry_bb = e->dest;
1258 edge e2 = NULL;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1260 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1261 else
1262 {
1263 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1264 gcc_assert (e2->dest == region->exit);
1265 remove_edge (BRANCH_EDGE (entry_bb));
1266 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1267 gsi = gsi_last_nondebug_bb (region->exit);
1268 gcc_assert (!gsi_end_p (gsi)
1269 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1270 gsi_remove (&gsi, true);
1271 }
1272
1273 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1274 if (exit_bb)
1275 {
1276 gsi = gsi_last_nondebug_bb (exit_bb);
1277 gcc_assert (!gsi_end_p (gsi)
1278 && (gimple_code (gsi_stmt (gsi))
1279 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1280 stmt = gimple_build_return (NULL);
1281 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1282 gsi_remove (&gsi, true);
1283 }
1284
1285 /* Move the parallel region into CHILD_CFUN. */
1286
1287 if (gimple_in_ssa_p (cfun))
1288 {
1289 init_tree_ssa (child_cfun);
1290 init_ssa_operands (child_cfun);
1291 child_cfun->gimple_df->in_ssa_p = true;
1292 block = NULL_TREE;
1293 }
1294 else
1295 block = gimple_block (entry_stmt);
1296
1297 /* Make sure to generate early debug for the function before
1298 outlining anything. */
1299 if (! gimple_in_ssa_p (cfun))
1300 (*debug_hooks->early_global_decl) (cfun->decl);
1301
1302 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1303 if (exit_bb)
1304 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1305 if (e2)
1306 {
1307 basic_block dest_bb = e2->dest;
1308 if (!exit_bb)
1309 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1310 remove_edge (e2);
1311 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1312 }
1313 /* When the OMP expansion process cannot guarantee an up-to-date
1314 loop tree arrange for the child function to fixup loops. */
1315 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1316 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1317
1318 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1319 num = vec_safe_length (child_cfun->local_decls);
1320 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1321 {
1322 t = (*child_cfun->local_decls)[srcidx];
1323 if (DECL_CONTEXT (t) == cfun->decl)
1324 continue;
1325 if (srcidx != dstidx)
1326 (*child_cfun->local_decls)[dstidx] = t;
1327 dstidx++;
1328 }
1329 if (dstidx != num)
1330 vec_safe_truncate (child_cfun->local_decls, dstidx);
1331
1332 /* Inform the callgraph about the new function. */
1333 child_cfun->curr_properties = cfun->curr_properties;
1334 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1335 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1336 cgraph_node *node = cgraph_node::get_create (child_fn);
1337 node->parallelized_function = 1;
1338 cgraph_node::add_new_function (child_fn, true);
1339
1340 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1341 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1342
1343 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1344 fixed in a following pass. */
1345 push_cfun (child_cfun);
1346 if (need_asm)
1347 assign_assembler_name_if_needed (child_fn);
1348
1349 if (optimize)
1350 optimize_omp_library_calls (entry_stmt);
1351 update_max_bb_count ();
1352 cgraph_edge::rebuild_edges ();
1353
1354 /* Some EH regions might become dead, see PR34608. If
1355 pass_cleanup_cfg isn't the first pass to happen with the
1356 new child, these dead EH edges might cause problems.
1357 Clean them up now. */
1358 if (flag_exceptions)
1359 {
1360 basic_block bb;
1361 bool changed = false;
1362
1363 FOR_EACH_BB_FN (bb, cfun)
1364 changed |= gimple_purge_dead_eh_edges (bb);
1365 if (changed)
1366 cleanup_tree_cfg ();
1367 }
1368 if (gimple_in_ssa_p (cfun))
1369 update_ssa (TODO_update_ssa);
1370 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1371 verify_loop_structure ();
1372 pop_cfun ();
1373
1374 if (dump_file && !gimple_in_ssa_p (cfun))
1375 {
1376 omp_any_child_fn_dumped = true;
1377 dump_function_header (dump_file, child_fn, dump_flags);
1378 dump_function_to_file (child_fn, dump_file, dump_flags);
1379 }
1380 }
1381
1382 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1383 expand_parallel_call (region, new_bb,
1384 as_a <gomp_parallel *> (entry_stmt), ws_args);
1385 else
1386 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1387 if (gimple_in_ssa_p (cfun))
1388 update_ssa (TODO_update_ssa_only_virtuals);
1389 }
1390
1391 /* Information about members of an OpenACC collapsed loop nest. */
1392
1393 struct oacc_collapse
1394 {
1395 tree base; /* Base value. */
1396 tree iters; /* Number of steps. */
1397 tree step; /* Step size. */
1398 tree tile; /* Tile increment (if tiled). */
1399 tree outer; /* Tile iterator var. */
1400 };
1401
1402 /* Helper for expand_oacc_for. Determine collapsed loop information.
1403 Fill in COUNTS array. Emit any initialization code before GSI.
1404 Return the calculated outer loop bound of BOUND_TYPE. */
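/* Worked example (illustration only):  for one member loop

       for (i = 0; i < 10; i += 3)

   the code below computes range = 10 - 0 = 10, dir = +1 and
   iters = (range - dir + step) / step = (10 - 1 + 3) / 3 = 4; the per-loop
   iteration counts are then multiplied together to form the collapsed
   outer bound returned to the caller.  */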
1405
1406 static tree
1407 expand_oacc_collapse_init (const struct omp_for_data *fd,
1408 gimple_stmt_iterator *gsi,
1409 oacc_collapse *counts, tree bound_type,
1410 location_t loc)
1411 {
1412 tree tiling = fd->tiling;
1413 tree total = build_int_cst (bound_type, 1);
1414 int ix;
1415
1416 gcc_assert (integer_onep (fd->loop.step));
1417 gcc_assert (integer_zerop (fd->loop.n1));
1418
1419 /* When tiling, the first operand of the tile clause applies to the
1420 innermost loop, and we work outwards from there. Seems
1421 backwards, but whatever. */
1422 for (ix = fd->collapse; ix--;)
1423 {
1424 const omp_for_data_loop *loop = &fd->loops[ix];
1425
1426 tree iter_type = TREE_TYPE (loop->v);
1427 tree diff_type = iter_type;
1428 tree plus_type = iter_type;
1429
1430 gcc_assert (loop->cond_code == fd->loop.cond_code);
1431
1432 if (POINTER_TYPE_P (iter_type))
1433 plus_type = sizetype;
1434 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1435 diff_type = signed_type_for (diff_type);
1436 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1437 diff_type = integer_type_node;
1438
1439 if (tiling)
1440 {
1441 tree num = build_int_cst (integer_type_node, fd->collapse);
1442 tree loop_no = build_int_cst (integer_type_node, ix);
1443 tree tile = TREE_VALUE (tiling);
1444 gcall *call
1445 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1446 /* gwv-outer=*/integer_zero_node,
1447 /* gwv-inner=*/integer_zero_node);
1448
1449 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1450 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1451 gimple_call_set_lhs (call, counts[ix].tile);
1452 gimple_set_location (call, loc);
1453 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1454
1455 tiling = TREE_CHAIN (tiling);
1456 }
1457 else
1458 {
1459 counts[ix].tile = NULL;
1460 counts[ix].outer = loop->v;
1461 }
1462
1463 tree b = loop->n1;
1464 tree e = loop->n2;
1465 tree s = loop->step;
1466 bool up = loop->cond_code == LT_EXPR;
1467 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1468 bool negating;
1469 tree expr;
1470
1471 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1472 true, GSI_SAME_STMT);
1473 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1474 true, GSI_SAME_STMT);
1475
1476 /* Convert the step, avoiding possible unsigned->signed overflow. */
1477 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1478 if (negating)
1479 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1480 s = fold_convert (diff_type, s);
1481 if (negating)
1482 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1483 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1484 true, GSI_SAME_STMT);
1485
1486 /* Determine the range, avoiding possible unsigned->signed overflow. */
1487 negating = !up && TYPE_UNSIGNED (iter_type);
1488 expr = fold_build2 (MINUS_EXPR, plus_type,
1489 fold_convert (plus_type, negating ? b : e),
1490 fold_convert (plus_type, negating ? e : b));
1491 expr = fold_convert (diff_type, expr);
1492 if (negating)
1493 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1494 tree range = force_gimple_operand_gsi
1495 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1496
1497 /* Determine number of iterations. */
1498 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1499 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1500 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1501
1502 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1503 true, GSI_SAME_STMT);
1504
1505 counts[ix].base = b;
1506 counts[ix].iters = iters;
1507 counts[ix].step = s;
1508
1509 total = fold_build2 (MULT_EXPR, bound_type, total,
1510 fold_convert (bound_type, iters));
1511 }
1512
1513 return total;
1514 }
1515
1516 /* Emit initializers for collapsed loop members. INNER is true if
1517 this is for the element loop of a TILE. IVAR is the outer
1518 loop iteration variable, from which collapsed loop iteration values
1519 are calculated. COUNTS array has been initialized by
1520 expand_oacc_collapse_init. */
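/* Worked example (illustration only):  for collapse(2) with an inner
   iteration count of 5, a combined index IVAR == 13 is decomposed from
   the innermost loop outwards as

       inner index = 13 % 5 == 3, remaining = 13 / 5 == 2, outer index = 2

   and each loop variable is then set to its base (or outer tile iterator)
   plus index * step.  */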
1521
1522 static void
1523 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1524 gimple_stmt_iterator *gsi,
1525 const oacc_collapse *counts, tree ivar)
1526 {
1527 tree ivar_type = TREE_TYPE (ivar);
1528
1529 /* The most rapidly changing iteration variable is the innermost
1530 one. */
1531 for (int ix = fd->collapse; ix--;)
1532 {
1533 const omp_for_data_loop *loop = &fd->loops[ix];
1534 const oacc_collapse *collapse = &counts[ix];
1535 tree v = inner ? loop->v : collapse->outer;
1536 tree iter_type = TREE_TYPE (v);
1537 tree diff_type = TREE_TYPE (collapse->step);
1538 tree plus_type = iter_type;
1539 enum tree_code plus_code = PLUS_EXPR;
1540 tree expr;
1541
1542 if (POINTER_TYPE_P (iter_type))
1543 {
1544 plus_code = POINTER_PLUS_EXPR;
1545 plus_type = sizetype;
1546 }
1547
1548 expr = ivar;
1549 if (ix)
1550 {
1551 tree mod = fold_convert (ivar_type, collapse->iters);
1552 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1553 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1554 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1555 true, GSI_SAME_STMT);
1556 }
1557
1558 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1559 collapse->step);
1560 expr = fold_build2 (plus_code, iter_type,
1561 inner ? collapse->outer : collapse->base,
1562 fold_convert (plus_type, expr));
1563 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1564 true, GSI_SAME_STMT);
1565 gassign *ass = gimple_build_assign (v, expr);
1566 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1567 }
1568 }
1569
1570 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1571 of the combined collapse > 1 loop constructs, generate code like:
1572 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1573 if (cond3 is <)
1574 adj = STEP3 - 1;
1575 else
1576 adj = STEP3 + 1;
1577 count3 = (adj + N32 - N31) / STEP3;
1578 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1579 if (cond2 is <)
1580 adj = STEP2 - 1;
1581 else
1582 adj = STEP2 + 1;
1583 count2 = (adj + N22 - N21) / STEP2;
1584 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1585 if (cond1 is <)
1586 adj = STEP1 - 1;
1587 else
1588 adj = STEP1 + 1;
1589 count1 = (adj + N12 - N11) / STEP1;
1590 count = count1 * count2 * count3;
1591 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1592 count = 0;
1593 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1594 of the combined loop constructs, just initialize COUNTS array
1595 from the _looptemp_ clauses. */
1596
1597 /* NOTE: It *could* be better to moosh all of the BBs together,
1598 creating one larger BB with all the computation and the unexpected
1599 jump at the end. I.e.
1600
1601 bool zero3, zero2, zero1, zero;
1602
1603 zero3 = N32 c3 N31;
1604 count3 = (N32 - N31) /[cl] STEP3;
1605 zero2 = N22 c2 N21;
1606 count2 = (N22 - N21) /[cl] STEP2;
1607 zero1 = N12 c1 N11;
1608 count1 = (N12 - N11) /[cl] STEP1;
1609 zero = zero3 || zero2 || zero1;
1610 count = count1 * count2 * count3;
1611 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1612
1613 After all, we expect the zero=false, and thus we expect to have to
1614 evaluate all of the comparison expressions, so short-circuiting
1615 oughtn't be a win. Since the condition isn't protecting a
1616 denominator, we're not concerned about divide-by-zero, so we can
1617 fully evaluate count even if a numerator turned out to be wrong.
1618
1619 It seems like putting this all together would create much better
1620 scheduling opportunities, and less pressure on the chip's branch
1621 predictor. */
1622
1623 static void
1624 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1625 basic_block &entry_bb, tree *counts,
1626 basic_block &zero_iter1_bb, int &first_zero_iter1,
1627 basic_block &zero_iter2_bb, int &first_zero_iter2,
1628 basic_block &l2_dom_bb)
1629 {
1630 tree t, type = TREE_TYPE (fd->loop.v);
1631 edge e, ne;
1632 int i;
1633
1634 /* Collapsed loops need work for expansion into SSA form. */
1635 gcc_assert (!gimple_in_ssa_p (cfun));
1636
1637 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1638 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1639 {
1640 gcc_assert (fd->ordered == 0);
1641 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1642 isn't supposed to be handled, as the inner loop doesn't
1643 use it. */
1644 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1645 OMP_CLAUSE__LOOPTEMP_);
1646 gcc_assert (innerc);
1647 for (i = 0; i < fd->collapse; i++)
1648 {
1649 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1650 OMP_CLAUSE__LOOPTEMP_);
1651 gcc_assert (innerc);
1652 if (i)
1653 counts[i] = OMP_CLAUSE_DECL (innerc);
1654 else
1655 counts[0] = NULL_TREE;
1656 }
1657 return;
1658 }
1659
1660 for (i = fd->collapse; i < fd->ordered; i++)
1661 {
1662 tree itype = TREE_TYPE (fd->loops[i].v);
1663 counts[i] = NULL_TREE;
1664 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1665 fold_convert (itype, fd->loops[i].n1),
1666 fold_convert (itype, fd->loops[i].n2));
1667 if (t && integer_zerop (t))
1668 {
1669 for (i = fd->collapse; i < fd->ordered; i++)
1670 counts[i] = build_int_cst (type, 0);
1671 break;
1672 }
1673 }
1674 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1675 {
1676 tree itype = TREE_TYPE (fd->loops[i].v);
1677
1678 if (i >= fd->collapse && counts[i])
1679 continue;
1680 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1681 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1682 fold_convert (itype, fd->loops[i].n1),
1683 fold_convert (itype, fd->loops[i].n2)))
1684 == NULL_TREE || !integer_onep (t)))
1685 {
1686 gcond *cond_stmt;
1687 tree n1, n2;
1688 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1689 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1690 true, GSI_SAME_STMT);
1691 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1692 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1695 NULL_TREE, NULL_TREE);
1696 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1697 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1698 expand_omp_regimplify_p, NULL, NULL)
1699 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1700 expand_omp_regimplify_p, NULL, NULL))
1701 {
1702 *gsi = gsi_for_stmt (cond_stmt);
1703 gimple_regimplify_operands (cond_stmt, gsi);
1704 }
1705 e = split_block (entry_bb, cond_stmt);
1706 basic_block &zero_iter_bb
1707 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1708 int &first_zero_iter
1709 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1710 if (zero_iter_bb == NULL)
1711 {
1712 gassign *assign_stmt;
1713 first_zero_iter = i;
1714 zero_iter_bb = create_empty_bb (entry_bb);
1715 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1716 *gsi = gsi_after_labels (zero_iter_bb);
1717 if (i < fd->collapse)
1718 assign_stmt = gimple_build_assign (fd->loop.n2,
1719 build_zero_cst (type));
1720 else
1721 {
1722 counts[i] = create_tmp_reg (type, ".count");
1723 assign_stmt
1724 = gimple_build_assign (counts[i], build_zero_cst (type));
1725 }
1726 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1727 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1728 entry_bb);
1729 }
1730 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1731 ne->probability = profile_probability::very_unlikely ();
1732 e->flags = EDGE_TRUE_VALUE;
1733 e->probability = ne->probability.invert ();
1734 if (l2_dom_bb == NULL)
1735 l2_dom_bb = entry_bb;
1736 entry_bb = e->dest;
1737 *gsi = gsi_last_nondebug_bb (entry_bb);
1738 }
1739
1740 if (POINTER_TYPE_P (itype))
1741 itype = signed_type_for (itype);
1742 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1743 ? -1 : 1));
1744 t = fold_build2 (PLUS_EXPR, itype,
1745 fold_convert (itype, fd->loops[i].step), t);
1746 t = fold_build2 (PLUS_EXPR, itype, t,
1747 fold_convert (itype, fd->loops[i].n2));
1748 t = fold_build2 (MINUS_EXPR, itype, t,
1749 fold_convert (itype, fd->loops[i].n1));
1750 /* ?? We could probably use CEIL_DIV_EXPR instead of
1751 TRUNC_DIV_EXPR and adjust by hand.  Unless we can't
1752 generate the same code in the end because generically we
1753 don't know that the values involved must be negative for
1754 GT?? */
1755 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1756 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1757 fold_build1 (NEGATE_EXPR, itype, t),
1758 fold_build1 (NEGATE_EXPR, itype,
1759 fold_convert (itype,
1760 fd->loops[i].step)));
1761 else
1762 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1763 fold_convert (itype, fd->loops[i].step));
1764 t = fold_convert (type, t);
1765 if (TREE_CODE (t) == INTEGER_CST)
1766 counts[i] = t;
1767 else
1768 {
1769 if (i < fd->collapse || i != first_zero_iter2)
1770 counts[i] = create_tmp_reg (type, ".count");
1771 expand_omp_build_assign (gsi, counts[i], t);
1772 }
1773 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1774 {
1775 if (i == 0)
1776 t = counts[0];
1777 else
1778 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1779 expand_omp_build_assign (gsi, fd->loop.n2, t);
1780 }
1781 }
1782 }
1783
1784 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1785 T = V;
1786 V3 = N31 + (T % count3) * STEP3;
1787 T = T / count3;
1788 V2 = N21 + (T % count2) * STEP2;
1789 T = T / count2;
1790 V1 = N11 + T * STEP1;
1791 if this loop doesn't have an inner loop construct combined with it.
1792 If it does have an inner loop construct combined with it and the
1793 iteration count isn't known constant, store values from counts array
1794 into its _looptemp_ temporaries instead. */
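/* As a worked illustration (values chosen here purely for exposition):
   with two collapsed loops
     for (i = 0; i < 4; i++)
       for (j = 0; j < 3; j++)
   the inner trip count is count2 == 3, so logical iteration T == 7
   decomposes as j = 0 + (7 % 3) * 1 == 1, then T = 7 / 3 == 2 and
   i = 0 + 2 * 1 == 2, i.e. the eighth iteration of the 4x3 space.  */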
1795
1796 static void
1797 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1798 tree *counts, gimple *inner_stmt, tree startvar)
1799 {
1800 int i;
1801 if (gimple_omp_for_combined_p (fd->for_stmt))
1802 {
1803 /* If fd->loop.n2 is constant, then no propagation of the counts
1804 is needed; they are constant. */
1805 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1806 return;
1807
1808 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1809 ? gimple_omp_taskreg_clauses (inner_stmt)
1810 : gimple_omp_for_clauses (inner_stmt);
1811 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1812 isn't supposed to be handled, as the inner loop doesn't
1813 use it. */
1814 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1815 gcc_assert (innerc);
1816 for (i = 0; i < fd->collapse; i++)
1817 {
1818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1819 OMP_CLAUSE__LOOPTEMP_);
1820 gcc_assert (innerc);
1821 if (i)
1822 {
1823 tree tem = OMP_CLAUSE_DECL (innerc);
1824 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1825 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1826 false, GSI_CONTINUE_LINKING);
1827 gassign *stmt = gimple_build_assign (tem, t);
1828 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1829 }
1830 }
1831 return;
1832 }
1833
1834 tree type = TREE_TYPE (fd->loop.v);
1835 tree tem = create_tmp_reg (type, ".tem");
1836 gassign *stmt = gimple_build_assign (tem, startvar);
1837 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1838
1839 for (i = fd->collapse - 1; i >= 0; i--)
1840 {
1841 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1842 itype = vtype;
1843 if (POINTER_TYPE_P (vtype))
1844 itype = signed_type_for (vtype);
1845 if (i != 0)
1846 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1847 else
1848 t = tem;
1849 t = fold_convert (itype, t);
1850 t = fold_build2 (MULT_EXPR, itype, t,
1851 fold_convert (itype, fd->loops[i].step));
1852 if (POINTER_TYPE_P (vtype))
1853 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1854 else
1855 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1856 t = force_gimple_operand_gsi (gsi, t,
1857 DECL_P (fd->loops[i].v)
1858 && TREE_ADDRESSABLE (fd->loops[i].v),
1859 NULL_TREE, false,
1860 GSI_CONTINUE_LINKING);
1861 stmt = gimple_build_assign (fd->loops[i].v, t);
1862 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1863 if (i != 0)
1864 {
1865 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1866 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1867 false, GSI_CONTINUE_LINKING);
1868 stmt = gimple_build_assign (tem, t);
1869 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1870 }
1871 }
1872 }
1873
1874 /* Helper function for expand_omp_for_*. Generate code like:
1875 L10:
1876 V3 += STEP3;
1877 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1878 L11:
1879 V3 = N31;
1880 V2 += STEP2;
1881 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1882 L12:
1883 V2 = N21;
1884 V1 += STEP1;
1885 goto BODY_BB; */
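/* In other words, the collapsed induction variables advance like an
   odometer: the innermost variable is stepped first, and whenever it
   runs past its bound it is reset to its lower bound and the next
   outer variable is stepped instead, until the outermost one.  */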
1886
1887 static basic_block
1888 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1889 basic_block body_bb)
1890 {
1891 basic_block last_bb, bb, collapse_bb = NULL;
1892 int i;
1893 gimple_stmt_iterator gsi;
1894 edge e;
1895 tree t;
1896 gimple *stmt;
1897
1898 last_bb = cont_bb;
1899 for (i = fd->collapse - 1; i >= 0; i--)
1900 {
1901 tree vtype = TREE_TYPE (fd->loops[i].v);
1902
1903 bb = create_empty_bb (last_bb);
1904 add_bb_to_loop (bb, last_bb->loop_father);
1905 gsi = gsi_start_bb (bb);
1906
1907 if (i < fd->collapse - 1)
1908 {
1909 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1910 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1911
1912 t = fd->loops[i + 1].n1;
1913 t = force_gimple_operand_gsi (&gsi, t,
1914 DECL_P (fd->loops[i + 1].v)
1915 && TREE_ADDRESSABLE (fd->loops[i
1916 + 1].v),
1917 NULL_TREE, false,
1918 GSI_CONTINUE_LINKING);
1919 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1920 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1921 }
1922 else
1923 collapse_bb = bb;
1924
1925 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1926
1927 if (POINTER_TYPE_P (vtype))
1928 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1929 else
1930 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1931 t = force_gimple_operand_gsi (&gsi, t,
1932 DECL_P (fd->loops[i].v)
1933 && TREE_ADDRESSABLE (fd->loops[i].v),
1934 NULL_TREE, false, GSI_CONTINUE_LINKING);
1935 stmt = gimple_build_assign (fd->loops[i].v, t);
1936 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1937
1938 if (i > 0)
1939 {
1940 t = fd->loops[i].n2;
1941 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1942 false, GSI_CONTINUE_LINKING);
1943 tree v = fd->loops[i].v;
1944 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1945 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1946 false, GSI_CONTINUE_LINKING);
1947 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1948 stmt = gimple_build_cond_empty (t);
1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1951 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1952 }
1953 else
1954 make_edge (bb, body_bb, EDGE_FALLTHRU);
1955 last_bb = bb;
1956 }
1957
1958 return collapse_bb;
1959 }
1960
1961 /* Expand #pragma omp ordered depend(source). */
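/* Roughly speaking, for a loop nest with ordered(N) this becomes a call
   like GOMP_doacross_post (&.orditera[0]) (or the _ull_ variant when the
   iterator type is unsigned long long), passing the address of the array
   that holds the current iteration vector (counts[fd->ordered]).  */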
1962
1963 static void
1964 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1965 tree *counts, location_t loc)
1966 {
1967 enum built_in_function source_ix
1968 = fd->iter_type == long_integer_type_node
1969 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1970 gimple *g
1971 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1972 build_fold_addr_expr (counts[fd->ordered]));
1973 gimple_set_location (g, loc);
1974 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1975 }
1976
1977 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
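/* Conceptually, each sink vector such as depend(sink: i-1, j) is folded
   into flattened iteration numbers and handed to GOMP_doacross_wait (or
   its _ull_ variant); the call is guarded by a runtime condition so the
   wait is skipped when the referenced iteration falls outside the
   iteration space (e.g. i-1 before the very first iteration).  */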
1978
1979 static void
1980 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1981 tree *counts, tree c, location_t loc)
1982 {
1983 auto_vec<tree, 10> args;
1984 enum built_in_function sink_ix
1985 = fd->iter_type == long_integer_type_node
1986 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1987 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1988 int i;
1989 gimple_stmt_iterator gsi2 = *gsi;
1990 bool warned_step = false;
1991
1992 for (i = 0; i < fd->ordered; i++)
1993 {
1994 tree step = NULL_TREE;
1995 off = TREE_PURPOSE (deps);
1996 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1997 {
1998 step = TREE_OPERAND (off, 1);
1999 off = TREE_OPERAND (off, 0);
2000 }
2001 if (!integer_zerop (off))
2002 {
2003 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2004 || fd->loops[i].cond_code == GT_EXPR);
2005 bool forward = fd->loops[i].cond_code == LT_EXPR;
2006 if (step)
2007 {
2008 /* Non-simple Fortran DO loops.  If step is variable,
2009 we don't know even the direction at compile time,
2010 so we can't warn. */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 break;
2013 forward = tree_int_cst_sgn (step) != -1;
2014 }
2015 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2016 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2017 "lexically later iteration");
2018 break;
2019 }
2020 deps = TREE_CHAIN (deps);
2021 }
2022 /* If all offsets corresponding to the collapsed loops are zero,
2023 this depend clause can be ignored. FIXME: but there is still a
2024 flush needed. We need to emit one __sync_synchronize () for it
2025 though (perhaps conditionally)? Solve this together with the
2026 conservative dependence folding optimization.
2027 if (i >= fd->collapse)
2028 return; */
2029
2030 deps = OMP_CLAUSE_DECL (c);
2031 gsi_prev (&gsi2);
2032 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2033 edge e2 = split_block_after_labels (e1->dest);
2034
2035 gsi2 = gsi_after_labels (e1->dest);
2036 *gsi = gsi_last_bb (e1->src);
2037 for (i = 0; i < fd->ordered; i++)
2038 {
2039 tree itype = TREE_TYPE (fd->loops[i].v);
2040 tree step = NULL_TREE;
2041 tree orig_off = NULL_TREE;
2042 if (POINTER_TYPE_P (itype))
2043 itype = sizetype;
2044 if (i)
2045 deps = TREE_CHAIN (deps);
2046 off = TREE_PURPOSE (deps);
2047 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2048 {
2049 step = TREE_OPERAND (off, 1);
2050 off = TREE_OPERAND (off, 0);
2051 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2052 && integer_onep (fd->loops[i].step)
2053 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2054 }
2055 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2056 if (step)
2057 {
2058 off = fold_convert_loc (loc, itype, off);
2059 orig_off = off;
2060 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2061 }
2062
2063 if (integer_zerop (off))
2064 t = boolean_true_node;
2065 else
2066 {
2067 tree a;
2068 tree co = fold_convert_loc (loc, itype, off);
2069 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2070 {
2071 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2072 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2073 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2074 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2075 co);
2076 }
2077 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2078 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2079 fd->loops[i].v, co);
2080 else
2081 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2082 fd->loops[i].v, co);
2083 if (step)
2084 {
2085 tree t1, t2;
2086 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2088 fd->loops[i].n1);
2089 else
2090 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2091 fd->loops[i].n2);
2092 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2094 fd->loops[i].n2);
2095 else
2096 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2097 fd->loops[i].n1);
2098 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2099 step, build_int_cst (TREE_TYPE (step), 0));
2100 if (TREE_CODE (step) != INTEGER_CST)
2101 {
2102 t1 = unshare_expr (t1);
2103 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2104 false, GSI_CONTINUE_LINKING);
2105 t2 = unshare_expr (t2);
2106 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2107 false, GSI_CONTINUE_LINKING);
2108 }
2109 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2110 t, t2, t1);
2111 }
2112 else if (fd->loops[i].cond_code == LT_EXPR)
2113 {
2114 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2115 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2116 fd->loops[i].n1);
2117 else
2118 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2119 fd->loops[i].n2);
2120 }
2121 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2122 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2123 fd->loops[i].n2);
2124 else
2125 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2126 fd->loops[i].n1);
2127 }
2128 if (cond)
2129 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2130 else
2131 cond = t;
2132
2133 off = fold_convert_loc (loc, itype, off);
2134
2135 if (step
2136 || (fd->loops[i].cond_code == LT_EXPR
2137 ? !integer_onep (fd->loops[i].step)
2138 : !integer_minus_onep (fd->loops[i].step)))
2139 {
2140 if (step == NULL_TREE
2141 && TYPE_UNSIGNED (itype)
2142 && fd->loops[i].cond_code == GT_EXPR)
2143 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2144 fold_build1_loc (loc, NEGATE_EXPR, itype,
2145 s));
2146 else
2147 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2148 orig_off ? orig_off : off, s);
2149 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2150 build_int_cst (itype, 0));
2151 if (integer_zerop (t) && !warned_step)
2152 {
2153 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2154 "in the iteration space");
2155 warned_step = true;
2156 }
2157 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2158 cond, t);
2159 }
2160
2161 if (i <= fd->collapse - 1 && fd->collapse > 1)
2162 t = fd->loop.v;
2163 else if (counts[i])
2164 t = counts[i];
2165 else
2166 {
2167 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2168 fd->loops[i].v, fd->loops[i].n1);
2169 t = fold_convert_loc (loc, fd->iter_type, t);
2170 }
2171 if (step)
2172 /* We have divided off by step already earlier. */;
2173 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2174 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2175 fold_build1_loc (loc, NEGATE_EXPR, itype,
2176 s));
2177 else
2178 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2179 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2180 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2181 off = fold_convert_loc (loc, fd->iter_type, off);
2182 if (i <= fd->collapse - 1 && fd->collapse > 1)
2183 {
2184 if (i)
2185 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2186 off);
2187 if (i < fd->collapse - 1)
2188 {
2189 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2190 counts[i]);
2191 continue;
2192 }
2193 }
2194 off = unshare_expr (off);
2195 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2196 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2197 true, GSI_SAME_STMT);
2198 args.safe_push (t);
2199 }
2200 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2201 gimple_set_location (g, loc);
2202 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2203
2204 cond = unshare_expr (cond);
2205 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2206 GSI_CONTINUE_LINKING);
2207 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2208 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2209 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2210 e1->probability = e3->probability.invert ();
2211 e1->flags = EDGE_TRUE_VALUE;
2212 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2213
2214 *gsi = gsi_after_labels (e2->dest);
2215 }
2216
2217 /* Expand all #pragma omp ordered depend(source) and
2218 #pragma omp ordered depend(sink:...) constructs in the current
2219 #pragma omp for ordered(n) region. */
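/* For example (an illustrative source-level sketch), in a doacross loop
     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 1; j < m; j++)
         {
           #pragma omp ordered depend(sink: i-1, j) depend(sink: i, j-1)
           ... work that reads results of earlier iterations ...
           #pragma omp ordered depend(source)
         }
   each GIMPLE_OMP_ORDERED region found inside this loop region is
   rewritten below into the corresponding wait/post sequence.  */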
2220
2221 static void
2222 expand_omp_ordered_source_sink (struct omp_region *region,
2223 struct omp_for_data *fd, tree *counts,
2224 basic_block cont_bb)
2225 {
2226 struct omp_region *inner;
2227 int i;
2228 for (i = fd->collapse - 1; i < fd->ordered; i++)
2229 if (i == fd->collapse - 1 && fd->collapse > 1)
2230 counts[i] = NULL_TREE;
2231 else if (i >= fd->collapse && !cont_bb)
2232 counts[i] = build_zero_cst (fd->iter_type);
2233 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2234 && integer_onep (fd->loops[i].step))
2235 counts[i] = NULL_TREE;
2236 else
2237 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2238 tree atype
2239 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2240 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2241 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2242
2243 for (inner = region->inner; inner; inner = inner->next)
2244 if (inner->type == GIMPLE_OMP_ORDERED)
2245 {
2246 gomp_ordered *ord_stmt = inner->ord_stmt;
2247 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2248 location_t loc = gimple_location (ord_stmt);
2249 tree c;
2250 for (c = gimple_omp_ordered_clauses (ord_stmt);
2251 c; c = OMP_CLAUSE_CHAIN (c))
2252 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2253 break;
2254 if (c)
2255 expand_omp_ordered_source (&gsi, fd, counts, loc);
2256 for (c = gimple_omp_ordered_clauses (ord_stmt);
2257 c; c = OMP_CLAUSE_CHAIN (c))
2258 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2259 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2260 gsi_remove (&gsi, true);
2261 }
2262 }
2263
2264 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2265 collapsed. */
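/* That is, for ordered(N) combined with collapse(M), M < N, dimensions
   M..N-1 are not distributed across threads; each thread runs them as
   plain sequential loops wrapped around the body here, recording its
   position in the .orditera array so that depend(source)/depend(sink)
   can observe the full iteration vector.  */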
2266
2267 static basic_block
2268 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2269 basic_block cont_bb, basic_block body_bb,
2270 bool ordered_lastprivate)
2271 {
2272 if (fd->ordered == fd->collapse)
2273 return cont_bb;
2274
2275 if (!cont_bb)
2276 {
2277 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2278 for (int i = fd->collapse; i < fd->ordered; i++)
2279 {
2280 tree type = TREE_TYPE (fd->loops[i].v);
2281 tree n1 = fold_convert (type, fd->loops[i].n1);
2282 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2283 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2284 size_int (i - fd->collapse + 1),
2285 NULL_TREE, NULL_TREE);
2286 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2287 }
2288 return NULL;
2289 }
2290
2291 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2292 {
2293 tree t, type = TREE_TYPE (fd->loops[i].v);
2294 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2295 expand_omp_build_assign (&gsi, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].n1));
2297 if (counts[i])
2298 expand_omp_build_assign (&gsi, counts[i],
2299 build_zero_cst (fd->iter_type));
2300 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2301 size_int (i - fd->collapse + 1),
2302 NULL_TREE, NULL_TREE);
2303 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2304 if (!gsi_end_p (gsi))
2305 gsi_prev (&gsi);
2306 else
2307 gsi = gsi_last_bb (body_bb);
2308 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2309 basic_block new_body = e1->dest;
2310 if (body_bb == cont_bb)
2311 cont_bb = new_body;
2312 edge e2 = NULL;
2313 basic_block new_header;
2314 if (EDGE_COUNT (cont_bb->preds) > 0)
2315 {
2316 gsi = gsi_last_bb (cont_bb);
2317 if (POINTER_TYPE_P (type))
2318 t = fold_build_pointer_plus (fd->loops[i].v,
2319 fold_convert (sizetype,
2320 fd->loops[i].step));
2321 else
2322 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2323 fold_convert (type, fd->loops[i].step));
2324 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2325 if (counts[i])
2326 {
2327 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2328 build_int_cst (fd->iter_type, 1));
2329 expand_omp_build_assign (&gsi, counts[i], t);
2330 t = counts[i];
2331 }
2332 else
2333 {
2334 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2335 fd->loops[i].v, fd->loops[i].n1);
2336 t = fold_convert (fd->iter_type, t);
2337 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2338 true, GSI_SAME_STMT);
2339 }
2340 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2341 size_int (i - fd->collapse + 1),
2342 NULL_TREE, NULL_TREE);
2343 expand_omp_build_assign (&gsi, aref, t);
2344 gsi_prev (&gsi);
2345 e2 = split_block (cont_bb, gsi_stmt (gsi));
2346 new_header = e2->dest;
2347 }
2348 else
2349 new_header = cont_bb;
2350 gsi = gsi_after_labels (new_header);
2351 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2352 true, GSI_SAME_STMT);
2353 tree n2
2354 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2355 true, NULL_TREE, true, GSI_SAME_STMT);
2356 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2357 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2358 edge e3 = split_block (new_header, gsi_stmt (gsi));
2359 cont_bb = e3->dest;
2360 remove_edge (e1);
2361 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2362 e3->flags = EDGE_FALSE_VALUE;
2363 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2364 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2365 e1->probability = e3->probability.invert ();
2366
2367 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2368 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2369
2370 if (e2)
2371 {
2372 struct loop *loop = alloc_loop ();
2373 loop->header = new_header;
2374 loop->latch = e2->src;
2375 add_loop (loop, body_bb->loop_father);
2376 }
2377 }
2378
2379 /* If there are any lastprivate clauses and it is possible some loops
2380 might have zero iterations, ensure all the decls are initialized,
2381 otherwise we could crash evaluating C++ class iterators with lastprivate
2382 clauses. */
2383 bool need_inits = false;
2384 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2385 if (need_inits)
2386 {
2387 tree type = TREE_TYPE (fd->loops[i].v);
2388 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2389 expand_omp_build_assign (&gsi, fd->loops[i].v,
2390 fold_convert (type, fd->loops[i].n1));
2391 }
2392 else
2393 {
2394 tree type = TREE_TYPE (fd->loops[i].v);
2395 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2396 boolean_type_node,
2397 fold_convert (type, fd->loops[i].n1),
2398 fold_convert (type, fd->loops[i].n2));
2399 if (!integer_onep (this_cond))
2400 need_inits = true;
2401 }
2402
2403 return cont_bb;
2404 }
2405
2406 /* A subroutine of expand_omp_for. Generate code for a parallel
2407 loop with any schedule. Given parameters:
2408
2409 for (V = N1; V cond N2; V += STEP) BODY;
2410
2411 where COND is "<" or ">", we generate pseudocode
2412
2413 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2414 if (more) goto L0; else goto L3;
2415 L0:
2416 V = istart0;
2417 iend = iend0;
2418 L1:
2419 BODY;
2420 V += STEP;
2421 if (V cond iend) goto L1; else goto L2;
2422 L2:
2423 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2424 L3:
2425
2426 If this is a combined omp parallel loop, instead of the call to
2427 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2428 If this is gimple_omp_for_combined_p loop, then instead of assigning
2429 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2430 inner GIMPLE_OMP_FOR and V += STEP; and
2431 if (V cond iend) goto L1; else goto L2; are removed.
2432
2433 For collapsed loops, given parameters:
2434 collapse(3)
2435 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2436 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2437 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2438 BODY;
2439
2440 we generate pseudocode
2441
2442 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2443 if (cond3 is <)
2444 adj = STEP3 - 1;
2445 else
2446 adj = STEP3 + 1;
2447 count3 = (adj + N32 - N31) / STEP3;
2448 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2449 if (cond2 is <)
2450 adj = STEP2 - 1;
2451 else
2452 adj = STEP2 + 1;
2453 count2 = (adj + N22 - N21) / STEP2;
2454 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2455 if (cond1 is <)
2456 adj = STEP1 - 1;
2457 else
2458 adj = STEP1 + 1;
2459 count1 = (adj + N12 - N11) / STEP1;
2460 count = count1 * count2 * count3;
2461 goto Z1;
2462 Z0:
2463 count = 0;
2464 Z1:
2465 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2466 if (more) goto L0; else goto L3;
2467 L0:
2468 V = istart0;
2469 T = V;
2470 V3 = N31 + (T % count3) * STEP3;
2471 T = T / count3;
2472 V2 = N21 + (T % count2) * STEP2;
2473 T = T / count2;
2474 V1 = N11 + T * STEP1;
2475 iend = iend0;
2476 L1:
2477 BODY;
2478 V += 1;
2479 if (V < iend) goto L10; else goto L2;
2480 L10:
2481 V3 += STEP3;
2482 if (V3 cond3 N32) goto L1; else goto L11;
2483 L11:
2484 V3 = N31;
2485 V2 += STEP2;
2486 if (V2 cond2 N22) goto L1; else goto L12;
2487 L12:
2488 V2 = N21;
2489 V1 += STEP1;
2490 goto L1;
2491 L2:
2492 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2493 L3:
2494
2495 */
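/* As a concrete, purely illustrative instance: for
     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) BODY;
   the caller passes start_fn == BUILT_IN_GOMP_LOOP_DYNAMIC_START and
   next_fn == BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, so the pseudocode above
   amounts to roughly
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
         for (i = istart0; i < iend0; i++) BODY;
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */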
2496
2497 static void
2498 expand_omp_for_generic (struct omp_region *region,
2499 struct omp_for_data *fd,
2500 enum built_in_function start_fn,
2501 enum built_in_function next_fn,
2502 gimple *inner_stmt)
2503 {
2504 tree type, istart0, iend0, iend;
2505 tree t, vmain, vback, bias = NULL_TREE;
2506 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2507 basic_block l2_bb = NULL, l3_bb = NULL;
2508 gimple_stmt_iterator gsi;
2509 gassign *assign_stmt;
2510 bool in_combined_parallel = is_combined_parallel (region);
2511 bool broken_loop = region->cont == NULL;
2512 edge e, ne;
2513 tree *counts = NULL;
2514 int i;
2515 bool ordered_lastprivate = false;
2516
2517 gcc_assert (!broken_loop || !in_combined_parallel);
2518 gcc_assert (fd->iter_type == long_integer_type_node
2519 || !in_combined_parallel);
2520
2521 entry_bb = region->entry;
2522 cont_bb = region->cont;
2523 collapse_bb = NULL;
2524 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2525 gcc_assert (broken_loop
2526 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2527 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2528 l1_bb = single_succ (l0_bb);
2529 if (!broken_loop)
2530 {
2531 l2_bb = create_empty_bb (cont_bb);
2532 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2533 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2534 == l1_bb));
2535 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2536 }
2537 else
2538 l2_bb = NULL;
2539 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2540 exit_bb = region->exit;
2541
2542 gsi = gsi_last_nondebug_bb (entry_bb);
2543
2544 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2545 if (fd->ordered
2546 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2547 OMP_CLAUSE_LASTPRIVATE))
2548 ordered_lastprivate = true;
2549 if (fd->collapse > 1 || fd->ordered)
2550 {
2551 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2552 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2553
2554 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2555 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2556 zero_iter1_bb, first_zero_iter1,
2557 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2558
2559 if (zero_iter1_bb)
2560 {
2561 /* Some counts[i] vars might be uninitialized if
2562 some loop has zero iterations. But the body shouldn't
2563 be executed in that case, so just avoid uninit warnings. */
2564 for (i = first_zero_iter1;
2565 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2566 if (SSA_VAR_P (counts[i]))
2567 TREE_NO_WARNING (counts[i]) = 1;
2568 gsi_prev (&gsi);
2569 e = split_block (entry_bb, gsi_stmt (gsi));
2570 entry_bb = e->dest;
2571 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2572 gsi = gsi_last_nondebug_bb (entry_bb);
2573 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2574 get_immediate_dominator (CDI_DOMINATORS,
2575 zero_iter1_bb));
2576 }
2577 if (zero_iter2_bb)
2578 {
2579 /* Some counts[i] vars might be uninitialized if
2580 some loop has zero iterations. But the body shouldn't
2581 be executed in that case, so just avoid uninit warnings. */
2582 for (i = first_zero_iter2; i < fd->ordered; i++)
2583 if (SSA_VAR_P (counts[i]))
2584 TREE_NO_WARNING (counts[i]) = 1;
2585 if (zero_iter1_bb)
2586 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2587 else
2588 {
2589 gsi_prev (&gsi);
2590 e = split_block (entry_bb, gsi_stmt (gsi));
2591 entry_bb = e->dest;
2592 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2593 gsi = gsi_last_nondebug_bb (entry_bb);
2594 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2595 get_immediate_dominator
2596 (CDI_DOMINATORS, zero_iter2_bb));
2597 }
2598 }
2599 if (fd->collapse == 1)
2600 {
2601 counts[0] = fd->loop.n2;
2602 fd->loop = fd->loops[0];
2603 }
2604 }
2605
2606 type = TREE_TYPE (fd->loop.v);
2607 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2608 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2609 TREE_ADDRESSABLE (istart0) = 1;
2610 TREE_ADDRESSABLE (iend0) = 1;
2611
2612 /* See if we need to bias by LLONG_MIN. */
2613 if (fd->iter_type == long_long_unsigned_type_node
2614 && TREE_CODE (type) == INTEGER_TYPE
2615 && !TYPE_UNSIGNED (type)
2616 && fd->ordered == 0)
2617 {
2618 tree n1, n2;
2619
2620 if (fd->loop.cond_code == LT_EXPR)
2621 {
2622 n1 = fd->loop.n1;
2623 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2624 }
2625 else
2626 {
2627 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2628 n2 = fd->loop.n1;
2629 }
2630 if (TREE_CODE (n1) != INTEGER_CST
2631 || TREE_CODE (n2) != INTEGER_CST
2632 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2633 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2634 }
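  /* Adding the bias (TYPE_MIN_VALUE of the signed iterator type, converted
     to the unsigned iteration type) shifts the signed bounds into the
     unsigned long long space while preserving their relative order, so the
     _ull_ runtime entry points can be used; the bias is subtracted again
     below when istart0/iend0 are converted back to the iterator type.  */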
2635
2636 gimple_stmt_iterator gsif = gsi;
2637 gsi_prev (&gsif);
2638
2639 tree arr = NULL_TREE;
2640 if (in_combined_parallel)
2641 {
2642 gcc_assert (fd->ordered == 0);
2643 /* In a combined parallel loop, emit a call to
2644 GOMP_loop_foo_next. */
2645 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2646 build_fold_addr_expr (istart0),
2647 build_fold_addr_expr (iend0));
2648 }
2649 else
2650 {
2651 tree t0, t1, t2, t3, t4;
2652 /* If this is not a combined parallel loop, emit a call to
2653 GOMP_loop_foo_start in ENTRY_BB. */
2654 t4 = build_fold_addr_expr (iend0);
2655 t3 = build_fold_addr_expr (istart0);
2656 if (fd->ordered)
2657 {
2658 t0 = build_int_cst (unsigned_type_node,
2659 fd->ordered - fd->collapse + 1);
2660 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2661 fd->ordered
2662 - fd->collapse + 1),
2663 ".omp_counts");
2664 DECL_NAMELESS (arr) = 1;
2665 TREE_ADDRESSABLE (arr) = 1;
2666 TREE_STATIC (arr) = 1;
2667 vec<constructor_elt, va_gc> *v;
2668 vec_alloc (v, fd->ordered - fd->collapse + 1);
2669 int idx;
2670
2671 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2672 {
2673 tree c;
2674 if (idx == 0 && fd->collapse > 1)
2675 c = fd->loop.n2;
2676 else
2677 c = counts[idx + fd->collapse - 1];
2678 tree purpose = size_int (idx);
2679 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2680 if (TREE_CODE (c) != INTEGER_CST)
2681 TREE_STATIC (arr) = 0;
2682 }
2683
2684 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2685 if (!TREE_STATIC (arr))
2686 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2687 void_type_node, arr),
2688 true, NULL_TREE, true, GSI_SAME_STMT);
2689 t1 = build_fold_addr_expr (arr);
2690 t2 = NULL_TREE;
2691 }
2692 else
2693 {
2694 t2 = fold_convert (fd->iter_type, fd->loop.step);
2695 t1 = fd->loop.n2;
2696 t0 = fd->loop.n1;
2697 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2698 {
2699 tree innerc
2700 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2701 OMP_CLAUSE__LOOPTEMP_);
2702 gcc_assert (innerc);
2703 t0 = OMP_CLAUSE_DECL (innerc);
2704 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2705 OMP_CLAUSE__LOOPTEMP_);
2706 gcc_assert (innerc);
2707 t1 = OMP_CLAUSE_DECL (innerc);
2708 }
2709 if (POINTER_TYPE_P (TREE_TYPE (t0))
2710 && TYPE_PRECISION (TREE_TYPE (t0))
2711 != TYPE_PRECISION (fd->iter_type))
2712 {
2713 /* Avoid casting pointers to integer of a different size. */
2714 tree itype = signed_type_for (type);
2715 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2716 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2717 }
2718 else
2719 {
2720 t1 = fold_convert (fd->iter_type, t1);
2721 t0 = fold_convert (fd->iter_type, t0);
2722 }
2723 if (bias)
2724 {
2725 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2726 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2727 }
2728 }
2729 if (fd->iter_type == long_integer_type_node || fd->ordered)
2730 {
2731 if (fd->chunk_size)
2732 {
2733 t = fold_convert (fd->iter_type, fd->chunk_size);
2734 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2735 if (fd->ordered)
2736 t = build_call_expr (builtin_decl_explicit (start_fn),
2737 5, t0, t1, t, t3, t4);
2738 else
2739 t = build_call_expr (builtin_decl_explicit (start_fn),
2740 6, t0, t1, t2, t, t3, t4);
2741 }
2742 else if (fd->ordered)
2743 t = build_call_expr (builtin_decl_explicit (start_fn),
2744 4, t0, t1, t3, t4);
2745 else
2746 t = build_call_expr (builtin_decl_explicit (start_fn),
2747 5, t0, t1, t2, t3, t4);
2748 }
2749 else
2750 {
2751 tree t5;
2752 tree c_bool_type;
2753 tree bfn_decl;
2754
2755 /* The GOMP_loop_ull_*start functions have an additional boolean
2756 argument, true for < loops and false for > loops.
2757 In Fortran, the C bool type can be different from
2758 boolean_type_node. */
2759 bfn_decl = builtin_decl_explicit (start_fn);
2760 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2761 t5 = build_int_cst (c_bool_type,
2762 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2763 if (fd->chunk_size)
2764 {
2765 tree bfn_decl = builtin_decl_explicit (start_fn);
2766 t = fold_convert (fd->iter_type, fd->chunk_size);
2767 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2768 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2769 }
2770 else
2771 t = build_call_expr (builtin_decl_explicit (start_fn),
2772 6, t5, t0, t1, t2, t3, t4);
2773 }
2774 }
2775 if (TREE_TYPE (t) != boolean_type_node)
2776 t = fold_build2 (NE_EXPR, boolean_type_node,
2777 t, build_int_cst (TREE_TYPE (t), 0));
2778 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2779 true, GSI_SAME_STMT);
2780 if (arr && !TREE_STATIC (arr))
2781 {
2782 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2783 TREE_THIS_VOLATILE (clobber) = 1;
2784 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2785 GSI_SAME_STMT);
2786 }
2787 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2788
2789 /* Remove the GIMPLE_OMP_FOR statement. */
2790 gsi_remove (&gsi, true);
2791
2792 if (gsi_end_p (gsif))
2793 gsif = gsi_after_labels (gsi_bb (gsif));
2794 gsi_next (&gsif);
2795
2796 /* Iteration setup for sequential loop goes in L0_BB. */
2797 tree startvar = fd->loop.v;
2798 tree endvar = NULL_TREE;
2799
2800 if (gimple_omp_for_combined_p (fd->for_stmt))
2801 {
2802 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2803 && gimple_omp_for_kind (inner_stmt)
2804 == GF_OMP_FOR_KIND_SIMD);
2805 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2806 OMP_CLAUSE__LOOPTEMP_);
2807 gcc_assert (innerc);
2808 startvar = OMP_CLAUSE_DECL (innerc);
2809 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2810 OMP_CLAUSE__LOOPTEMP_);
2811 gcc_assert (innerc);
2812 endvar = OMP_CLAUSE_DECL (innerc);
2813 }
2814
2815 gsi = gsi_start_bb (l0_bb);
2816 t = istart0;
2817 if (fd->ordered && fd->collapse == 1)
2818 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2819 fold_convert (fd->iter_type, fd->loop.step));
2820 else if (bias)
2821 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2822 if (fd->ordered && fd->collapse == 1)
2823 {
2824 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2825 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2826 fd->loop.n1, fold_convert (sizetype, t));
2827 else
2828 {
2829 t = fold_convert (TREE_TYPE (startvar), t);
2830 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2831 fd->loop.n1, t);
2832 }
2833 }
2834 else
2835 {
2836 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2837 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2838 t = fold_convert (TREE_TYPE (startvar), t);
2839 }
2840 t = force_gimple_operand_gsi (&gsi, t,
2841 DECL_P (startvar)
2842 && TREE_ADDRESSABLE (startvar),
2843 NULL_TREE, false, GSI_CONTINUE_LINKING);
2844 assign_stmt = gimple_build_assign (startvar, t);
2845 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2846
2847 t = iend0;
2848 if (fd->ordered && fd->collapse == 1)
2849 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2850 fold_convert (fd->iter_type, fd->loop.step));
2851 else if (bias)
2852 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2853 if (fd->ordered && fd->collapse == 1)
2854 {
2855 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2856 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2857 fd->loop.n1, fold_convert (sizetype, t));
2858 else
2859 {
2860 t = fold_convert (TREE_TYPE (startvar), t);
2861 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2862 fd->loop.n1, t);
2863 }
2864 }
2865 else
2866 {
2867 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2868 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2869 t = fold_convert (TREE_TYPE (startvar), t);
2870 }
2871 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2872 false, GSI_CONTINUE_LINKING);
2873 if (endvar)
2874 {
2875 assign_stmt = gimple_build_assign (endvar, iend);
2876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2877 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2878 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2879 else
2880 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2881 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2882 }
2883 /* Handle linear clause adjustments. */
2884 tree itercnt = NULL_TREE;
2885 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2886 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2887 c; c = OMP_CLAUSE_CHAIN (c))
2888 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2889 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2890 {
2891 tree d = OMP_CLAUSE_DECL (c);
2892 bool is_ref = omp_is_reference (d);
2893 tree t = d, a, dest;
2894 if (is_ref)
2895 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2896 tree type = TREE_TYPE (t);
2897 if (POINTER_TYPE_P (type))
2898 type = sizetype;
2899 dest = unshare_expr (t);
2900 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2901 expand_omp_build_assign (&gsif, v, t);
2902 if (itercnt == NULL_TREE)
2903 {
2904 itercnt = startvar;
2905 tree n1 = fd->loop.n1;
2906 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2907 {
2908 itercnt
2909 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2910 itercnt);
2911 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2912 }
2913 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2914 itercnt, n1);
2915 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2916 itercnt, fd->loop.step);
2917 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2918 NULL_TREE, false,
2919 GSI_CONTINUE_LINKING);
2920 }
2921 a = fold_build2 (MULT_EXPR, type,
2922 fold_convert (type, itercnt),
2923 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2924 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2925 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2926 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2927 false, GSI_CONTINUE_LINKING);
2928 assign_stmt = gimple_build_assign (dest, t);
2929 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2930 }
2931 if (fd->collapse > 1)
2932 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2933
2934 if (fd->ordered)
2935 {
2936 /* Until now, the counts array contained the number of iterations
2937 (or a variable holding it) for the ith loop.  From now on we
2938 need those counts only for the collapsed loops, and only from
2939 the 2nd to the last collapsed one.  Move them one element
2940 earlier; we'll use counts[fd->collapse - 1] for the first
2941 source/sink iteration counter and so on, and counts[fd->ordered]
2942 as the array holding the current counter values for
2943 depend(source). */
2944 if (fd->collapse > 1)
2945 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2946 if (broken_loop)
2947 {
2948 int i;
2949 for (i = fd->collapse; i < fd->ordered; i++)
2950 {
2951 tree type = TREE_TYPE (fd->loops[i].v);
2952 tree this_cond
2953 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2954 fold_convert (type, fd->loops[i].n1),
2955 fold_convert (type, fd->loops[i].n2));
2956 if (!integer_onep (this_cond))
2957 break;
2958 }
2959 if (i < fd->ordered)
2960 {
2961 cont_bb
2962 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2963 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2964 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2965 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2966 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2967 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2968 make_edge (cont_bb, l1_bb, 0);
2969 l2_bb = create_empty_bb (cont_bb);
2970 broken_loop = false;
2971 }
2972 }
2973 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2974 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2975 ordered_lastprivate);
2976 if (counts[fd->collapse - 1])
2977 {
2978 gcc_assert (fd->collapse == 1);
2979 gsi = gsi_last_bb (l0_bb);
2980 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2981 istart0, true);
2982 gsi = gsi_last_bb (cont_bb);
2983 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2984 build_int_cst (fd->iter_type, 1));
2985 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2986 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2987 size_zero_node, NULL_TREE, NULL_TREE);
2988 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2989 t = counts[fd->collapse - 1];
2990 }
2991 else if (fd->collapse > 1)
2992 t = fd->loop.v;
2993 else
2994 {
2995 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2996 fd->loops[0].v, fd->loops[0].n1);
2997 t = fold_convert (fd->iter_type, t);
2998 }
2999 gsi = gsi_last_bb (l0_bb);
3000 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 size_zero_node, NULL_TREE, NULL_TREE);
3002 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3003 false, GSI_CONTINUE_LINKING);
3004 expand_omp_build_assign (&gsi, aref, t, true);
3005 }
3006
3007 if (!broken_loop)
3008 {
3009 /* Code to control the increment and predicate for the sequential
3010 loop goes in the CONT_BB. */
3011 gsi = gsi_last_nondebug_bb (cont_bb);
3012 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3013 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3014 vmain = gimple_omp_continue_control_use (cont_stmt);
3015 vback = gimple_omp_continue_control_def (cont_stmt);
3016
3017 if (!gimple_omp_for_combined_p (fd->for_stmt))
3018 {
3019 if (POINTER_TYPE_P (type))
3020 t = fold_build_pointer_plus (vmain, fd->loop.step);
3021 else
3022 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3023 t = force_gimple_operand_gsi (&gsi, t,
3024 DECL_P (vback)
3025 && TREE_ADDRESSABLE (vback),
3026 NULL_TREE, true, GSI_SAME_STMT);
3027 assign_stmt = gimple_build_assign (vback, t);
3028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3029
3030 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3031 {
3032 if (fd->collapse > 1)
3033 t = fd->loop.v;
3034 else
3035 {
3036 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3037 fd->loops[0].v, fd->loops[0].n1);
3038 t = fold_convert (fd->iter_type, t);
3039 }
3040 tree aref = build4 (ARRAY_REF, fd->iter_type,
3041 counts[fd->ordered], size_zero_node,
3042 NULL_TREE, NULL_TREE);
3043 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3044 true, GSI_SAME_STMT);
3045 expand_omp_build_assign (&gsi, aref, t);
3046 }
3047
3048 t = build2 (fd->loop.cond_code, boolean_type_node,
3049 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3050 iend);
3051 gcond *cond_stmt = gimple_build_cond_empty (t);
3052 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3053 }
3054
3055 /* Remove GIMPLE_OMP_CONTINUE. */
3056 gsi_remove (&gsi, true);
3057
3058 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3059 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3060
3061 /* Emit code to get the next parallel iteration in L2_BB. */
3062 gsi = gsi_start_bb (l2_bb);
3063
3064 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3065 build_fold_addr_expr (istart0),
3066 build_fold_addr_expr (iend0));
3067 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3068 false, GSI_CONTINUE_LINKING);
3069 if (TREE_TYPE (t) != boolean_type_node)
3070 t = fold_build2 (NE_EXPR, boolean_type_node,
3071 t, build_int_cst (TREE_TYPE (t), 0));
3072 gcond *cond_stmt = gimple_build_cond_empty (t);
3073 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3074 }
3075
3076 /* Add the loop cleanup function. */
3077 gsi = gsi_last_nondebug_bb (exit_bb);
3078 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3079 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3080 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3081 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3082 else
3083 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3084 gcall *call_stmt = gimple_build_call (t, 0);
3085 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3086 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3087 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3088 if (fd->ordered)
3089 {
3090 tree arr = counts[fd->ordered];
3091 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3092 TREE_THIS_VOLATILE (clobber) = 1;
3093 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3094 GSI_SAME_STMT);
3095 }
3096 gsi_remove (&gsi, true);
3097
3098 /* Connect the new blocks. */
3099 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3100 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3101
3102 if (!broken_loop)
3103 {
3104 gimple_seq phis;
3105
3106 e = find_edge (cont_bb, l3_bb);
3107 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3108
3109 phis = phi_nodes (l3_bb);
3110 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3111 {
3112 gimple *phi = gsi_stmt (gsi);
3113 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3114 PHI_ARG_DEF_FROM_EDGE (phi, e));
3115 }
3116 remove_edge (e);
3117
3118 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3119 e = find_edge (cont_bb, l1_bb);
3120 if (e == NULL)
3121 {
3122 e = BRANCH_EDGE (cont_bb);
3123 gcc_assert (single_succ (e->dest) == l1_bb);
3124 }
3125 if (gimple_omp_for_combined_p (fd->for_stmt))
3126 {
3127 remove_edge (e);
3128 e = NULL;
3129 }
3130 else if (fd->collapse > 1)
3131 {
3132 remove_edge (e);
3133 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3134 }
3135 else
3136 e->flags = EDGE_TRUE_VALUE;
3137 if (e)
3138 {
3139 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3140 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3141 }
3142 else
3143 {
3144 e = find_edge (cont_bb, l2_bb);
3145 e->flags = EDGE_FALLTHRU;
3146 }
3147 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3148
3149 if (gimple_in_ssa_p (cfun))
3150 {
3151 /* Add phis to the outer loop that connect to the phis in the inner,
3152 original loop, and move the loop entry value of the inner phi to
3153 the loop entry value of the outer phi. */
3154 gphi_iterator psi;
3155 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3156 {
3157 source_location locus;
3158 gphi *nphi;
3159 gphi *exit_phi = psi.phi ();
3160
3161 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3162 continue;
3163
3164 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3165 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3166
3167 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3168 edge latch_to_l1 = find_edge (latch, l1_bb);
3169 gphi *inner_phi
3170 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3171
3172 tree t = gimple_phi_result (exit_phi);
3173 tree new_res = copy_ssa_name (t, NULL);
3174 nphi = create_phi_node (new_res, l0_bb);
3175
3176 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3177 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3178 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3179 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3180 add_phi_arg (nphi, t, entry_to_l0, locus);
3181
3182 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3183 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3184
3185 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3186 }
3187 }
3188
3189 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3190 recompute_dominator (CDI_DOMINATORS, l2_bb));
3191 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3192 recompute_dominator (CDI_DOMINATORS, l3_bb));
3193 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3194 recompute_dominator (CDI_DOMINATORS, l0_bb));
3195 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3196 recompute_dominator (CDI_DOMINATORS, l1_bb));
3197
3198 /* We enter expand_omp_for_generic with a loop. This original loop may
3199 have its own loop struct, or it may be part of an outer loop struct
3200 (which may be the fake loop). */
3201 struct loop *outer_loop = entry_bb->loop_father;
3202 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3203
3204 add_bb_to_loop (l2_bb, outer_loop);
3205
3206 /* We've added a new loop around the original loop. Allocate the
3207 corresponding loop struct. */
3208 struct loop *new_loop = alloc_loop ();
3209 new_loop->header = l0_bb;
3210 new_loop->latch = l2_bb;
3211 add_loop (new_loop, outer_loop);
3212
3213 /* Allocate a loop structure for the original loop unless we already
3214 had one. */
3215 if (!orig_loop_has_loop_struct
3216 && !gimple_omp_for_combined_p (fd->for_stmt))
3217 {
3218 struct loop *orig_loop = alloc_loop ();
3219 orig_loop->header = l1_bb;
3220 /* The loop may have multiple latches. */
3221 add_loop (orig_loop, new_loop);
3222 }
3223 }
3224 }
3225
3226 /* A subroutine of expand_omp_for. Generate code for a parallel
3227 loop with static schedule and no specified chunk size. Given
3228 parameters:
3229
3230 for (V = N1; V cond N2; V += STEP) BODY;
3231
3232 where COND is "<" or ">", we generate pseudocode
3233
3234 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3235 if (cond is <)
3236 adj = STEP - 1;
3237 else
3238 adj = STEP + 1;
3239 if ((__typeof (V)) -1 > 0 && cond is >)
3240 n = -(adj + N2 - N1) / -STEP;
3241 else
3242 n = (adj + N2 - N1) / STEP;
3243 q = n / nthreads;
3244 tt = n % nthreads;
3245 if (threadid < tt) goto L3; else goto L4;
3246 L3:
3247 tt = 0;
3248 q = q + 1;
3249 L4:
3250 s0 = q * threadid + tt;
3251 e0 = s0 + q;
3252 V = s0 * STEP + N1;
3253 if (s0 >= e0) goto L2; else goto L0;
3254 L0:
3255 e = e0 * STEP + N1;
3256 L1:
3257 BODY;
3258 V += STEP;
3259 if (V cond e) goto L1;
3260 L2:
3261 */
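/* For instance (numbers chosen only to illustrate the division above):
   with n == 10 iterations and nthreads == 4, q == 2 and tt == 2, so
   threads 0 and 1 each take q + 1 == 3 iterations ([0,3) and [3,6))
   while threads 2 and 3 each take 2 ([6,8) and [8,10)), covering the
   whole range with at most one extra iteration per thread.  */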
3262
3263 static void
3264 expand_omp_for_static_nochunk (struct omp_region *region,
3265 struct omp_for_data *fd,
3266 gimple *inner_stmt)
3267 {
3268 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3269 tree type, itype, vmain, vback;
3270 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3271 basic_block body_bb, cont_bb, collapse_bb = NULL;
3272 basic_block fin_bb;
3273 gimple_stmt_iterator gsi;
3274 edge ep;
3275 bool broken_loop = region->cont == NULL;
3276 tree *counts = NULL;
3277 tree n1, n2, step;
3278
3279 itype = type = TREE_TYPE (fd->loop.v);
3280 if (POINTER_TYPE_P (type))
3281 itype = signed_type_for (type);
3282
3283 entry_bb = region->entry;
3284 cont_bb = region->cont;
3285 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3286 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3287 gcc_assert (broken_loop
3288 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3289 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3290 body_bb = single_succ (seq_start_bb);
3291 if (!broken_loop)
3292 {
3293 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3294 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3295 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3296 }
3297 exit_bb = region->exit;
3298
3299 /* Iteration space partitioning goes in ENTRY_BB. */
3300 gsi = gsi_last_nondebug_bb (entry_bb);
3301 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3302
3303 if (fd->collapse > 1)
3304 {
3305 int first_zero_iter = -1, dummy = -1;
3306 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3307
3308 counts = XALLOCAVEC (tree, fd->collapse);
3309 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3310 fin_bb, first_zero_iter,
3311 dummy_bb, dummy, l2_dom_bb);
3312 t = NULL_TREE;
3313 }
3314 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3315 t = integer_one_node;
3316 else
3317 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3318 fold_convert (type, fd->loop.n1),
3319 fold_convert (type, fd->loop.n2));
3320 if (fd->collapse == 1
3321 && TYPE_UNSIGNED (type)
3322 && (t == NULL_TREE || !integer_onep (t)))
3323 {
3324 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3325 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3326 true, GSI_SAME_STMT);
3327 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3328 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3329 true, GSI_SAME_STMT);
3330 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3331 NULL_TREE, NULL_TREE);
3332 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3333 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3334 expand_omp_regimplify_p, NULL, NULL)
3335 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3336 expand_omp_regimplify_p, NULL, NULL))
3337 {
3338 gsi = gsi_for_stmt (cond_stmt);
3339 gimple_regimplify_operands (cond_stmt, &gsi);
3340 }
3341 ep = split_block (entry_bb, cond_stmt);
3342 ep->flags = EDGE_TRUE_VALUE;
3343 entry_bb = ep->dest;
3344 ep->probability = profile_probability::very_likely ();
3345 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3346 ep->probability = profile_probability::very_unlikely ();
3347 if (gimple_in_ssa_p (cfun))
3348 {
3349 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3350 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3351 !gsi_end_p (gpi); gsi_next (&gpi))
3352 {
3353 gphi *phi = gpi.phi ();
3354 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3355 ep, UNKNOWN_LOCATION);
3356 }
3357 }
3358 gsi = gsi_last_bb (entry_bb);
3359 }
3360
3361 switch (gimple_omp_for_kind (fd->for_stmt))
3362 {
3363 case GF_OMP_FOR_KIND_FOR:
3364 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3365 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3366 break;
3367 case GF_OMP_FOR_KIND_DISTRIBUTE:
3368 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3369 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3370 break;
3371 default:
3372 gcc_unreachable ();
3373 }
3374 nthreads = build_call_expr (nthreads, 0);
3375 nthreads = fold_convert (itype, nthreads);
3376 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3377 true, GSI_SAME_STMT);
3378 threadid = build_call_expr (threadid, 0);
3379 threadid = fold_convert (itype, threadid);
3380 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3381 true, GSI_SAME_STMT);
3382
3383 n1 = fd->loop.n1;
3384 n2 = fd->loop.n2;
3385 step = fd->loop.step;
3386 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3387 {
3388 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3389 OMP_CLAUSE__LOOPTEMP_);
3390 gcc_assert (innerc);
3391 n1 = OMP_CLAUSE_DECL (innerc);
3392 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3393 OMP_CLAUSE__LOOPTEMP_);
3394 gcc_assert (innerc);
3395 n2 = OMP_CLAUSE_DECL (innerc);
3396 }
3397 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3398 true, NULL_TREE, true, GSI_SAME_STMT);
3399 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3400 true, NULL_TREE, true, GSI_SAME_STMT);
3401 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3402 true, NULL_TREE, true, GSI_SAME_STMT);
3403
3404 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3405 t = fold_build2 (PLUS_EXPR, itype, step, t);
3406 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3407 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3408 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3409 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3410 fold_build1 (NEGATE_EXPR, itype, t),
3411 fold_build1 (NEGATE_EXPR, itype, step));
3412 else
3413 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3414 t = fold_convert (itype, t);
3415 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3416
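/* Split the N iterations into NTHREADS blocks: each thread gets
   Q = N / NTHREADS iterations, and the remainder TT = N % NTHREADS
   is handed out one extra iteration at a time below.  */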
3417 q = create_tmp_reg (itype, "q");
3418 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3419 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3420 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3421
3422 tt = create_tmp_reg (itype, "tt");
3423 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3424 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3425 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3426
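/* Threads with id < TT take one extra iteration: for them Q becomes
   Q + 1 and TT is cleared, so S0 = Q * threadid + TT below still yields
   contiguous, near-equal blocks for all threads.  */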
3427 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3428 gcond *cond_stmt = gimple_build_cond_empty (t);
3429 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3430
3431 second_bb = split_block (entry_bb, cond_stmt)->dest;
3432 gsi = gsi_last_nondebug_bb (second_bb);
3433 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3434
3435 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3436 GSI_SAME_STMT);
3437 gassign *assign_stmt
3438 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3439 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3440
3441 third_bb = split_block (second_bb, assign_stmt)->dest;
3442 gsi = gsi_last_nondebug_bb (third_bb);
3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3444
3445 t = build2 (MULT_EXPR, itype, q, threadid);
3446 t = build2 (PLUS_EXPR, itype, t, tt);
3447 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3448
3449 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3450 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3451
3452 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3453 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3454
3455 /* Remove the GIMPLE_OMP_FOR statement. */
3456 gsi_remove (&gsi, true);
3457
3458 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3459 gsi = gsi_start_bb (seq_start_bb);
3460
3461 tree startvar = fd->loop.v;
3462 tree endvar = NULL_TREE;
3463
3464 if (gimple_omp_for_combined_p (fd->for_stmt))
3465 {
3466 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3467 ? gimple_omp_parallel_clauses (inner_stmt)
3468 : gimple_omp_for_clauses (inner_stmt);
3469 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3470 gcc_assert (innerc);
3471 startvar = OMP_CLAUSE_DECL (innerc);
3472 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3473 OMP_CLAUSE__LOOPTEMP_);
3474 gcc_assert (innerc);
3475 endvar = OMP_CLAUSE_DECL (innerc);
3476 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3477 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3478 {
3479 int i;
3480 for (i = 1; i < fd->collapse; i++)
3481 {
3482 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 OMP_CLAUSE__LOOPTEMP_);
3484 gcc_assert (innerc);
3485 }
3486 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3487 OMP_CLAUSE__LOOPTEMP_);
3488 if (innerc)
3489 {
3490 /* If needed (distribute parallel for with lastprivate),
3491 propagate down the total number of iterations. */
3492 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3493 fd->loop.n2);
3494 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3495 GSI_CONTINUE_LINKING);
3496 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3498 }
3499 }
3500 }
3501 t = fold_convert (itype, s0);
3502 t = fold_build2 (MULT_EXPR, itype, t, step);
3503 if (POINTER_TYPE_P (type))
3504 {
3505 t = fold_build_pointer_plus (n1, t);
3506 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3507 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3508 t = fold_convert (signed_type_for (type), t);
3509 }
3510 else
3511 t = fold_build2 (PLUS_EXPR, type, t, n1);
3512 t = fold_convert (TREE_TYPE (startvar), t);
3513 t = force_gimple_operand_gsi (&gsi, t,
3514 DECL_P (startvar)
3515 && TREE_ADDRESSABLE (startvar),
3516 NULL_TREE, false, GSI_CONTINUE_LINKING);
3517 assign_stmt = gimple_build_assign (startvar, t);
3518 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3519
3520 t = fold_convert (itype, e0);
3521 t = fold_build2 (MULT_EXPR, itype, t, step);
3522 if (POINTER_TYPE_P (type))
3523 {
3524 t = fold_build_pointer_plus (n1, t);
3525 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3526 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3527 t = fold_convert (signed_type_for (type), t);
3528 }
3529 else
3530 t = fold_build2 (PLUS_EXPR, type, t, n1);
3531 t = fold_convert (TREE_TYPE (startvar), t);
3532 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3533 false, GSI_CONTINUE_LINKING);
3534 if (endvar)
3535 {
3536 assign_stmt = gimple_build_assign (endvar, e);
3537 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3538 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3539 assign_stmt = gimple_build_assign (fd->loop.v, e);
3540 else
3541 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3542 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3543 }
3544 /* Handle linear clause adjustments. */
3545 tree itercnt = NULL_TREE;
3546 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3547 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3548 c; c = OMP_CLAUSE_CHAIN (c))
3549 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3550 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3551 {
3552 tree d = OMP_CLAUSE_DECL (c);
3553 bool is_ref = omp_is_reference (d);
3554 tree t = d, a, dest;
3555 if (is_ref)
3556 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3557 if (itercnt == NULL_TREE)
3558 {
3559 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3560 {
3561 itercnt = fold_build2 (MINUS_EXPR, itype,
3562 fold_convert (itype, n1),
3563 fold_convert (itype, fd->loop.n1));
3564 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3565 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3566 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3567 NULL_TREE, false,
3568 GSI_CONTINUE_LINKING);
3569 }
3570 else
3571 itercnt = s0;
3572 }
3573 tree type = TREE_TYPE (t);
3574 if (POINTER_TYPE_P (type))
3575 type = sizetype;
3576 a = fold_build2 (MULT_EXPR, type,
3577 fold_convert (type, itercnt),
3578 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3579 dest = unshare_expr (t);
3580 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3581 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3582 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3583 false, GSI_CONTINUE_LINKING);
3584 assign_stmt = gimple_build_assign (dest, t);
3585 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3586 }
3587 if (fd->collapse > 1)
3588 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3589
3590 if (!broken_loop)
3591 {
3592 /* The code controlling the sequential loop replaces the
3593 GIMPLE_OMP_CONTINUE. */
3594 gsi = gsi_last_nondebug_bb (cont_bb);
3595 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3596 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3597 vmain = gimple_omp_continue_control_use (cont_stmt);
3598 vback = gimple_omp_continue_control_def (cont_stmt);
3599
3600 if (!gimple_omp_for_combined_p (fd->for_stmt))
3601 {
3602 if (POINTER_TYPE_P (type))
3603 t = fold_build_pointer_plus (vmain, step);
3604 else
3605 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3606 t = force_gimple_operand_gsi (&gsi, t,
3607 DECL_P (vback)
3608 && TREE_ADDRESSABLE (vback),
3609 NULL_TREE, true, GSI_SAME_STMT);
3610 assign_stmt = gimple_build_assign (vback, t);
3611 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3612
3613 t = build2 (fd->loop.cond_code, boolean_type_node,
3614 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3615 ? t : vback, e);
3616 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3617 }
3618
3619 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3620 gsi_remove (&gsi, true);
3621
3622 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3623 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3624 }
3625
3626 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3627 gsi = gsi_last_nondebug_bb (exit_bb);
3628 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3629 {
3630 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3631 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3632 }
3633 gsi_remove (&gsi, true);
3634
3635 /* Connect all the blocks. */
3636 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3637 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3638 ep = find_edge (entry_bb, second_bb);
3639 ep->flags = EDGE_TRUE_VALUE;
3640 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3641 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3642 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3643
3644 if (!broken_loop)
3645 {
3646 ep = find_edge (cont_bb, body_bb);
3647 if (ep == NULL)
3648 {
3649 ep = BRANCH_EDGE (cont_bb);
3650 gcc_assert (single_succ (ep->dest) == body_bb);
3651 }
3652 if (gimple_omp_for_combined_p (fd->for_stmt))
3653 {
3654 remove_edge (ep);
3655 ep = NULL;
3656 }
3657 else if (fd->collapse > 1)
3658 {
3659 remove_edge (ep);
3660 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3661 }
3662 else
3663 ep->flags = EDGE_TRUE_VALUE;
3664 find_edge (cont_bb, fin_bb)->flags
3665 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3666 }
3667
3668 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3669 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3670 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3671
3672 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3673 recompute_dominator (CDI_DOMINATORS, body_bb));
3674 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3675 recompute_dominator (CDI_DOMINATORS, fin_bb));
3676
3677 struct loop *loop = body_bb->loop_father;
3678 if (loop != entry_bb->loop_father)
3679 {
3680 gcc_assert (broken_loop || loop->header == body_bb);
3681 gcc_assert (broken_loop
3682 || loop->latch == region->cont
3683 || single_pred (loop->latch) == region->cont);
3684 return;
3685 }
3686
3687 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3688 {
3689 loop = alloc_loop ();
3690 loop->header = body_bb;
3691 if (collapse_bb == NULL)
3692 loop->latch = cont_bb;
3693 add_loop (loop, body_bb->loop_father);
3694 }
3695 }
3696
3697 /* Return phi in E->DEST with ARG on edge E. */
3698
3699 static gphi *
3700 find_phi_with_arg_on_edge (tree arg, edge e)
3701 {
3702 basic_block bb = e->dest;
3703
3704 for (gphi_iterator gpi = gsi_start_phis (bb);
3705 !gsi_end_p (gpi);
3706 gsi_next (&gpi))
3707 {
3708 gphi *phi = gpi.phi ();
3709 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3710 return phi;
3711 }
3712
3713 return NULL;
3714 }
3715
3716 /* A subroutine of expand_omp_for. Generate code for a parallel
3717 loop with static schedule and a specified chunk size. Given
3718 parameters:
3719
3720 for (V = N1; V cond N2; V += STEP) BODY;
3721
3722 where COND is "<" or ">", we generate pseudocode
3723
3724 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3725 if (cond is <)
3726 adj = STEP - 1;
3727 else
3728 adj = STEP + 1;
3729 if ((__typeof (V)) -1 > 0 && cond is >)
3730 n = -(adj + N2 - N1) / -STEP;
3731 else
3732 n = (adj + N2 - N1) / STEP;
3733 trip = 0;
3734 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3735 here so that V is defined
3736 if the loop is not entered
3737 L0:
3738 s0 = (trip * nthreads + threadid) * CHUNK;
3739 e0 = min (s0 + CHUNK, n);
3740 if (s0 < n) goto L1; else goto L4;
3741 L1:
3742 V = s0 * STEP + N1;
3743 e = e0 * STEP + N1;
3744 L2:
3745 BODY;
3746 V += STEP;
3747 if (V cond e) goto L2; else goto L3;
3748 L3:
3749 trip += 1;
3750 goto L0;
3751 L4:
3752 */
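/* For illustration only (not part of the expansion itself): with
   nthreads = 4, CHUNK = 2 and n = 10 iterations, thread 1 first gets
   s0 = (0 * 4 + 1) * 2 = 2 and e0 = min (4, 10) = 4, i.e. iterations
   2 and 3; on its second trip s0 = (1 * 4 + 1) * 2 = 10 is not < n,
   so it is done.  The threads thus cycle through the chunks in a
   round-robin fashion.  */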
3753
3754 static void
3755 expand_omp_for_static_chunk (struct omp_region *region,
3756 struct omp_for_data *fd, gimple *inner_stmt)
3757 {
3758 tree n, s0, e0, e, t;
3759 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3760 tree type, itype, vmain, vback, vextra;
3761 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3762 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3763 gimple_stmt_iterator gsi;
3764 edge se;
3765 bool broken_loop = region->cont == NULL;
3766 tree *counts = NULL;
3767 tree n1, n2, step;
3768
3769 itype = type = TREE_TYPE (fd->loop.v);
3770 if (POINTER_TYPE_P (type))
3771 itype = signed_type_for (type);
3772
3773 entry_bb = region->entry;
3774 se = split_block (entry_bb, last_stmt (entry_bb));
3775 entry_bb = se->src;
3776 iter_part_bb = se->dest;
3777 cont_bb = region->cont;
3778 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3779 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3780 gcc_assert (broken_loop
3781 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3782 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3783 body_bb = single_succ (seq_start_bb);
3784 if (!broken_loop)
3785 {
3786 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3787 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3788 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3789 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3790 }
3791 exit_bb = region->exit;
3792
3793 /* Trip and adjustment setup goes in ENTRY_BB. */
3794 gsi = gsi_last_nondebug_bb (entry_bb);
3795 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3796
3797 if (fd->collapse > 1)
3798 {
3799 int first_zero_iter = -1, dummy = -1;
3800 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3801
3802 counts = XALLOCAVEC (tree, fd->collapse);
3803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3804 fin_bb, first_zero_iter,
3805 dummy_bb, dummy, l2_dom_bb);
3806 t = NULL_TREE;
3807 }
3808 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3809 t = integer_one_node;
3810 else
3811 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3812 fold_convert (type, fd->loop.n1),
3813 fold_convert (type, fd->loop.n2));
3814 if (fd->collapse == 1
3815 && TYPE_UNSIGNED (type)
3816 && (t == NULL_TREE || !integer_onep (t)))
3817 {
3818 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3820 true, GSI_SAME_STMT);
3821 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3823 true, GSI_SAME_STMT);
3824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3825 NULL_TREE, NULL_TREE);
3826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3828 expand_omp_regimplify_p, NULL, NULL)
3829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3830 expand_omp_regimplify_p, NULL, NULL))
3831 {
3832 gsi = gsi_for_stmt (cond_stmt);
3833 gimple_regimplify_operands (cond_stmt, &gsi);
3834 }
3835 se = split_block (entry_bb, cond_stmt);
3836 se->flags = EDGE_TRUE_VALUE;
3837 entry_bb = se->dest;
3838 se->probability = profile_probability::very_likely ();
3839 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3840 se->probability = profile_probability::very_unlikely ();
3841 if (gimple_in_ssa_p (cfun))
3842 {
3843 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3844 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3845 !gsi_end_p (gpi); gsi_next (&gpi))
3846 {
3847 gphi *phi = gpi.phi ();
3848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3849 se, UNKNOWN_LOCATION);
3850 }
3851 }
3852 gsi = gsi_last_bb (entry_bb);
3853 }
3854
3855 switch (gimple_omp_for_kind (fd->for_stmt))
3856 {
3857 case GF_OMP_FOR_KIND_FOR:
3858 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3859 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3860 break;
3861 case GF_OMP_FOR_KIND_DISTRIBUTE:
3862 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3863 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3864 break;
3865 default:
3866 gcc_unreachable ();
3867 }
3868 nthreads = build_call_expr (nthreads, 0);
3869 nthreads = fold_convert (itype, nthreads);
3870 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3871 true, GSI_SAME_STMT);
3872 threadid = build_call_expr (threadid, 0);
3873 threadid = fold_convert (itype, threadid);
3874 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3876
3877 n1 = fd->loop.n1;
3878 n2 = fd->loop.n2;
3879 step = fd->loop.step;
3880 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3881 {
3882 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3883 OMP_CLAUSE__LOOPTEMP_);
3884 gcc_assert (innerc);
3885 n1 = OMP_CLAUSE_DECL (innerc);
3886 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3887 OMP_CLAUSE__LOOPTEMP_);
3888 gcc_assert (innerc);
3889 n2 = OMP_CLAUSE_DECL (innerc);
3890 }
3891 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3892 true, NULL_TREE, true, GSI_SAME_STMT);
3893 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3894 true, NULL_TREE, true, GSI_SAME_STMT);
3895 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3896 true, NULL_TREE, true, GSI_SAME_STMT);
3897 tree chunk_size = fold_convert (itype, fd->chunk_size);
3898 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3899 chunk_size
3900 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3901 GSI_SAME_STMT);
3902
3903 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3904 t = fold_build2 (PLUS_EXPR, itype, step, t);
3905 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3906 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3907 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3908 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3909 fold_build1 (NEGATE_EXPR, itype, t),
3910 fold_build1 (NEGATE_EXPR, itype, step));
3911 else
3912 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3913 t = fold_convert (itype, t);
3914 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3915 true, GSI_SAME_STMT);
3916
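/* TRIP counts how many rounds of chunks this thread has handled.  In SSA
   form three names are used: TRIP_INIT for the initial zero, TRIP_MAIN
   for the value merged by the PHI node in ITER_PART_BB, and TRIP_BACK
   for the incremented value on the back edge.  */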
3917 trip_var = create_tmp_reg (itype, ".trip");
3918 if (gimple_in_ssa_p (cfun))
3919 {
3920 trip_init = make_ssa_name (trip_var);
3921 trip_main = make_ssa_name (trip_var);
3922 trip_back = make_ssa_name (trip_var);
3923 }
3924 else
3925 {
3926 trip_init = trip_var;
3927 trip_main = trip_var;
3928 trip_back = trip_var;
3929 }
3930
3931 gassign *assign_stmt
3932 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3933 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3934
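/* Compute VEXTRA = threadid * CHUNK * STEP + N1, the value used for
   fd->loop.v when this thread never enters the sequential loop (see the
   extra definition of V in the pseudocode above).  */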
3935 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3936 t = fold_build2 (MULT_EXPR, itype, t, step);
3937 if (POINTER_TYPE_P (type))
3938 t = fold_build_pointer_plus (n1, t);
3939 else
3940 t = fold_build2 (PLUS_EXPR, type, t, n1);
3941 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3942 true, GSI_SAME_STMT);
3943
3944 /* Remove the GIMPLE_OMP_FOR. */
3945 gsi_remove (&gsi, true);
3946
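/* Remember this point (where the GIMPLE_OMP_FOR used to be): the
   pre-loop values needed by the linear clause handling below are
   computed here, outside of the chunk loop.  */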
3947 gimple_stmt_iterator gsif = gsi;
3948
3949 /* Iteration space partitioning goes in ITER_PART_BB. */
3950 gsi = gsi_last_bb (iter_part_bb);
3951
3952 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3953 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3954 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3955 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3956 false, GSI_CONTINUE_LINKING);
3957
3958 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3959 t = fold_build2 (MIN_EXPR, itype, t, n);
3960 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3961 false, GSI_CONTINUE_LINKING);
3962
3963 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3964 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3965
3966 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3967 gsi = gsi_start_bb (seq_start_bb);
3968
3969 tree startvar = fd->loop.v;
3970 tree endvar = NULL_TREE;
3971
3972 if (gimple_omp_for_combined_p (fd->for_stmt))
3973 {
3974 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3975 ? gimple_omp_parallel_clauses (inner_stmt)
3976 : gimple_omp_for_clauses (inner_stmt);
3977 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3978 gcc_assert (innerc);
3979 startvar = OMP_CLAUSE_DECL (innerc);
3980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 OMP_CLAUSE__LOOPTEMP_);
3982 gcc_assert (innerc);
3983 endvar = OMP_CLAUSE_DECL (innerc);
3984 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3985 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3986 {
3987 int i;
3988 for (i = 1; i < fd->collapse; i++)
3989 {
3990 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3991 OMP_CLAUSE__LOOPTEMP_);
3992 gcc_assert (innerc);
3993 }
3994 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3995 OMP_CLAUSE__LOOPTEMP_);
3996 if (innerc)
3997 {
3998 /* If needed (distribute parallel for with lastprivate),
3999 propagate down the total number of iterations. */
4000 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4001 fd->loop.n2);
4002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4003 GSI_CONTINUE_LINKING);
4004 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4006 }
4007 }
4008 }
4009
4010 t = fold_convert (itype, s0);
4011 t = fold_build2 (MULT_EXPR, itype, t, step);
4012 if (POINTER_TYPE_P (type))
4013 {
4014 t = fold_build_pointer_plus (n1, t);
4015 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4016 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4017 t = fold_convert (signed_type_for (type), t);
4018 }
4019 else
4020 t = fold_build2 (PLUS_EXPR, type, t, n1);
4021 t = fold_convert (TREE_TYPE (startvar), t);
4022 t = force_gimple_operand_gsi (&gsi, t,
4023 DECL_P (startvar)
4024 && TREE_ADDRESSABLE (startvar),
4025 NULL_TREE, false, GSI_CONTINUE_LINKING);
4026 assign_stmt = gimple_build_assign (startvar, t);
4027 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4028
4029 t = fold_convert (itype, e0);
4030 t = fold_build2 (MULT_EXPR, itype, t, step);
4031 if (POINTER_TYPE_P (type))
4032 {
4033 t = fold_build_pointer_plus (n1, t);
4034 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4035 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4036 t = fold_convert (signed_type_for (type), t);
4037 }
4038 else
4039 t = fold_build2 (PLUS_EXPR, type, t, n1);
4040 t = fold_convert (TREE_TYPE (startvar), t);
4041 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4042 false, GSI_CONTINUE_LINKING);
4043 if (endvar)
4044 {
4045 assign_stmt = gimple_build_assign (endvar, e);
4046 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4047 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4048 assign_stmt = gimple_build_assign (fd->loop.v, e);
4049 else
4050 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4052 }
4053 /* Handle linear clause adjustments. */
4054 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4055 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4056 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4057 c; c = OMP_CLAUSE_CHAIN (c))
4058 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4059 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4060 {
4061 tree d = OMP_CLAUSE_DECL (c);
4062 bool is_ref = omp_is_reference (d);
4063 tree t = d, a, dest;
4064 if (is_ref)
4065 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4066 tree type = TREE_TYPE (t);
4067 if (POINTER_TYPE_P (type))
4068 type = sizetype;
4069 dest = unshare_expr (t);
4070 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4071 expand_omp_build_assign (&gsif, v, t);
4072 if (itercnt == NULL_TREE)
4073 {
4074 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4075 {
4076 itercntbias
4077 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4078 fold_convert (itype, fd->loop.n1));
4079 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4080 itercntbias, step);
4081 itercntbias
4082 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4083 NULL_TREE, true,
4084 GSI_SAME_STMT);
4085 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4086 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4087 NULL_TREE, false,
4088 GSI_CONTINUE_LINKING);
4089 }
4090 else
4091 itercnt = s0;
4092 }
4093 a = fold_build2 (MULT_EXPR, type,
4094 fold_convert (type, itercnt),
4095 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4096 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4097 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4098 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4099 false, GSI_CONTINUE_LINKING);
4100 assign_stmt = gimple_build_assign (dest, t);
4101 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4102 }
4103 if (fd->collapse > 1)
4104 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4105
4106 if (!broken_loop)
4107 {
4108 /* The code controlling the sequential loop goes in CONT_BB,
4109 replacing the GIMPLE_OMP_CONTINUE. */
4110 gsi = gsi_last_nondebug_bb (cont_bb);
4111 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4112 vmain = gimple_omp_continue_control_use (cont_stmt);
4113 vback = gimple_omp_continue_control_def (cont_stmt);
4114
4115 if (!gimple_omp_for_combined_p (fd->for_stmt))
4116 {
4117 if (POINTER_TYPE_P (type))
4118 t = fold_build_pointer_plus (vmain, step);
4119 else
4120 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4121 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4122 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4123 true, GSI_SAME_STMT);
4124 assign_stmt = gimple_build_assign (vback, t);
4125 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4126
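/* With a chunk size of 1 each chunk contains exactly one iteration, so
   the sequential loop must not iterate: emit an always-false 0 == 1
   condition instead of the usual V cond E test.  */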
4127 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4128 t = build2 (EQ_EXPR, boolean_type_node,
4129 build_int_cst (itype, 0),
4130 build_int_cst (itype, 1));
4131 else
4132 t = build2 (fd->loop.cond_code, boolean_type_node,
4133 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4134 ? t : vback, e);
4135 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4136 }
4137
4138 /* Remove GIMPLE_OMP_CONTINUE. */
4139 gsi_remove (&gsi, true);
4140
4141 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4142 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4143
4144 /* Trip update code goes into TRIP_UPDATE_BB. */
4145 gsi = gsi_start_bb (trip_update_bb);
4146
4147 t = build_int_cst (itype, 1);
4148 t = build2 (PLUS_EXPR, itype, trip_main, t);
4149 assign_stmt = gimple_build_assign (trip_back, t);
4150 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4151 }
4152
4153 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4154 gsi = gsi_last_nondebug_bb (exit_bb);
4155 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4156 {
4157 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4158 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4159 }
4160 gsi_remove (&gsi, true);
4161
4162 /* Connect the new blocks. */
4163 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4164 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4165
4166 if (!broken_loop)
4167 {
4168 se = find_edge (cont_bb, body_bb);
4169 if (se == NULL)
4170 {
4171 se = BRANCH_EDGE (cont_bb);
4172 gcc_assert (single_succ (se->dest) == body_bb);
4173 }
4174 if (gimple_omp_for_combined_p (fd->for_stmt))
4175 {
4176 remove_edge (se);
4177 se = NULL;
4178 }
4179 else if (fd->collapse > 1)
4180 {
4181 remove_edge (se);
4182 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4183 }
4184 else
4185 se->flags = EDGE_TRUE_VALUE;
4186 find_edge (cont_bb, trip_update_bb)->flags
4187 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4188
4189 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4190 iter_part_bb);
4191 }
4192
4193 if (gimple_in_ssa_p (cfun))
4194 {
4195 gphi_iterator psi;
4196 gphi *phi;
4197 edge re, ene;
4198 edge_var_map *vm;
4199 size_t i;
4200
4201 gcc_assert (fd->collapse == 1 && !broken_loop);
4202
4203 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4204 remove arguments of the phi nodes in fin_bb. We need to create
4205 appropriate phi nodes in iter_part_bb instead. */
4206 se = find_edge (iter_part_bb, fin_bb);
4207 re = single_succ_edge (trip_update_bb);
4208 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4209 ene = single_succ_edge (entry_bb);
4210
4211 psi = gsi_start_phis (fin_bb);
4212 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4213 gsi_next (&psi), ++i)
4214 {
4215 gphi *nphi;
4216 source_location locus;
4217
4218 phi = psi.phi ();
4219 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4220 redirect_edge_var_map_def (vm), 0))
4221 continue;
4222
4223 t = gimple_phi_result (phi);
4224 gcc_assert (t == redirect_edge_var_map_result (vm));
4225
4226 if (!single_pred_p (fin_bb))
4227 t = copy_ssa_name (t, phi);
4228
4229 nphi = create_phi_node (t, iter_part_bb);
4230
4231 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4232 locus = gimple_phi_arg_location_from_edge (phi, se);
4233
4234 /* A special case -- fd->loop.v is not yet computed in
4235 iter_part_bb; we need to use vextra instead. */
4236 if (t == fd->loop.v)
4237 t = vextra;
4238 add_phi_arg (nphi, t, ene, locus);
4239 locus = redirect_edge_var_map_location (vm);
4240 tree back_arg = redirect_edge_var_map_def (vm);
4241 add_phi_arg (nphi, back_arg, re, locus);
4242 edge ce = find_edge (cont_bb, body_bb);
4243 if (ce == NULL)
4244 {
4245 ce = BRANCH_EDGE (cont_bb);
4246 gcc_assert (single_succ (ce->dest) == body_bb);
4247 ce = single_succ_edge (ce->dest);
4248 }
4249 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4250 gcc_assert (inner_loop_phi != NULL);
4251 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4252 find_edge (seq_start_bb, body_bb), locus);
4253
4254 if (!single_pred_p (fin_bb))
4255 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4256 }
4257 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4258 redirect_edge_var_map_clear (re);
4259 if (single_pred_p (fin_bb))
4260 while (1)
4261 {
4262 psi = gsi_start_phis (fin_bb);
4263 if (gsi_end_p (psi))
4264 break;
4265 remove_phi_node (&psi, false);
4266 }
4267
4268 /* Make phi node for trip. */
4269 phi = create_phi_node (trip_main, iter_part_bb);
4270 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4271 UNKNOWN_LOCATION);
4272 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4273 UNKNOWN_LOCATION);
4274 }
4275
4276 if (!broken_loop)
4277 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4278 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4279 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4280 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4281 recompute_dominator (CDI_DOMINATORS, fin_bb));
4282 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4283 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4284 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4285 recompute_dominator (CDI_DOMINATORS, body_bb));
4286
4287 if (!broken_loop)
4288 {
4289 struct loop *loop = body_bb->loop_father;
4290 struct loop *trip_loop = alloc_loop ();
4291 trip_loop->header = iter_part_bb;
4292 trip_loop->latch = trip_update_bb;
4293 add_loop (trip_loop, iter_part_bb->loop_father);
4294
4295 if (loop != entry_bb->loop_father)
4296 {
4297 gcc_assert (loop->header == body_bb);
4298 gcc_assert (loop->latch == region->cont
4299 || single_pred (loop->latch) == region->cont);
4300 trip_loop->inner = loop;
4301 return;
4302 }
4303
4304 if (!gimple_omp_for_combined_p (fd->for_stmt))
4305 {
4306 loop = alloc_loop ();
4307 loop->header = body_bb;
4308 if (collapse_bb == NULL)
4309 loop->latch = cont_bb;
4310 add_loop (loop, trip_loop);
4311 }
4312 }
4313 }
4314
4315 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4316 loop. Given parameters:
4317
4318 for (V = N1; V cond N2; V += STEP) BODY;
4319
4320 where COND is "<" or ">", we generate pseudocode
4321
4322 V = N1;
4323 goto L1;
4324 L0:
4325 BODY;
4326 V += STEP;
4327 L1:
4328 if (V cond N2) goto L0; else goto L2;
4329 L2:
4330
4331 For collapsed loops, given parameters:
4332 collapse(3)
4333 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4334 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4335 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4336 BODY;
4337
4338 we generate pseudocode
4339
4340 if (cond3 is <)
4341 adj = STEP3 - 1;
4342 else
4343 adj = STEP3 + 1;
4344 count3 = (adj + N32 - N31) / STEP3;
4345 if (cond2 is <)
4346 adj = STEP2 - 1;
4347 else
4348 adj = STEP2 + 1;
4349 count2 = (adj + N22 - N21) / STEP2;
4350 if (cond1 is <)
4351 adj = STEP1 - 1;
4352 else
4353 adj = STEP1 + 1;
4354 count1 = (adj + N12 - N11) / STEP1;
4355 count = count1 * count2 * count3;
4356 V = 0;
4357 V1 = N11;
4358 V2 = N21;
4359 V3 = N31;
4360 goto L1;
4361 L0:
4362 BODY;
4363 V += 1;
4364 V3 += STEP3;
4365 V2 += (V3 cond3 N32) ? 0 : STEP2;
4366 V3 = (V3 cond3 N32) ? V3 : N31;
4367 V1 += (V2 cond2 N22) ? 0 : STEP1;
4368 V2 = (V2 cond2 N22) ? V2 : N21;
4369 L1:
4370 if (V < count) goto L0; else goto L2;
4371 L2:
4372
4373 */
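/* For illustration only: with collapse(2), N11 = 0, N12 = 3, STEP1 = 1
   and N21 = 0, N22 = 2, STEP2 = 1, we get count1 = 3, count2 = 2 and
   count = 6; V runs from 0 to 5, V2 wraps back to N21 every two
   iterations, and V1 advances by STEP1 exactly when V2 wraps.  */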
4374
4375 static void
4376 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4377 {
4378 tree type, t;
4379 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4380 gimple_stmt_iterator gsi;
4381 gimple *stmt;
4382 gcond *cond_stmt;
4383 bool broken_loop = region->cont == NULL;
4384 edge e, ne;
4385 tree *counts = NULL;
4386 int i;
4387 int safelen_int = INT_MAX;
4388 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4389 OMP_CLAUSE_SAFELEN);
4390 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4391 OMP_CLAUSE__SIMDUID_);
4392 tree n1, n2;
4393
4394 if (safelen)
4395 {
4396 poly_uint64 val;
4397 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4398 if (!poly_int_tree_p (safelen, &val))
4399 safelen_int = 0;
4400 else
4401 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4402 if (safelen_int == 1)
4403 safelen_int = 0;
4404 }
4405 type = TREE_TYPE (fd->loop.v);
4406 entry_bb = region->entry;
4407 cont_bb = region->cont;
4408 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4409 gcc_assert (broken_loop
4410 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4411 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4412 if (!broken_loop)
4413 {
4414 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4415 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4416 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4417 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4418 }
4419 else
4420 {
4421 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4422 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4423 l2_bb = single_succ (l1_bb);
4424 }
4425 exit_bb = region->exit;
4426 l2_dom_bb = NULL;
4427
4428 gsi = gsi_last_nondebug_bb (entry_bb);
4429
4430 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4431 /* Not needed in SSA form right now. */
4432 gcc_assert (!gimple_in_ssa_p (cfun));
4433 if (fd->collapse > 1)
4434 {
4435 int first_zero_iter = -1, dummy = -1;
4436 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4437
4438 counts = XALLOCAVEC (tree, fd->collapse);
4439 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4440 zero_iter_bb, first_zero_iter,
4441 dummy_bb, dummy, l2_dom_bb);
4442 }
4443 if (l2_dom_bb == NULL)
4444 l2_dom_bb = l1_bb;
4445
4446 n1 = fd->loop.n1;
4447 n2 = fd->loop.n2;
4448 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4449 {
4450 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4451 OMP_CLAUSE__LOOPTEMP_);
4452 gcc_assert (innerc);
4453 n1 = OMP_CLAUSE_DECL (innerc);
4454 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4455 OMP_CLAUSE__LOOPTEMP_);
4456 gcc_assert (innerc);
4457 n2 = OMP_CLAUSE_DECL (innerc);
4458 }
4459 tree step = fd->loop.step;
4460
4461 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4462 OMP_CLAUSE__SIMT_);
4463 if (is_simt)
4464 {
4465 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4466 is_simt = safelen_int > 1;
4467 }
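/* Under SIMT each lane starts at N1 + SIMT_LANE * STEP and advances by
   STEP * VF, so the lanes interleave over the iteration space; when
   safelen or a collapsed loop limits the usable width, SIMT_MAXLANE
   caps VF and the extra lanes are branched around the loop below.  */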
4468 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4469 if (is_simt)
4470 {
4471 simt_lane = create_tmp_var (unsigned_type_node);
4472 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4473 gimple_call_set_lhs (g, simt_lane);
4474 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4475 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4476 fold_convert (TREE_TYPE (step), simt_lane));
4477 n1 = fold_convert (type, n1);
4478 if (POINTER_TYPE_P (type))
4479 n1 = fold_build_pointer_plus (n1, offset);
4480 else
4481 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4482
4483 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4484 if (fd->collapse > 1)
4485 simt_maxlane = build_one_cst (unsigned_type_node);
4486 else if (safelen_int < omp_max_simt_vf ())
4487 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4488 tree vf
4489 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4490 unsigned_type_node, 0);
4491 if (simt_maxlane)
4492 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4493 vf = fold_convert (TREE_TYPE (step), vf);
4494 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4495 }
4496
4497 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4498 if (fd->collapse > 1)
4499 {
4500 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4501 {
4502 gsi_prev (&gsi);
4503 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4504 gsi_next (&gsi);
4505 }
4506 else
4507 for (i = 0; i < fd->collapse; i++)
4508 {
4509 tree itype = TREE_TYPE (fd->loops[i].v);
4510 if (POINTER_TYPE_P (itype))
4511 itype = signed_type_for (itype);
4512 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4513 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4514 }
4515 }
4516
4517 /* Remove the GIMPLE_OMP_FOR statement. */
4518 gsi_remove (&gsi, true);
4519
4520 if (!broken_loop)
4521 {
4522 /* Code to control the increment goes in the CONT_BB. */
4523 gsi = gsi_last_nondebug_bb (cont_bb);
4524 stmt = gsi_stmt (gsi);
4525 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4526
4527 if (POINTER_TYPE_P (type))
4528 t = fold_build_pointer_plus (fd->loop.v, step);
4529 else
4530 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4531 expand_omp_build_assign (&gsi, fd->loop.v, t);
4532
4533 if (fd->collapse > 1)
4534 {
4535 i = fd->collapse - 1;
4536 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4537 {
4538 t = fold_convert (sizetype, fd->loops[i].step);
4539 t = fold_build_pointer_plus (fd->loops[i].v, t);
4540 }
4541 else
4542 {
4543 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4544 fd->loops[i].step);
4545 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4546 fd->loops[i].v, t);
4547 }
4548 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4549
4550 for (i = fd->collapse - 1; i > 0; i--)
4551 {
4552 tree itype = TREE_TYPE (fd->loops[i].v);
4553 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4554 if (POINTER_TYPE_P (itype2))
4555 itype2 = signed_type_for (itype2);
4556 t = fold_convert (itype2, fd->loops[i - 1].step);
4557 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4558 GSI_SAME_STMT);
4559 t = build3 (COND_EXPR, itype2,
4560 build2 (fd->loops[i].cond_code, boolean_type_node,
4561 fd->loops[i].v,
4562 fold_convert (itype, fd->loops[i].n2)),
4563 build_int_cst (itype2, 0), t);
4564 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4565 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4566 else
4567 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4568 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4569
4570 t = fold_convert (itype, fd->loops[i].n1);
4571 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4572 GSI_SAME_STMT);
4573 t = build3 (COND_EXPR, itype,
4574 build2 (fd->loops[i].cond_code, boolean_type_node,
4575 fd->loops[i].v,
4576 fold_convert (itype, fd->loops[i].n2)),
4577 fd->loops[i].v, t);
4578 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4579 }
4580 }
4581
4582 /* Remove GIMPLE_OMP_CONTINUE. */
4583 gsi_remove (&gsi, true);
4584 }
4585
4586 /* Emit the condition in L1_BB. */
4587 gsi = gsi_start_bb (l1_bb);
4588
4589 t = fold_convert (type, n2);
4590 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4591 false, GSI_CONTINUE_LINKING);
4592 tree v = fd->loop.v;
4593 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4594 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4595 false, GSI_CONTINUE_LINKING);
4596 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4597 cond_stmt = gimple_build_cond_empty (t);
4598 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4599 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4600 NULL, NULL)
4601 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4602 NULL, NULL))
4603 {
4604 gsi = gsi_for_stmt (cond_stmt);
4605 gimple_regimplify_operands (cond_stmt, &gsi);
4606 }
4607
4608 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4609 if (is_simt)
4610 {
4611 gsi = gsi_start_bb (l2_bb);
4612 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4613 if (POINTER_TYPE_P (type))
4614 t = fold_build_pointer_plus (fd->loop.v, step);
4615 else
4616 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4617 expand_omp_build_assign (&gsi, fd->loop.v, t);
4618 }
4619
4620 /* Remove GIMPLE_OMP_RETURN. */
4621 gsi = gsi_last_nondebug_bb (exit_bb);
4622 gsi_remove (&gsi, true);
4623
4624 /* Connect the new blocks. */
4625 remove_edge (FALLTHRU_EDGE (entry_bb));
4626
4627 if (!broken_loop)
4628 {
4629 remove_edge (BRANCH_EDGE (entry_bb));
4630 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4631
4632 e = BRANCH_EDGE (l1_bb);
4633 ne = FALLTHRU_EDGE (l1_bb);
4634 e->flags = EDGE_TRUE_VALUE;
4635 }
4636 else
4637 {
4638 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4639
4640 ne = single_succ_edge (l1_bb);
4641 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4642
4643 }
4644 ne->flags = EDGE_FALSE_VALUE;
4645 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4646 ne->probability = e->probability.invert ();
4647
4648 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4649 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4650
4651 if (simt_maxlane)
4652 {
4653 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4654 NULL_TREE, NULL_TREE);
4655 gsi = gsi_last_bb (entry_bb);
4656 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4657 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4658 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4659 FALLTHRU_EDGE (entry_bb)->probability
4660 = profile_probability::guessed_always ().apply_scale (7, 8);
4661 BRANCH_EDGE (entry_bb)->probability
4662 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4663 l2_dom_bb = entry_bb;
4664 }
4665 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4666
4667 if (!broken_loop)
4668 {
4669 struct loop *loop = alloc_loop ();
4670 loop->header = l1_bb;
4671 loop->latch = cont_bb;
4672 add_loop (loop, l1_bb->loop_father);
4673 loop->safelen = safelen_int;
4674 if (simduid)
4675 {
4676 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4677 cfun->has_simduid_loops = true;
4678 }
4679 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4680 the loop. */
4681 if ((flag_tree_loop_vectorize
4682 || !global_options_set.x_flag_tree_loop_vectorize)
4683 && flag_tree_loop_optimize
4684 && loop->safelen > 1)
4685 {
4686 loop->force_vectorize = true;
4687 cfun->has_force_vectorize_loops = true;
4688 }
4689 }
4690 else if (simduid)
4691 cfun->has_simduid_loops = true;
4692 }
4693
4694 /* The taskloop construct is represented after gimplification as
4695 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4696 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4697 which should just compute all the needed loop temporaries
4698 for GIMPLE_OMP_TASK. */
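/* As an illustrative sketch of the input (not of the generated code):

     #pragma omp taskloop
     for (V = N1; V < N2; V += STEP)
       BODY;

   arrives here as an outer GIMPLE_OMP_FOR whose only job is to store the
   (possibly biased) bounds N1 and N2 into _looptemp_ temporaries that
   the GIMPLE_OMP_TASK and the inner GIMPLE_OMP_FOR consume.  */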
4699
4700 static void
4701 expand_omp_taskloop_for_outer (struct omp_region *region,
4702 struct omp_for_data *fd,
4703 gimple *inner_stmt)
4704 {
4705 tree type, bias = NULL_TREE;
4706 basic_block entry_bb, cont_bb, exit_bb;
4707 gimple_stmt_iterator gsi;
4708 gassign *assign_stmt;
4709 tree *counts = NULL;
4710 int i;
4711
4712 gcc_assert (inner_stmt);
4713 gcc_assert (region->cont);
4714 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4715 && gimple_omp_task_taskloop_p (inner_stmt));
4716 type = TREE_TYPE (fd->loop.v);
4717
4718 /* See if we need to bias by LLONG_MIN. */
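/* The bias maps a signed range that may cross zero onto the unsigned
   long long iteration type, so the unsigned arithmetic done by
   GOMP_taskloop_ull still sees a monotonically increasing space.  */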
4719 if (fd->iter_type == long_long_unsigned_type_node
4720 && TREE_CODE (type) == INTEGER_TYPE
4721 && !TYPE_UNSIGNED (type))
4722 {
4723 tree n1, n2;
4724
4725 if (fd->loop.cond_code == LT_EXPR)
4726 {
4727 n1 = fd->loop.n1;
4728 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4729 }
4730 else
4731 {
4732 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4733 n2 = fd->loop.n1;
4734 }
4735 if (TREE_CODE (n1) != INTEGER_CST
4736 || TREE_CODE (n2) != INTEGER_CST
4737 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4738 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4739 }
4740
4741 entry_bb = region->entry;
4742 cont_bb = region->cont;
4743 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4744 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4745 exit_bb = region->exit;
4746
4747 gsi = gsi_last_nondebug_bb (entry_bb);
4748 gimple *for_stmt = gsi_stmt (gsi);
4749 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4750 if (fd->collapse > 1)
4751 {
4752 int first_zero_iter = -1, dummy = -1;
4753 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4754
4755 counts = XALLOCAVEC (tree, fd->collapse);
4756 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4757 zero_iter_bb, first_zero_iter,
4758 dummy_bb, dummy, l2_dom_bb);
4759
4760 if (zero_iter_bb)
4761 {
4762 /* Some counts[i] vars might be uninitialized if
4763 some loop has zero iterations. But the body shouldn't
4764 be executed in that case, so just avoid uninit warnings. */
4765 for (i = first_zero_iter; i < fd->collapse; i++)
4766 if (SSA_VAR_P (counts[i]))
4767 TREE_NO_WARNING (counts[i]) = 1;
4768 gsi_prev (&gsi);
4769 edge e = split_block (entry_bb, gsi_stmt (gsi));
4770 entry_bb = e->dest;
4771 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4772 gsi = gsi_last_bb (entry_bb);
4773 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4774 get_immediate_dominator (CDI_DOMINATORS,
4775 zero_iter_bb));
4776 }
4777 }
4778
4779 tree t0, t1;
4780 t1 = fd->loop.n2;
4781 t0 = fd->loop.n1;
4782 if (POINTER_TYPE_P (TREE_TYPE (t0))
4783 && TYPE_PRECISION (TREE_TYPE (t0))
4784 != TYPE_PRECISION (fd->iter_type))
4785 {
4786 /* Avoid casting pointers to integer of a different size. */
4787 tree itype = signed_type_for (type);
4788 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4789 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4790 }
4791 else
4792 {
4793 t1 = fold_convert (fd->iter_type, t1);
4794 t0 = fold_convert (fd->iter_type, t0);
4795 }
4796 if (bias)
4797 {
4798 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4799 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4800 }
4801
4802 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4803 OMP_CLAUSE__LOOPTEMP_);
4804 gcc_assert (innerc);
4805 tree startvar = OMP_CLAUSE_DECL (innerc);
4806 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4807 gcc_assert (innerc);
4808 tree endvar = OMP_CLAUSE_DECL (innerc);
4809 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4810 {
4811 gcc_assert (innerc);
4812 for (i = 1; i < fd->collapse; i++)
4813 {
4814 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4815 OMP_CLAUSE__LOOPTEMP_);
4816 gcc_assert (innerc);
4817 }
4818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4819 OMP_CLAUSE__LOOPTEMP_);
4820 if (innerc)
4821 {
4822 /* If needed (inner taskloop has lastprivate clause), propagate
4823 down the total number of iterations. */
4824 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4825 NULL_TREE, false,
4826 GSI_CONTINUE_LINKING);
4827 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4828 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4829 }
4830 }
4831
4832 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4833 GSI_CONTINUE_LINKING);
4834 assign_stmt = gimple_build_assign (startvar, t0);
4835 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4836
4837 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4838 GSI_CONTINUE_LINKING);
4839 assign_stmt = gimple_build_assign (endvar, t1);
4840 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4841 if (fd->collapse > 1)
4842 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4843
4844 /* Remove the GIMPLE_OMP_FOR statement. */
4845 gsi = gsi_for_stmt (for_stmt);
4846 gsi_remove (&gsi, true);
4847
4848 gsi = gsi_last_nondebug_bb (cont_bb);
4849 gsi_remove (&gsi, true);
4850
4851 gsi = gsi_last_nondebug_bb (exit_bb);
4852 gsi_remove (&gsi, true);
4853
4854 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4855 remove_edge (BRANCH_EDGE (entry_bb));
4856 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4857 remove_edge (BRANCH_EDGE (cont_bb));
4858 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4859 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4860 recompute_dominator (CDI_DOMINATORS, region->entry));
4861 }
4862
4863 /* The taskloop construct is represented after gimplification as
4864 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4865 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4866 The GOMP_taskloop{,_ull} function arranges for each task to be given just
4867 a single range of iterations. */
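/* In other words, by the time this runs each task already has its own
   start/end pair in the _looptemp_ temporaries; the expansion below
   merely rewrites the loop to iterate over that range.  */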
4868
4869 static void
4870 expand_omp_taskloop_for_inner (struct omp_region *region,
4871 struct omp_for_data *fd,
4872 gimple *inner_stmt)
4873 {
4874 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4875 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4876 basic_block fin_bb;
4877 gimple_stmt_iterator gsi;
4878 edge ep;
4879 bool broken_loop = region->cont == NULL;
4880 tree *counts = NULL;
4881 tree n1, n2, step;
4882
4883 itype = type = TREE_TYPE (fd->loop.v);
4884 if (POINTER_TYPE_P (type))
4885 itype = signed_type_for (type);
4886
4887 /* See if we need to bias by LLONG_MIN. */
4888 if (fd->iter_type == long_long_unsigned_type_node
4889 && TREE_CODE (type) == INTEGER_TYPE
4890 && !TYPE_UNSIGNED (type))
4891 {
4892 tree n1, n2;
4893
4894 if (fd->loop.cond_code == LT_EXPR)
4895 {
4896 n1 = fd->loop.n1;
4897 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4898 }
4899 else
4900 {
4901 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4902 n2 = fd->loop.n1;
4903 }
4904 if (TREE_CODE (n1) != INTEGER_CST
4905 || TREE_CODE (n2) != INTEGER_CST
4906 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4907 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4908 }
4909
4910 entry_bb = region->entry;
4911 cont_bb = region->cont;
4912 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4913 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4914 gcc_assert (broken_loop
4915 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4916 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4917 if (!broken_loop)
4918 {
4919 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4920 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4921 }
4922 exit_bb = region->exit;
4923
4924 /* Iteration space partitioning goes in ENTRY_BB. */
4925 gsi = gsi_last_nondebug_bb (entry_bb);
4926 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4927
4928 if (fd->collapse > 1)
4929 {
4930 int first_zero_iter = -1, dummy = -1;
4931 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4932
4933 counts = XALLOCAVEC (tree, fd->collapse);
4934 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4935 fin_bb, first_zero_iter,
4936 dummy_bb, dummy, l2_dom_bb);
4937 t = NULL_TREE;
4938 }
4939 else
4940 t = integer_one_node;
4941
4942 step = fd->loop.step;
4943 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4944 OMP_CLAUSE__LOOPTEMP_);
4945 gcc_assert (innerc);
4946 n1 = OMP_CLAUSE_DECL (innerc);
4947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4948 gcc_assert (innerc);
4949 n2 = OMP_CLAUSE_DECL (innerc);
4950 if (bias)
4951 {
4952 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4953 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4954 }
4955 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4956 true, NULL_TREE, true, GSI_SAME_STMT);
4957 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4958 true, NULL_TREE, true, GSI_SAME_STMT);
4959 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4960 true, NULL_TREE, true, GSI_SAME_STMT);
4961
4962 tree startvar = fd->loop.v;
4963 tree endvar = NULL_TREE;
4964
4965 if (gimple_omp_for_combined_p (fd->for_stmt))
4966 {
4967 tree clauses = gimple_omp_for_clauses (inner_stmt);
4968 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4969 gcc_assert (innerc);
4970 startvar = OMP_CLAUSE_DECL (innerc);
4971 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4972 OMP_CLAUSE__LOOPTEMP_);
4973 gcc_assert (innerc);
4974 endvar = OMP_CLAUSE_DECL (innerc);
4975 }
4976 t = fold_convert (TREE_TYPE (startvar), n1);
4977 t = force_gimple_operand_gsi (&gsi, t,
4978 DECL_P (startvar)
4979 && TREE_ADDRESSABLE (startvar),
4980 NULL_TREE, false, GSI_CONTINUE_LINKING);
4981 gimple *assign_stmt = gimple_build_assign (startvar, t);
4982 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4983
4984 t = fold_convert (TREE_TYPE (startvar), n2);
4985 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4986 false, GSI_CONTINUE_LINKING);
4987 if (endvar)
4988 {
4989 assign_stmt = gimple_build_assign (endvar, e);
4990 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4991 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4992 assign_stmt = gimple_build_assign (fd->loop.v, e);
4993 else
4994 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4995 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4996 }
4997 if (fd->collapse > 1)
4998 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4999
5000 if (!broken_loop)
5001 {
5002 /* The code controlling the sequential loop replaces the
5003 GIMPLE_OMP_CONTINUE. */
5004 gsi = gsi_last_nondebug_bb (cont_bb);
5005 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5006 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5007 vmain = gimple_omp_continue_control_use (cont_stmt);
5008 vback = gimple_omp_continue_control_def (cont_stmt);
5009
5010 if (!gimple_omp_for_combined_p (fd->for_stmt))
5011 {
5012 if (POINTER_TYPE_P (type))
5013 t = fold_build_pointer_plus (vmain, step);
5014 else
5015 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5016 t = force_gimple_operand_gsi (&gsi, t,
5017 DECL_P (vback)
5018 && TREE_ADDRESSABLE (vback),
5019 NULL_TREE, true, GSI_SAME_STMT);
5020 assign_stmt = gimple_build_assign (vback, t);
5021 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5022
5023 t = build2 (fd->loop.cond_code, boolean_type_node,
5024 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5025 ? t : vback, e);
5026 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5027 }
5028
5029 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5030 gsi_remove (&gsi, true);
5031
5032 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5033 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5034 }
5035
5036 /* Remove the GIMPLE_OMP_FOR statement. */
5037 gsi = gsi_for_stmt (fd->for_stmt);
5038 gsi_remove (&gsi, true);
5039
5040 /* Remove the GIMPLE_OMP_RETURN statement. */
5041 gsi = gsi_last_nondebug_bb (exit_bb);
5042 gsi_remove (&gsi, true);
5043
5044 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5045 if (!broken_loop)
5046 remove_edge (BRANCH_EDGE (entry_bb));
5047 else
5048 {
5049 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5050 region->outer->cont = NULL;
5051 }
5052
5053 /* Connect all the blocks. */
5054 if (!broken_loop)
5055 {
5056 ep = find_edge (cont_bb, body_bb);
5057 if (gimple_omp_for_combined_p (fd->for_stmt))
5058 {
5059 remove_edge (ep);
5060 ep = NULL;
5061 }
5062 else if (fd->collapse > 1)
5063 {
5064 remove_edge (ep);
5065 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5066 }
5067 else
5068 ep->flags = EDGE_TRUE_VALUE;
5069 find_edge (cont_bb, fin_bb)->flags
5070 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5071 }
5072
5073 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5074 recompute_dominator (CDI_DOMINATORS, body_bb));
5075 if (!broken_loop)
5076 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5077 recompute_dominator (CDI_DOMINATORS, fin_bb));
5078
5079 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5080 {
5081 struct loop *loop = alloc_loop ();
5082 loop->header = body_bb;
5083 if (collapse_bb == NULL)
5084 loop->latch = cont_bb;
5085 add_loop (loop, body_bb->loop_father);
5086 }
5087 }
5088
5089 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5090 partitioned loop. The lowering here is abstracted, in that the
5091 loop parameters are passed through internal functions, which are
5092 further lowered by oacc_device_lower, once we get to the target
5093 compiler. The loop is of the form:
5094
5095 for (V = B; V LTGT E; V += S) {BODY}
5096
5097 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5098 (constant 0 for no chunking) and we will have a GWV partitioning
5099 mask, specifying dimensions over which the loop is to be
5100 partitioned (see note below). We generate code that looks like
5101 (this ignores tiling):
5102
5103 <entry_bb> [incoming FALL->body, BRANCH->exit]
5104 typedef signedintify (typeof (V)) T; // underlying signed integral type
5105 T range = E - B;
5106 T chunk_no = 0;
5107 T dir = LTGT == '<' ? +1 : -1;
5108 T chunk_max = GOACC_LOOP_CHUNKS (dir, range, S, CHUNK_SIZE, GWV);
5109 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5110
5111 <head_bb> [created by splitting end of entry_bb]
5112 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5113 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5114 if (!(offset LTGT bound)) goto bottom_bb;
5115
5116 <body_bb> [incoming]
5117 V = B + offset;
5118 {BODY}
5119
5120 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5121 offset += step;
5122 if (offset LTGT bound) goto body_bb; [*]
5123
5124 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5125 chunk_no++;
5126 if (chunk_no < chunk_max) goto head_bb;
5127
5128 <exit_bb> [incoming]
5129 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5130
5131 [*] Needed if V live at end of loop. */
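/* As a worked example of the final-value fixup marked [*] above
   (illustrative numbers only): for an ascending loop with B = 0, E = 10
   and S = 3, range is 10 and the value inserted for V is
   0 + ((10 - 1) / 3 + 1) * 3 = 12, matching what the equivalent
   sequential loop would leave in V after its last iteration.  */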
5132
5133 static void
5134 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5135 {
5136 tree v = fd->loop.v;
5137 enum tree_code cond_code = fd->loop.cond_code;
5138 enum tree_code plus_code = PLUS_EXPR;
5139
5140 tree chunk_size = integer_minus_one_node;
5141 tree gwv = integer_zero_node;
5142 tree iter_type = TREE_TYPE (v);
5143 tree diff_type = iter_type;
5144 tree plus_type = iter_type;
5145 struct oacc_collapse *counts = NULL;
5146
5147 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5148 == GF_OMP_FOR_KIND_OACC_LOOP);
5149 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5150 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5151
5152 if (POINTER_TYPE_P (iter_type))
5153 {
5154 plus_code = POINTER_PLUS_EXPR;
5155 plus_type = sizetype;
5156 }
5157 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5158 diff_type = signed_type_for (diff_type);
5159 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5160 diff_type = integer_type_node;
5161
5162 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5163 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5164 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5165 basic_block bottom_bb = NULL;
5166
5167 /* entry_bb has two successors; the branch edge is to the exit
5168 block, the fallthrough edge to the body. */
5169 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5170 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5171
5172 /* If cont_bb is non-NULL, it has two successors. The branch successor is
5173 body_bb, or a block whose only successor is body_bb. Its
5174 fallthrough successor is the final block (same as the branch
5175 successor of the entry_bb). */
5176 if (cont_bb)
5177 {
5178 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5179 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5180
5181 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5182 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5183 }
5184 else
5185 gcc_assert (!gimple_in_ssa_p (cfun));
5186
5187 /* The exit block only has entry_bb and cont_bb as predecessors. */
5188 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5189
5190 tree chunk_no;
5191 tree chunk_max = NULL_TREE;
5192 tree bound, offset;
5193 tree step = create_tmp_var (diff_type, ".step");
5194 bool up = cond_code == LT_EXPR;
5195 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5196 bool chunking = !gimple_in_ssa_p (cfun);
5197 bool negating;
5198
5199 /* Tiling vars. */
5200 tree tile_size = NULL_TREE;
5201 tree element_s = NULL_TREE;
5202 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5203 basic_block elem_body_bb = NULL;
5204 basic_block elem_cont_bb = NULL;
5205
5206 /* SSA instances. */
5207 tree offset_incr = NULL_TREE;
5208 tree offset_init = NULL_TREE;
5209
5210 gimple_stmt_iterator gsi;
5211 gassign *ass;
5212 gcall *call;
5213 gimple *stmt;
5214 tree expr;
5215 location_t loc;
5216 edge split, be, fte;
5217
5218 /* Split the end of entry_bb to create head_bb. */
5219 split = split_block (entry_bb, last_stmt (entry_bb));
5220 basic_block head_bb = split->dest;
5221 entry_bb = split->src;
5222
5223 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5224 gsi = gsi_last_nondebug_bb (entry_bb);
5225 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5226 loc = gimple_location (for_stmt);
5227
5228 if (gimple_in_ssa_p (cfun))
5229 {
5230 offset_init = gimple_omp_for_index (for_stmt, 0);
5231 gcc_assert (integer_zerop (fd->loop.n1));
5232 /* The SSA parallelizer does gang parallelism. */
5233 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5234 }
5235
5236 if (fd->collapse > 1 || fd->tiling)
5237 {
5238 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5239 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5240 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5241 TREE_TYPE (fd->loop.n2), loc);
5242
5243 if (SSA_VAR_P (fd->loop.n2))
5244 {
5245 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5246 true, GSI_SAME_STMT);
5247 ass = gimple_build_assign (fd->loop.n2, total);
5248 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5249 }
5250 }
5251
5252 tree b = fd->loop.n1;
5253 tree e = fd->loop.n2;
5254 tree s = fd->loop.step;
5255
5256 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5257 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5258
5259 /* Convert the step, avoiding possible unsigned->signed overflow. */
5260 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5261 if (negating)
5262 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5263 s = fold_convert (diff_type, s);
5264 if (negating)
5265 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5266 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5267
5268 if (!chunking)
5269 chunk_size = integer_zero_node;
5270 expr = fold_convert (diff_type, chunk_size);
5271 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5272 NULL_TREE, true, GSI_SAME_STMT);
5273
5274 if (fd->tiling)
5275 {
5276 /* Determine the tile size and element step,
5277 and modify the outer loop step size. */
5278 tile_size = create_tmp_var (diff_type, ".tile_size");
5279 expr = build_int_cst (diff_type, 1);
5280 for (int ix = 0; ix < fd->collapse; ix++)
5281 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5282 expr = force_gimple_operand_gsi (&gsi, expr, true,
5283 NULL_TREE, true, GSI_SAME_STMT);
5284 ass = gimple_build_assign (tile_size, expr);
5285 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5286
5287 element_s = create_tmp_var (diff_type, ".element_s");
5288 ass = gimple_build_assign (element_s, s);
5289 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5290
5291 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5292 s = force_gimple_operand_gsi (&gsi, expr, true,
5293 NULL_TREE, true, GSI_SAME_STMT);
5294 }
5295
5296 /* Determine the range, avoiding possible unsigned->signed overflow. */
5297 negating = !up && TYPE_UNSIGNED (iter_type);
5298 expr = fold_build2 (MINUS_EXPR, plus_type,
5299 fold_convert (plus_type, negating ? b : e),
5300 fold_convert (plus_type, negating ? e : b));
5301 expr = fold_convert (diff_type, expr);
5302 if (negating)
5303 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5304 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5305 NULL_TREE, true, GSI_SAME_STMT);
5306
5307 chunk_no = build_int_cst (diff_type, 0);
5308 if (chunking)
5309 {
5310 gcc_assert (!gimple_in_ssa_p (cfun));
5311
5312 expr = chunk_no;
5313 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5314 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5315
5316 ass = gimple_build_assign (chunk_no, expr);
5317 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5318
5319 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5320 build_int_cst (integer_type_node,
5321 IFN_GOACC_LOOP_CHUNKS),
5322 dir, range, s, chunk_size, gwv);
5323 gimple_call_set_lhs (call, chunk_max);
5324 gimple_set_location (call, loc);
5325 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5326 }
5327 else
5328 chunk_size = chunk_no;
5329
5330 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5331 build_int_cst (integer_type_node,
5332 IFN_GOACC_LOOP_STEP),
5333 dir, range, s, chunk_size, gwv);
5334 gimple_call_set_lhs (call, step);
5335 gimple_set_location (call, loc);
5336 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5337
5338 /* Remove the GIMPLE_OMP_FOR. */
5339 gsi_remove (&gsi, true);
5340
5341 /* Fixup edges from head_bb. */
5342 be = BRANCH_EDGE (head_bb);
5343 fte = FALLTHRU_EDGE (head_bb);
5344 be->flags |= EDGE_FALSE_VALUE;
5345 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5346
5347 basic_block body_bb = fte->dest;
5348
5349 if (gimple_in_ssa_p (cfun))
5350 {
5351 gsi = gsi_last_nondebug_bb (cont_bb);
5352 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5353
5354 offset = gimple_omp_continue_control_use (cont_stmt);
5355 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5356 }
5357 else
5358 {
5359 offset = create_tmp_var (diff_type, ".offset");
5360 offset_init = offset_incr = offset;
5361 }
5362 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5363
5364 /* Loop offset & bound go into head_bb. */
5365 gsi = gsi_start_bb (head_bb);
5366
5367 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5368 build_int_cst (integer_type_node,
5369 IFN_GOACC_LOOP_OFFSET),
5370 dir, range, s,
5371 chunk_size, gwv, chunk_no);
5372 gimple_call_set_lhs (call, offset_init);
5373 gimple_set_location (call, loc);
5374 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5375
5376 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5377 build_int_cst (integer_type_node,
5378 IFN_GOACC_LOOP_BOUND),
5379 dir, range, s,
5380 chunk_size, gwv, offset_init);
5381 gimple_call_set_lhs (call, bound);
5382 gimple_set_location (call, loc);
5383 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5384
5385 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5386 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5387 GSI_CONTINUE_LINKING);
5388
5389 /* V assignment goes into body_bb. */
5390 if (!gimple_in_ssa_p (cfun))
5391 {
5392 gsi = gsi_start_bb (body_bb);
5393
5394 expr = build2 (plus_code, iter_type, b,
5395 fold_convert (plus_type, offset));
5396 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5397 true, GSI_SAME_STMT);
5398 ass = gimple_build_assign (v, expr);
5399 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5400
5401 if (fd->collapse > 1 || fd->tiling)
5402 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5403
5404 if (fd->tiling)
5405 {
5406 /* Determine the range of the element loop -- usually simply
5407 the tile_size, but could be smaller if the final
5408 iteration of the outer loop is a partial tile. */
5409 tree e_range = create_tmp_var (diff_type, ".e_range");
5410
5411 expr = build2 (MIN_EXPR, diff_type,
5412 build2 (MINUS_EXPR, diff_type, bound, offset),
5413 build2 (MULT_EXPR, diff_type, tile_size,
5414 element_s));
5415 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5416 true, GSI_SAME_STMT);
5417 ass = gimple_build_assign (e_range, expr);
5418 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5419
5420 /* Determine bound, offset & step of inner loop. */
5421 e_bound = create_tmp_var (diff_type, ".e_bound");
5422 e_offset = create_tmp_var (diff_type, ".e_offset");
5423 e_step = create_tmp_var (diff_type, ".e_step");
5424
5425 /* Mark these as element loops. */
5426 tree t, e_gwv = integer_minus_one_node;
5427 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5428
5429 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5430 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5431 element_s, chunk, e_gwv, chunk);
5432 gimple_call_set_lhs (call, e_offset);
5433 gimple_set_location (call, loc);
5434 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5435
5436 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5437 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5438 element_s, chunk, e_gwv, e_offset);
5439 gimple_call_set_lhs (call, e_bound);
5440 gimple_set_location (call, loc);
5441 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5442
5443 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5444 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5445 element_s, chunk, e_gwv);
5446 gimple_call_set_lhs (call, e_step);
5447 gimple_set_location (call, loc);
5448 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5449
5450 /* Add test and split block. */
5451 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5452 stmt = gimple_build_cond_empty (expr);
5453 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5454 split = split_block (body_bb, stmt);
5455 elem_body_bb = split->dest;
5456 if (cont_bb == body_bb)
5457 cont_bb = elem_body_bb;
5458 body_bb = split->src;
5459
5460 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5461
5462 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5463 if (cont_bb == NULL)
5464 {
5465 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5466 e->probability = profile_probability::even ();
5467 split->probability = profile_probability::even ();
5468 }
5469
5470 /* Initialize the user's loop vars. */
5471 gsi = gsi_start_bb (elem_body_bb);
5472 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5473 }
5474 }
5475
5476 /* Loop increment goes into cont_bb. If this is not a loop, we
5477 will have spawned threads as if it was, and each one will
5478 execute one iteration. The specification is not explicit about
5479 whether such constructs are ill-formed or not, and they can
5480 occur, especially when noreturn routines are involved. */
5481 if (cont_bb)
5482 {
5483 gsi = gsi_last_nondebug_bb (cont_bb);
5484 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5485 loc = gimple_location (cont_stmt);
5486
5487 if (fd->tiling)
5488 {
5489 /* Insert element loop increment and test. */
5490 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5491 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5492 true, GSI_SAME_STMT);
5493 ass = gimple_build_assign (e_offset, expr);
5494 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5495 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5496
5497 stmt = gimple_build_cond_empty (expr);
5498 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5499 split = split_block (cont_bb, stmt);
5500 elem_cont_bb = split->src;
5501 cont_bb = split->dest;
5502
5503 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5504 split->probability = profile_probability::unlikely ().guessed ();
5505 edge latch_edge
5506 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5507 latch_edge->probability = profile_probability::likely ().guessed ();
5508
5509 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5510 skip_edge->probability = profile_probability::unlikely ().guessed ();
5511 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5512 loop_entry_edge->probability
5513 = profile_probability::likely ().guessed ();
5514
5515 gsi = gsi_for_stmt (cont_stmt);
5516 }
5517
5518 /* Increment offset. */
5519 if (gimple_in_ssa_p (cfun))
5520 expr = build2 (plus_code, iter_type, offset,
5521 fold_convert (plus_type, step));
5522 else
5523 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5524 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5525 true, GSI_SAME_STMT);
5526 ass = gimple_build_assign (offset_incr, expr);
5527 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5528 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5529 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5530
5531 /* Remove the GIMPLE_OMP_CONTINUE. */
5532 gsi_remove (&gsi, true);
5533
5534 /* Fixup edges from cont_bb. */
5535 be = BRANCH_EDGE (cont_bb);
5536 fte = FALLTHRU_EDGE (cont_bb);
5537 be->flags |= EDGE_TRUE_VALUE;
5538 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5539
5540 if (chunking)
5541 {
5542 /* Split the beginning of exit_bb to make bottom_bb. We
5543 need to insert a nop at the start, because splitting is
5544 after a stmt, not before. */
5545 gsi = gsi_start_bb (exit_bb);
5546 stmt = gimple_build_nop ();
5547 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5548 split = split_block (exit_bb, stmt);
5549 bottom_bb = split->src;
5550 exit_bb = split->dest;
5551 gsi = gsi_last_bb (bottom_bb);
5552
5553 /* Chunk increment and test go into bottom_bb. */
5554 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5555 build_int_cst (diff_type, 1));
5556 ass = gimple_build_assign (chunk_no, expr);
5557 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5558
5559 /* Chunk test at end of bottom_bb. */
5560 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5561 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5562 GSI_CONTINUE_LINKING);
5563
5564 /* Fixup edges from bottom_bb. */
5565 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5566 split->probability = profile_probability::unlikely ().guessed ();
5567 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5568 latch_edge->probability = profile_probability::likely ().guessed ();
5569 }
5570 }
5571
5572 gsi = gsi_last_nondebug_bb (exit_bb);
5573 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5574 loc = gimple_location (gsi_stmt (gsi));
5575
5576 if (!gimple_in_ssa_p (cfun))
5577 {
5578 /* Insert the final value of V, in case it is live. This is the
5579 value for the only thread that survives past the join. */
5580 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5581 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5582 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5583 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5584 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5585 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5586 true, GSI_SAME_STMT);
5587 ass = gimple_build_assign (v, expr);
5588 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5589 }
5590
5591 /* Remove the OMP_RETURN. */
5592 gsi_remove (&gsi, true);
5593
5594 if (cont_bb)
5595 {
5596 /* We now have one, two or three nested loops. Update the loop
5597 structures. */
5598 struct loop *parent = entry_bb->loop_father;
5599 struct loop *body = body_bb->loop_father;
5600
5601 if (chunking)
5602 {
5603 struct loop *chunk_loop = alloc_loop ();
5604 chunk_loop->header = head_bb;
5605 chunk_loop->latch = bottom_bb;
5606 add_loop (chunk_loop, parent);
5607 parent = chunk_loop;
5608 }
5609 else if (parent != body)
5610 {
5611 gcc_assert (body->header == body_bb);
5612 gcc_assert (body->latch == cont_bb
5613 || single_pred (body->latch) == cont_bb);
5614 parent = NULL;
5615 }
5616
5617 if (parent)
5618 {
5619 struct loop *body_loop = alloc_loop ();
5620 body_loop->header = body_bb;
5621 body_loop->latch = cont_bb;
5622 add_loop (body_loop, parent);
5623
5624 if (fd->tiling)
5625 {
5626 /* Insert tiling's element loop. */
5627 struct loop *inner_loop = alloc_loop ();
5628 inner_loop->header = elem_body_bb;
5629 inner_loop->latch = elem_cont_bb;
5630 add_loop (inner_loop, body_loop);
5631 }
5632 }
5633 }
5634 }
5635
5636 /* Expand the OMP loop defined by REGION. */
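/* A brief illustration (not exhaustive): a plain

     #pragma omp for schedule(runtime)
     for (i = 0; i < n; i++) ...

   is neither simd, OpenACC, taskloop nor unordered static, so it is
   expanded by expand_omp_for_generic below, with the start/next entry
   points picked from the GOMP_loop_*_start/GOMP_loop_*_next builtins
   (their _ull_ variants when the iteration type is unsigned long long).  */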
5637
5638 static void
5639 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5640 {
5641 struct omp_for_data fd;
5642 struct omp_for_data_loop *loops;
5643
5644 loops
5645 = (struct omp_for_data_loop *)
5646 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5647 * sizeof (struct omp_for_data_loop));
5648 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5649 &fd, loops);
5650 region->sched_kind = fd.sched_kind;
5651 region->sched_modifiers = fd.sched_modifiers;
5652
5653 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5654 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5655 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5656 if (region->cont)
5657 {
5658 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5659 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5660 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5661 }
5662 else
5663 /* If there isn't a continue then this is a degenerate case where
5664 the introduction of abnormal edges during lowering will prevent
5665 original loops from being detected. Fix that up. */
5666 loops_state_set (LOOPS_NEED_FIXUP);
5667
5668 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5669 expand_omp_simd (region, &fd);
5670 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5671 {
5672 gcc_assert (!inner_stmt);
5673 expand_oacc_for (region, &fd);
5674 }
5675 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5676 {
5677 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5678 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5679 else
5680 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5681 }
5682 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5683 && !fd.have_ordered)
5684 {
5685 if (fd.chunk_size == NULL)
5686 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5687 else
5688 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5689 }
5690 else
5691 {
5692 int fn_index, start_ix, next_ix;
5693
5694 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5695 == GF_OMP_FOR_KIND_FOR);
5696 if (fd.chunk_size == NULL
5697 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5698 fd.chunk_size = integer_zero_node;
5699 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5700 switch (fd.sched_kind)
5701 {
5702 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5703 fn_index = 3;
5704 break;
5705 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5706 case OMP_CLAUSE_SCHEDULE_GUIDED:
5707 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5708 && !fd.ordered
5709 && !fd.have_ordered)
5710 {
5711 fn_index = 3 + fd.sched_kind;
5712 break;
5713 }
5714 /* FALLTHRU */
5715 default:
5716 fn_index = fd.sched_kind;
5717 break;
5718 }
5719 if (!fd.ordered)
5720 fn_index += fd.have_ordered * 6;
5721 if (fd.ordered)
5722 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5723 else
5724 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5725 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5726 if (fd.iter_type == long_long_unsigned_type_node)
5727 {
5728 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5729 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5730 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5731 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5732 }
5733 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5734 (enum built_in_function) next_ix, inner_stmt);
5735 }
5736
5737 if (gimple_in_ssa_p (cfun))
5738 update_ssa (TODO_update_ssa_only_virtuals);
5739 }
5740
5741 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5742
5743 v = GOMP_sections_start (n);
5744 L0:
5745 switch (v)
5746 {
5747 case 0:
5748 goto L2;
5749 case 1:
5750 section 1;
5751 goto L1;
5752 case 2:
5753 ...
5754 case n:
5755 ...
5756 default:
5757 abort ();
5758 }
5759 L1:
5760 v = GOMP_sections_next ();
5761 goto L0;
5762 L2:
5763 reduction;
5764
5765 If this is a combined parallel sections region, replace the call to
5766 GOMP_sections_start with a call to GOMP_sections_next. */
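/* For instance (an illustrative sketch), a directive such as

     #pragma omp sections
     {
       #pragma omp section
       stmt1;
       #pragma omp section
       stmt2;
     }

   produces a switch whose cases 1 and 2 dispatch to the two section
   bodies, whose case 0 branches to L2, and whose default case calls
   __builtin_trap.  */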
5767
5768 static void
5769 expand_omp_sections (struct omp_region *region)
5770 {
5771 tree t, u, vin = NULL, vmain, vnext, l2;
5772 unsigned len;
5773 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5774 gimple_stmt_iterator si, switch_si;
5775 gomp_sections *sections_stmt;
5776 gimple *stmt;
5777 gomp_continue *cont;
5778 edge_iterator ei;
5779 edge e;
5780 struct omp_region *inner;
5781 unsigned i, casei;
5782 bool exit_reachable = region->cont != NULL;
5783
5784 gcc_assert (region->exit != NULL);
5785 entry_bb = region->entry;
5786 l0_bb = single_succ (entry_bb);
5787 l1_bb = region->cont;
5788 l2_bb = region->exit;
5789 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5790 l2 = gimple_block_label (l2_bb);
5791 else
5792 {
5793 /* This can happen if there are reductions. */
5794 len = EDGE_COUNT (l0_bb->succs);
5795 gcc_assert (len > 0);
5796 e = EDGE_SUCC (l0_bb, len - 1);
5797 si = gsi_last_nondebug_bb (e->dest);
5798 l2 = NULL_TREE;
5799 if (gsi_end_p (si)
5800 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5801 l2 = gimple_block_label (e->dest);
5802 else
5803 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5804 {
5805 si = gsi_last_nondebug_bb (e->dest);
5806 if (gsi_end_p (si)
5807 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5808 {
5809 l2 = gimple_block_label (e->dest);
5810 break;
5811 }
5812 }
5813 }
5814 if (exit_reachable)
5815 default_bb = create_empty_bb (l1_bb->prev_bb);
5816 else
5817 default_bb = create_empty_bb (l0_bb);
5818
5819 /* We will build a switch() with enough cases for all the
5820 GIMPLE_OMP_SECTION regions, a '0' case taken when there is no more work
5821 and a default case to abort if something goes wrong. */
5822 len = EDGE_COUNT (l0_bb->succs);
5823
5824 /* Use vec::quick_push on label_vec throughout, since we know the size
5825 in advance. */
5826 auto_vec<tree> label_vec (len);
5827
5828 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5829 GIMPLE_OMP_SECTIONS statement. */
5830 si = gsi_last_nondebug_bb (entry_bb);
5831 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5832 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5833 vin = gimple_omp_sections_control (sections_stmt);
5834 if (!is_combined_parallel (region))
5835 {
5836 /* If we are not inside a combined parallel+sections region,
5837 call GOMP_sections_start. */
5838 t = build_int_cst (unsigned_type_node, len - 1);
5839 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5840 stmt = gimple_build_call (u, 1, t);
5841 }
5842 else
5843 {
5844 /* Otherwise, call GOMP_sections_next. */
5845 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5846 stmt = gimple_build_call (u, 0);
5847 }
5848 gimple_call_set_lhs (stmt, vin);
5849 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5850 gsi_remove (&si, true);
5851
5852 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5853 L0_BB. */
5854 switch_si = gsi_last_nondebug_bb (l0_bb);
5855 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5856 if (exit_reachable)
5857 {
5858 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5859 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5860 vmain = gimple_omp_continue_control_use (cont);
5861 vnext = gimple_omp_continue_control_def (cont);
5862 }
5863 else
5864 {
5865 vmain = vin;
5866 vnext = NULL_TREE;
5867 }
5868
5869 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5870 label_vec.quick_push (t);
5871 i = 1;
5872
5873 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5874 for (inner = region->inner, casei = 1;
5875 inner;
5876 inner = inner->next, i++, casei++)
5877 {
5878 basic_block s_entry_bb, s_exit_bb;
5879
5880 /* Skip optional reduction region. */
5881 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5882 {
5883 --i;
5884 --casei;
5885 continue;
5886 }
5887
5888 s_entry_bb = inner->entry;
5889 s_exit_bb = inner->exit;
5890
5891 t = gimple_block_label (s_entry_bb);
5892 u = build_int_cst (unsigned_type_node, casei);
5893 u = build_case_label (u, NULL, t);
5894 label_vec.quick_push (u);
5895
5896 si = gsi_last_nondebug_bb (s_entry_bb);
5897 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5898 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5899 gsi_remove (&si, true);
5900 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5901
5902 if (s_exit_bb == NULL)
5903 continue;
5904
5905 si = gsi_last_nondebug_bb (s_exit_bb);
5906 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5907 gsi_remove (&si, true);
5908
5909 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5910 }
5911
5912 /* Error handling code goes in DEFAULT_BB. */
5913 t = gimple_block_label (default_bb);
5914 u = build_case_label (NULL, NULL, t);
5915 make_edge (l0_bb, default_bb, 0);
5916 add_bb_to_loop (default_bb, current_loops->tree_root);
5917
5918 stmt = gimple_build_switch (vmain, u, label_vec);
5919 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5920 gsi_remove (&switch_si, true);
5921
5922 si = gsi_start_bb (default_bb);
5923 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5924 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5925
5926 if (exit_reachable)
5927 {
5928 tree bfn_decl;
5929
5930 /* Code to get the next section goes in L1_BB. */
5931 si = gsi_last_nondebug_bb (l1_bb);
5932 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5933
5934 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5935 stmt = gimple_build_call (bfn_decl, 0);
5936 gimple_call_set_lhs (stmt, vnext);
5937 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5938 gsi_remove (&si, true);
5939
5940 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5941 }
5942
5943 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5944 si = gsi_last_nondebug_bb (l2_bb);
5945 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5946 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5947 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5948 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5949 else
5950 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5951 stmt = gimple_build_call (t, 0);
5952 if (gimple_omp_return_lhs (gsi_stmt (si)))
5953 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5954 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5955 gsi_remove (&si, true);
5956
5957 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5958 }
5959
5960 /* Expand code for an OpenMP single directive. We've already expanded
5961 much of the code; here we simply place the GOMP_barrier call. */
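/* E.g. (sketch):

     #pragma omp single
     body;

   only the entry and exit markers are removed here; unless the
   directive carried a nowait clause, omp_build_barrier supplies the
   barrier call at the region exit.  */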
5962
5963 static void
5964 expand_omp_single (struct omp_region *region)
5965 {
5966 basic_block entry_bb, exit_bb;
5967 gimple_stmt_iterator si;
5968
5969 entry_bb = region->entry;
5970 exit_bb = region->exit;
5971
5972 si = gsi_last_nondebug_bb (entry_bb);
5973 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5974 gsi_remove (&si, true);
5975 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5976
5977 si = gsi_last_nondebug_bb (exit_bb);
5978 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5979 {
5980 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5981 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5982 }
5983 gsi_remove (&si, true);
5984 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5985 }
5986
5987 /* Generic expansion for OpenMP synchronization directives: master,
5988 taskgroup, ordered, critical and teams. All we need to do here is
5989 remove the entry and exit markers for REGION. */
5990
5991 static void
5992 expand_omp_synch (struct omp_region *region)
5993 {
5994 basic_block entry_bb, exit_bb;
5995 gimple_stmt_iterator si;
5996
5997 entry_bb = region->entry;
5998 exit_bb = region->exit;
5999
6000 si = gsi_last_nondebug_bb (entry_bb);
6001 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6002 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6003 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6004 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6005 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6006 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6007 gsi_remove (&si, true);
6008 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6009
6010 if (exit_bb)
6011 {
6012 si = gsi_last_nondebug_bb (exit_bb);
6013 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6014 gsi_remove (&si, true);
6015 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6016 }
6017 }
6018
6019 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6020 operation as a normal volatile load. */
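/* For instance (illustrative only), with a 4-byte int x the directive

     #pragma omp atomic read
     v = x;

   becomes roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   (MEMMODEL_SEQ_CST instead when the seq_cst clause was given).  */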
6021
6022 static bool
6023 expand_omp_atomic_load (basic_block load_bb, tree addr,
6024 tree loaded_val, int index)
6025 {
6026 enum built_in_function tmpbase;
6027 gimple_stmt_iterator gsi;
6028 basic_block store_bb;
6029 location_t loc;
6030 gimple *stmt;
6031 tree decl, call, type, itype;
6032
6033 gsi = gsi_last_nondebug_bb (load_bb);
6034 stmt = gsi_stmt (gsi);
6035 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6036 loc = gimple_location (stmt);
6037
6038 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6039 is smaller than word size, then expand_atomic_load assumes that the load
6040 is atomic. We could avoid the builtin entirely in this case. */
6041
6042 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6043 decl = builtin_decl_explicit (tmpbase);
6044 if (decl == NULL_TREE)
6045 return false;
6046
6047 type = TREE_TYPE (loaded_val);
6048 itype = TREE_TYPE (TREE_TYPE (decl));
6049
6050 call = build_call_expr_loc (loc, decl, 2, addr,
6051 build_int_cst (NULL,
6052 gimple_omp_atomic_seq_cst_p (stmt)
6053 ? MEMMODEL_SEQ_CST
6054 : MEMMODEL_RELAXED));
6055 if (!useless_type_conversion_p (type, itype))
6056 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6057 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6058
6059 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6060 gsi_remove (&gsi, true);
6061
6062 store_bb = single_succ (load_bb);
6063 gsi = gsi_last_nondebug_bb (store_bb);
6064 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6065 gsi_remove (&gsi, true);
6066
6067 if (gimple_in_ssa_p (cfun))
6068 update_ssa (TODO_update_ssa_no_phi);
6069
6070 return true;
6071 }
6072
6073 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6074 operation as a normal volatile store. */
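/* For instance (illustrative only), with a 4-byte int x

     #pragma omp atomic write
     x = expr;

   becomes roughly __atomic_store_4 (&x, expr, MEMMODEL_RELAXED), and an
   __atomic_exchange_4 call instead when the old value is needed.  */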
6075
6076 static bool
6077 expand_omp_atomic_store (basic_block load_bb, tree addr,
6078 tree loaded_val, tree stored_val, int index)
6079 {
6080 enum built_in_function tmpbase;
6081 gimple_stmt_iterator gsi;
6082 basic_block store_bb = single_succ (load_bb);
6083 location_t loc;
6084 gimple *stmt;
6085 tree decl, call, type, itype;
6086 machine_mode imode;
6087 bool exchange;
6088
6089 gsi = gsi_last_nondebug_bb (load_bb);
6090 stmt = gsi_stmt (gsi);
6091 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6092
6093 /* If the load value is needed, then this isn't a store but an exchange. */
6094 exchange = gimple_omp_atomic_need_value_p (stmt);
6095
6096 gsi = gsi_last_nondebug_bb (store_bb);
6097 stmt = gsi_stmt (gsi);
6098 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6099 loc = gimple_location (stmt);
6100
6101 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6102 is smaller than word size, then expand_atomic_store assumes that the store
6103 is atomic. We could avoid the builtin entirely in this case. */
6104
6105 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6106 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6107 decl = builtin_decl_explicit (tmpbase);
6108 if (decl == NULL_TREE)
6109 return false;
6110
6111 type = TREE_TYPE (stored_val);
6112
6113 /* Dig out the type of the function's second argument. */
6114 itype = TREE_TYPE (decl);
6115 itype = TYPE_ARG_TYPES (itype);
6116 itype = TREE_CHAIN (itype);
6117 itype = TREE_VALUE (itype);
6118 imode = TYPE_MODE (itype);
6119
6120 if (exchange && !can_atomic_exchange_p (imode, true))
6121 return false;
6122
6123 if (!useless_type_conversion_p (itype, type))
6124 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6125 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6126 build_int_cst (NULL,
6127 gimple_omp_atomic_seq_cst_p (stmt)
6128 ? MEMMODEL_SEQ_CST
6129 : MEMMODEL_RELAXED));
6130 if (exchange)
6131 {
6132 if (!useless_type_conversion_p (type, itype))
6133 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6134 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6135 }
6136
6137 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6138 gsi_remove (&gsi, true);
6139
6140 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6141 gsi = gsi_last_nondebug_bb (load_bb);
6142 gsi_remove (&gsi, true);
6143
6144 if (gimple_in_ssa_p (cfun))
6145 update_ssa (TODO_update_ssa_no_phi);
6146
6147 return true;
6148 }
6149
6150 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6151 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6152 size of the data type, and thus usable to find the index of the builtin
6153 decl. Returns false if the expression is not of the proper form. */
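/* For instance (illustrative only), with a 4-byte int x

     #pragma omp atomic
     x += n;

   matches the PLUS_EXPR case below and is emitted roughly as

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   (or the __atomic_add_fetch_4 form when the updated value is needed).  */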
6154
6155 static bool
6156 expand_omp_atomic_fetch_op (basic_block load_bb,
6157 tree addr, tree loaded_val,
6158 tree stored_val, int index)
6159 {
6160 enum built_in_function oldbase, newbase, tmpbase;
6161 tree decl, itype, call;
6162 tree lhs, rhs;
6163 basic_block store_bb = single_succ (load_bb);
6164 gimple_stmt_iterator gsi;
6165 gimple *stmt;
6166 location_t loc;
6167 enum tree_code code;
6168 bool need_old, need_new;
6169 machine_mode imode;
6170 bool seq_cst;
6171
6172 /* We expect to find the following sequences:
6173
6174 load_bb:
6175 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6176
6177 store_bb:
6178 val = tmp OP something; (or: something OP tmp)
6179 GIMPLE_OMP_STORE (val)
6180
6181 ???FIXME: Allow a more flexible sequence.
6182 Perhaps use data flow to pick the statements.
6183
6184 */
6185
6186 gsi = gsi_after_labels (store_bb);
6187 stmt = gsi_stmt (gsi);
6188 if (is_gimple_debug (stmt))
6189 {
6190 gsi_next_nondebug (&gsi);
6191 if (gsi_end_p (gsi))
6192 return false;
6193 stmt = gsi_stmt (gsi);
6194 }
6195 loc = gimple_location (stmt);
6196 if (!is_gimple_assign (stmt))
6197 return false;
6198 gsi_next_nondebug (&gsi);
6199 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6200 return false;
6201 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6202 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6203 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6204 gcc_checking_assert (!need_old || !need_new);
6205
6206 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6207 return false;
6208
6209 /* Check for one of the supported fetch-op operations. */
6210 code = gimple_assign_rhs_code (stmt);
6211 switch (code)
6212 {
6213 case PLUS_EXPR:
6214 case POINTER_PLUS_EXPR:
6215 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6216 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6217 break;
6218 case MINUS_EXPR:
6219 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6220 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6221 break;
6222 case BIT_AND_EXPR:
6223 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6224 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6225 break;
6226 case BIT_IOR_EXPR:
6227 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6228 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6229 break;
6230 case BIT_XOR_EXPR:
6231 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6232 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6233 break;
6234 default:
6235 return false;
6236 }
6237
6238 /* Make sure the expression is of the proper form. */
6239 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6240 rhs = gimple_assign_rhs2 (stmt);
6241 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6242 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6243 rhs = gimple_assign_rhs1 (stmt);
6244 else
6245 return false;
6246
6247 tmpbase = ((enum built_in_function)
6248 ((need_new ? newbase : oldbase) + index + 1));
6249 decl = builtin_decl_explicit (tmpbase);
6250 if (decl == NULL_TREE)
6251 return false;
6252 itype = TREE_TYPE (TREE_TYPE (decl));
6253 imode = TYPE_MODE (itype);
6254
6255 /* We could test all of the various optabs involved, but the fact of the
6256 matter is that (with the exception of i486 vs i586 and xadd) all targets
6257 that support any atomic operation optab also implement compare-and-swap.
6258 Let optabs.c take care of expanding any compare-and-swap loop. */
6259 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6260 return false;
6261
6262 gsi = gsi_last_nondebug_bb (load_bb);
6263 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6264
6265 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6266 It only requires that the operation happen atomically. Thus we can
6267 use the RELAXED memory model. */
6268 call = build_call_expr_loc (loc, decl, 3, addr,
6269 fold_convert_loc (loc, itype, rhs),
6270 build_int_cst (NULL,
6271 seq_cst ? MEMMODEL_SEQ_CST
6272 : MEMMODEL_RELAXED));
6273
6274 if (need_old || need_new)
6275 {
6276 lhs = need_old ? loaded_val : stored_val;
6277 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6278 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6279 }
6280 else
6281 call = fold_convert_loc (loc, void_type_node, call);
6282 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6283 gsi_remove (&gsi, true);
6284
6285 gsi = gsi_last_nondebug_bb (store_bb);
6286 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6287 gsi_remove (&gsi, true);
6288 gsi = gsi_last_nondebug_bb (store_bb);
6289 stmt = gsi_stmt (gsi);
6290 gsi_remove (&gsi, true);
6291
6292 if (gimple_in_ssa_p (cfun))
6293 {
6294 release_defs (stmt);
6295 update_ssa (TODO_update_ssa_no_phi);
6296 }
6297
6298 return true;
6299 }
6300
6301 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6302
6303 oldval = *addr;
6304 repeat:
6305 newval = rhs; // with oldval replacing *addr in rhs
6306 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6307 if (oldval != newval)
6308 goto repeat;
6309
6310 INDEX is log2 of the size of the data type, and thus usable to find the
6311 index of the builtin decl. */
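/* For instance (illustrative only), a 4-byte float x updated with

     #pragma omp atomic
     x += f;

   has no fetch-op builtin of its own; the value is view-converted to a
   4-byte integer and the loop above retries
   __sync_val_compare_and_swap_4 until the swap succeeds.  */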
6312
6313 static bool
6314 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6315 tree addr, tree loaded_val, tree stored_val,
6316 int index)
6317 {
6318 tree loadedi, storedi, initial, new_storedi, old_vali;
6319 tree type, itype, cmpxchg, iaddr, atype;
6320 gimple_stmt_iterator si;
6321 basic_block loop_header = single_succ (load_bb);
6322 gimple *phi, *stmt;
6323 edge e;
6324 enum built_in_function fncode;
6325
6326 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6327 order to use the RELAXED memory model effectively. */
6328 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6329 + index + 1);
6330 cmpxchg = builtin_decl_explicit (fncode);
6331 if (cmpxchg == NULL_TREE)
6332 return false;
6333 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6334 atype = type;
6335 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6336
6337 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6338 || !can_atomic_load_p (TYPE_MODE (itype)))
6339 return false;
6340
6341 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6342 si = gsi_last_nondebug_bb (load_bb);
6343 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6344
6345 /* For floating-point values, we'll need to view-convert them to integers
6346 so that we can perform the atomic compare and swap. Simplify the
6347 following code by always setting up the "i"ntegral variables. */
6348 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6349 {
6350 tree iaddr_val;
6351
6352 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6353 true));
6354 atype = itype;
6355 iaddr_val
6356 = force_gimple_operand_gsi (&si,
6357 fold_convert (TREE_TYPE (iaddr), addr),
6358 false, NULL_TREE, true, GSI_SAME_STMT);
6359 stmt = gimple_build_assign (iaddr, iaddr_val);
6360 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6361 loadedi = create_tmp_var (itype);
6362 if (gimple_in_ssa_p (cfun))
6363 loadedi = make_ssa_name (loadedi);
6364 }
6365 else
6366 {
6367 iaddr = addr;
6368 loadedi = loaded_val;
6369 }
6370
6371 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6372 tree loaddecl = builtin_decl_explicit (fncode);
6373 if (loaddecl)
6374 initial
6375 = fold_convert (atype,
6376 build_call_expr (loaddecl, 2, iaddr,
6377 build_int_cst (NULL_TREE,
6378 MEMMODEL_RELAXED)));
6379 else
6380 {
6381 tree off
6382 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6383 true), 0);
6384 initial = build2 (MEM_REF, atype, iaddr, off);
6385 }
6386
6387 initial
6388 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6389 GSI_SAME_STMT);
6390
6391 /* Move the value to the LOADEDI temporary. */
6392 if (gimple_in_ssa_p (cfun))
6393 {
6394 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6395 phi = create_phi_node (loadedi, loop_header);
6396 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6397 initial);
6398 }
6399 else
6400 gsi_insert_before (&si,
6401 gimple_build_assign (loadedi, initial),
6402 GSI_SAME_STMT);
6403 if (loadedi != loaded_val)
6404 {
6405 gimple_stmt_iterator gsi2;
6406 tree x;
6407
6408 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6409 gsi2 = gsi_start_bb (loop_header);
6410 if (gimple_in_ssa_p (cfun))
6411 {
6412 gassign *stmt;
6413 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6414 true, GSI_SAME_STMT);
6415 stmt = gimple_build_assign (loaded_val, x);
6416 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6417 }
6418 else
6419 {
6420 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6421 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6422 true, GSI_SAME_STMT);
6423 }
6424 }
6425 gsi_remove (&si, true);
6426
6427 si = gsi_last_nondebug_bb (store_bb);
6428 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6429
6430 if (iaddr == addr)
6431 storedi = stored_val;
6432 else
6433 storedi
6434 = force_gimple_operand_gsi (&si,
6435 build1 (VIEW_CONVERT_EXPR, itype,
6436 stored_val), true, NULL_TREE, true,
6437 GSI_SAME_STMT);
6438
6439 /* Build the compare&swap statement. */
6440 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6441 new_storedi = force_gimple_operand_gsi (&si,
6442 fold_convert (TREE_TYPE (loadedi),
6443 new_storedi),
6444 true, NULL_TREE,
6445 true, GSI_SAME_STMT);
6446
6447 if (gimple_in_ssa_p (cfun))
6448 old_vali = loadedi;
6449 else
6450 {
6451 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6452 stmt = gimple_build_assign (old_vali, loadedi);
6453 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6454
6455 stmt = gimple_build_assign (loadedi, new_storedi);
6456 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6457 }
6458
6459 /* Note that we always perform the comparison as an integer, even for
6460 floating point. This allows the atomic operation to properly
6461 succeed even with NaNs and -0.0. */
6462 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6463 stmt = gimple_build_cond_empty (ne);
6464 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6465
6466 /* Update cfg. */
6467 e = single_succ_edge (store_bb);
6468 e->flags &= ~EDGE_FALLTHRU;
6469 e->flags |= EDGE_FALSE_VALUE;
6470 /* Expect no looping. */
6471 e->probability = profile_probability::guessed_always ();
6472
6473 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6474 e->probability = profile_probability::guessed_never ();
6475
6476 /* Copy the new value to loadedi (we already did that before the condition
6477 if we are not in SSA). */
6478 if (gimple_in_ssa_p (cfun))
6479 {
6480 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6481 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6482 }
6483
6484 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6485 gsi_remove (&si, true);
6486
6487 struct loop *loop = alloc_loop ();
6488 loop->header = loop_header;
6489 loop->latch = store_bb;
6490 add_loop (loop, loop_header->loop_father);
6491
6492 if (gimple_in_ssa_p (cfun))
6493 update_ssa (TODO_update_ssa_no_phi);
6494
6495 return true;
6496 }
6497
6498 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6499
6500 GOMP_atomic_start ();
6501 *addr = rhs;
6502 GOMP_atomic_end ();
6503
6504 The result is not globally atomic, but works so long as all parallel
6505 references are within #pragma omp atomic directives. According to
6506 responses received from omp@openmp.org, this appears to be within spec.
6507 Which makes sense, since that's how several other compilers handle
6508 this situation as well.
6509 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6510 expanding. STORED_VAL is the operand of the matching
6511 GIMPLE_OMP_ATOMIC_STORE.
6512
6513 We replace
6514 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6515 loaded_val = *addr;
6516
6517 and replace
6518 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6519 *addr = stored_val;
6520 */
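/* Sketch of when this final fallback is used: expand_omp_atomic below
   only reaches it when the operand size is not a supported power of two,
   the data is not sufficiently aligned, or none of the load, store,
   fetch-op or compare-and-swap expansions above succeeded (e.g. the
   target has no compare-and-swap pattern for the access width).  */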
6521
6522 static bool
6523 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6524 tree addr, tree loaded_val, tree stored_val)
6525 {
6526 gimple_stmt_iterator si;
6527 gassign *stmt;
6528 tree t;
6529
6530 si = gsi_last_nondebug_bb (load_bb);
6531 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6532
6533 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6534 t = build_call_expr (t, 0);
6535 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6536
6537 tree mem = build_simple_mem_ref (addr);
6538 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6539 TREE_OPERAND (mem, 1)
6540 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6541 true),
6542 TREE_OPERAND (mem, 1));
6543 stmt = gimple_build_assign (loaded_val, mem);
6544 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6545 gsi_remove (&si, true);
6546
6547 si = gsi_last_nondebug_bb (store_bb);
6548 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6549
6550 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6551 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6552
6553 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6554 t = build_call_expr (t, 0);
6555 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6556 gsi_remove (&si, true);
6557
6558 if (gimple_in_ssa_p (cfun))
6559 update_ssa (TODO_update_ssa_no_phi);
6560 return true;
6561 }
6562
6563 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it
6564 using expand_omp_atomic_fetch_op. If that fails, we try to
6565 call expand_omp_atomic_pipeline, and if that fails too, the
6566 ultimate fallback is wrapping the operation in a mutex
6567 (expand_omp_atomic_mutex). REGION is the atomic region built
6568 by build_omp_regions_1(). */
6569
6570 static void
6571 expand_omp_atomic (struct omp_region *region)
6572 {
6573 basic_block load_bb = region->entry, store_bb = region->exit;
6574 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6575 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6576 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6577 tree addr = gimple_omp_atomic_load_rhs (load);
6578 tree stored_val = gimple_omp_atomic_store_val (store);
6579 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6580 HOST_WIDE_INT index;
6581
6582 /* Make sure the type is one of the supported sizes. */
6583 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6584 index = exact_log2 (index);
6585 if (index >= 0 && index <= 4)
6586 {
6587 unsigned int align = TYPE_ALIGN_UNIT (type);
6588
6589 /* __sync builtins require strict data alignment. */
6590 if (exact_log2 (align) >= index)
6591 {
6592 /* Atomic load. */
6593 scalar_mode smode;
6594 if (loaded_val == stored_val
6595 && (is_int_mode (TYPE_MODE (type), &smode)
6596 || is_float_mode (TYPE_MODE (type), &smode))
6597 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6598 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6599 return;
6600
6601 /* Atomic store. */
6602 if ((is_int_mode (TYPE_MODE (type), &smode)
6603 || is_float_mode (TYPE_MODE (type), &smode))
6604 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6605 && store_bb == single_succ (load_bb)
6606 && first_stmt (store_bb) == store
6607 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6608 stored_val, index))
6609 return;
6610
6611 /* When possible, use specialized atomic update functions. */
6612 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6613 && store_bb == single_succ (load_bb)
6614 && expand_omp_atomic_fetch_op (load_bb, addr,
6615 loaded_val, stored_val, index))
6616 return;
6617
6618 /* If we don't have specialized __sync builtins, try to implement
6619 it as a compare-and-swap loop. */
6620 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6621 loaded_val, stored_val, index))
6622 return;
6623 }
6624 }
6625
6626 /* The ultimate fallback is wrapping the operation in a mutex. */
6627 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6628 }
6629
6630 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6631 at REGION_EXIT. */
6632
6633 static void
6634 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6635 basic_block region_exit)
6636 {
6637 struct loop *outer = region_entry->loop_father;
6638 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6639
6640 /* Don't parallelize the kernels region if it contains more than one outer
6641 loop. */
6642 unsigned int nr_outer_loops = 0;
6643 struct loop *single_outer = NULL;
6644 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6645 {
6646 gcc_assert (loop_outer (loop) == outer);
6647
6648 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6649 continue;
6650
6651 if (region_exit != NULL
6652 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6653 continue;
6654
6655 nr_outer_loops++;
6656 single_outer = loop;
6657 }
6658 if (nr_outer_loops != 1)
6659 return;
6660
6661 for (struct loop *loop = single_outer->inner;
6662 loop != NULL;
6663 loop = loop->inner)
6664 if (loop->next)
6665 return;
6666
6667 /* Mark the loops in the region. */
6668 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6669 loop->in_oacc_kernels_region = true;
6670 }
6671
6672 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6673
6674 struct GTY(()) grid_launch_attributes_trees
6675 {
6676 tree kernel_dim_array_type;
6677 tree kernel_lattrs_dimnum_decl;
6678 tree kernel_lattrs_grid_decl;
6679 tree kernel_lattrs_group_decl;
6680 tree kernel_launch_attributes_type;
6681 };
6682
6683 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6684
6685 /* Create types used to pass kernel launch attributes to target. */
6686
6687 static void
6688 grid_create_kernel_launch_attr_types (void)
6689 {
6690 if (grid_attr_trees)
6691 return;
6692 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6693
6694 tree dim_arr_index_type
6695 = build_index_type (build_int_cst (integer_type_node, 2));
6696 grid_attr_trees->kernel_dim_array_type
6697 = build_array_type (uint32_type_node, dim_arr_index_type);
6698
6699 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6700 grid_attr_trees->kernel_lattrs_dimnum_decl
6701 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6702 uint32_type_node);
6703 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6704
6705 grid_attr_trees->kernel_lattrs_grid_decl
6706 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6707 grid_attr_trees->kernel_dim_array_type);
6708 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6709 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6710 grid_attr_trees->kernel_lattrs_group_decl
6711 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6712 grid_attr_trees->kernel_dim_array_type);
6713 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6714 = grid_attr_trees->kernel_lattrs_grid_decl;
6715 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6716 "__gomp_kernel_launch_attributes",
6717 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6718 }
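
/* A rough C-level picture of the record built above (illustrative only;
   the actual type is constructed directly as trees):

       struct __gomp_kernel_launch_attributes
       {
	 uint32_t ndim;
	 uint32_t grid_size[3];
	 uint32_t group_size[3];
       };
 */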
6719
6720 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6721 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6722 of type uint32_type_node. */
6723
6724 static void
6725 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6726 tree fld_decl, int index, tree value)
6727 {
6728 tree ref = build4 (ARRAY_REF, uint32_type_node,
6729 build3 (COMPONENT_REF,
6730 grid_attr_trees->kernel_dim_array_type,
6731 range_var, fld_decl, NULL_TREE),
6732 build_int_cst (integer_type_node, index),
6733 NULL_TREE, NULL_TREE);
6734 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6735 }
6736
6737 /* Return a tree representation of a pointer to a structure with grid and
6738 work-group size information. Statements filling that information will be
6739    inserted before GSI.  TGT_STMT is the target statement which has the
6740    necessary information in it.  */
6741
6742 static tree
6743 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6744 gomp_target *tgt_stmt)
6745 {
6746 grid_create_kernel_launch_attr_types ();
6747 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6748 "__kernel_launch_attrs");
6749
6750 unsigned max_dim = 0;
6751 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6752 clause;
6753 clause = OMP_CLAUSE_CHAIN (clause))
6754 {
6755 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6756 continue;
6757
6758 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6759 max_dim = MAX (dim, max_dim);
6760
6761 grid_insert_store_range_dim (gsi, lattrs,
6762 grid_attr_trees->kernel_lattrs_grid_decl,
6763 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6764 grid_insert_store_range_dim (gsi, lattrs,
6765 grid_attr_trees->kernel_lattrs_group_decl,
6766 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6767 }
6768
6769 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6770 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6771 gcc_checking_assert (max_dim <= 2);
6772 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6773 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6774 GSI_SAME_STMT);
6775 TREE_ADDRESSABLE (lattrs) = 1;
6776 return build_fold_addr_expr (lattrs);
6777 }
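
/* Sketch of what the function above emits for a target statement with a
   single _griddim_ clause for dimension 0 (illustrative, not literal
   compiler output):

       __kernel_launch_attrs.grid_size[0] = <grid size 0>;
       __kernel_launch_attrs.group_size[0] = <group size 0>;
       __kernel_launch_attrs.ndim = 1;

   and the address of __kernel_launch_attrs is what gets returned.  */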
6778
6779 /* Build target argument identifier from the DEVICE identifier, value
6780 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6781
6782 static tree
6783 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6784 {
6785 tree t = build_int_cst (integer_type_node, device);
6786 if (subseqent_param)
6787 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6788 build_int_cst (integer_type_node,
6789 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6790 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6791 build_int_cst (integer_type_node, id));
6792 return t;
6793 }
6794
6795 /* Like above, but return it in a type that can be directly stored as an element
6796 of the argument array. */
6797
6798 static tree
6799 get_target_argument_identifier (int device, bool subseqent_param, int id)
6800 {
6801 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6802 return fold_convert (ptr_type_node, t);
6803 }
6804
6805 /* Return a target argument consisting of DEVICE identifier, value identifier
6806 ID, and the actual VALUE. */
6807
6808 static tree
6809 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6810 tree value)
6811 {
6812 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6813 fold_convert (integer_type_node, value),
6814 build_int_cst (unsigned_type_node,
6815 GOMP_TARGET_ARG_VALUE_SHIFT));
6816 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6817 get_target_argument_identifier_1 (device, false, id));
6818 t = fold_convert (ptr_type_node, t);
6819 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6820 }
6821
6822 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6823    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6824    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6825 arguments. */
6826
6827 static void
6828 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6829 int id, tree value, vec <tree> *args)
6830 {
6831 if (tree_fits_shwi_p (value)
6832 && tree_to_shwi (value) > -(1 << 15)
6833 && tree_to_shwi (value) < (1 << 15))
6834 args->quick_push (get_target_argument_value (gsi, device, id, value));
6835 else
6836 {
6837 args->quick_push (get_target_argument_identifier (device, true, id));
6838 value = fold_convert (ptr_type_node, value);
6839 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6840 GSI_SAME_STMT);
6841 args->quick_push (value);
6842 }
6843 }
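
/* Encoding sketch (a summary of the helpers above, not new behaviour): a
   sufficiently small constant VALUE is packed into a single pointer-sized
   element as

       (VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID

   whereas anything else takes two consecutive elements,

       DEVICE | ID | GOMP_TARGET_ARG_SUBSEQUENT_PARAM   and   (void *) VALUE.  */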
6844
6845 /* Create an array of arguments that is then passed to GOMP_target. */
6846
6847 static tree
6848 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6849 {
6850 auto_vec <tree, 6> args;
6851 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6852 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6853 if (c)
6854 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6855 else
6856 t = integer_minus_one_node;
6857 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6858 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6859
6860 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6861 if (c)
6862 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6863 else
6864 t = integer_minus_one_node;
6865 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6866 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6867 &args);
6868
6869 /* Add HSA-specific grid sizes, if available. */
6870 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6871 OMP_CLAUSE__GRIDDIM_))
6872 {
6873 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6874 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6875 args.quick_push (t);
6876 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6877 }
6878
6879 /* Produce more, perhaps device specific, arguments here. */
6880
6881 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6882 args.length () + 1),
6883 ".omp_target_args");
6884 for (unsigned i = 0; i < args.length (); i++)
6885 {
6886 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6887 build_int_cst (integer_type_node, i),
6888 NULL_TREE, NULL_TREE);
6889 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6890 GSI_SAME_STMT);
6891 }
6892 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6893 build_int_cst (integer_type_node, args.length ()),
6894 NULL_TREE, NULL_TREE);
6895 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6896 GSI_SAME_STMT);
6897 TREE_ADDRESSABLE (argarray) = 1;
6898 return build_fold_addr_expr (argarray);
6899 }
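
/* For a plain "#pragma omp target" with neither num_teams nor thread_limit
   clauses the array built above would roughly contain (illustrative):

       .omp_target_args[0] = <encoded NUM_TEAMS = -1>;
       .omp_target_args[1] = <encoded THREAD_LIMIT = -1>;
       .omp_target_args[2] = NULL;

   with the HSA launch-attribute pair inserted before the terminating NULL
   when _griddim_ clauses are present.  */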
6900
6901 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6902
6903 static void
6904 expand_omp_target (struct omp_region *region)
6905 {
6906 basic_block entry_bb, exit_bb, new_bb;
6907 struct function *child_cfun;
6908 tree child_fn, block, t;
6909 gimple_stmt_iterator gsi;
6910 gomp_target *entry_stmt;
6911 gimple *stmt;
6912 edge e;
6913 bool offloaded, data_region;
6914
6915 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6916 new_bb = region->entry;
6917
6918 offloaded = is_gimple_omp_offloaded (entry_stmt);
6919 switch (gimple_omp_target_kind (entry_stmt))
6920 {
6921 case GF_OMP_TARGET_KIND_REGION:
6922 case GF_OMP_TARGET_KIND_UPDATE:
6923 case GF_OMP_TARGET_KIND_ENTER_DATA:
6924 case GF_OMP_TARGET_KIND_EXIT_DATA:
6925 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6926 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6927 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6928 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6929 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6930 data_region = false;
6931 break;
6932 case GF_OMP_TARGET_KIND_DATA:
6933 case GF_OMP_TARGET_KIND_OACC_DATA:
6934 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6935 data_region = true;
6936 break;
6937 default:
6938 gcc_unreachable ();
6939 }
6940
6941 child_fn = NULL_TREE;
6942 child_cfun = NULL;
6943 if (offloaded)
6944 {
6945 child_fn = gimple_omp_target_child_fn (entry_stmt);
6946 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6947 }
6948
6949 /* Supported by expand_omp_taskreg, but not here. */
6950 if (child_cfun != NULL)
6951 gcc_checking_assert (!child_cfun->cfg);
6952 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6953
6954 entry_bb = region->entry;
6955 exit_bb = region->exit;
6956
6957 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6958 {
6959 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6960
6961 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6962 	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6963 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
6964 DECL_ATTRIBUTES (child_fn)
6965 = tree_cons (get_identifier ("oacc kernels"),
6966 NULL_TREE, DECL_ATTRIBUTES (child_fn));
6967 }
6968
6969 if (offloaded)
6970 {
6971 unsigned srcidx, dstidx, num;
6972
6973 /* If the offloading region needs data sent from the parent
6974 function, then the very first statement (except possible
6975 tree profile counter updates) of the offloading body
6976 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6977 &.OMP_DATA_O is passed as an argument to the child function,
6978 we need to replace it with the argument as seen by the child
6979 function.
6980
6981 In most cases, this will end up being the identity assignment
6982 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6983 a function call that has been inlined, the original PARM_DECL
6984 .OMP_DATA_I may have been converted into a different local
6985 variable. In which case, we need to keep the assignment. */
6986 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6987 if (data_arg)
6988 {
6989 basic_block entry_succ_bb = single_succ (entry_bb);
6990 gimple_stmt_iterator gsi;
6991 tree arg;
6992 gimple *tgtcopy_stmt = NULL;
6993 tree sender = TREE_VEC_ELT (data_arg, 0);
6994
6995 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6996 {
6997 gcc_assert (!gsi_end_p (gsi));
6998 stmt = gsi_stmt (gsi);
6999 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7000 continue;
7001
7002 if (gimple_num_ops (stmt) == 2)
7003 {
7004 tree arg = gimple_assign_rhs1 (stmt);
7005
7006 /* We're ignoring the subcode because we're
7007 effectively doing a STRIP_NOPS. */
7008
7009 if (TREE_CODE (arg) == ADDR_EXPR
7010 && TREE_OPERAND (arg, 0) == sender)
7011 {
7012 tgtcopy_stmt = stmt;
7013 break;
7014 }
7015 }
7016 }
7017
7018 gcc_assert (tgtcopy_stmt != NULL);
7019 arg = DECL_ARGUMENTS (child_fn);
7020
7021 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7022 gsi_remove (&gsi, true);
7023 }
7024
7025 /* Declare local variables needed in CHILD_CFUN. */
7026 block = DECL_INITIAL (child_fn);
7027 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7028 /* The gimplifier could record temporaries in the offloading block
7029 	 rather than in the containing function's local_decls chain,
7030 which would mean cgraph missed finalizing them. Do it now. */
7031 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7032 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7033 varpool_node::finalize_decl (t);
7034 DECL_SAVED_TREE (child_fn) = NULL;
7035 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7036 gimple_set_body (child_fn, NULL);
7037 TREE_USED (block) = 1;
7038
7039 /* Reset DECL_CONTEXT on function arguments. */
7040 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7041 DECL_CONTEXT (t) = child_fn;
7042
7043 /* Split ENTRY_BB at GIMPLE_*,
7044 so that it can be moved to the child function. */
7045 gsi = gsi_last_nondebug_bb (entry_bb);
7046 stmt = gsi_stmt (gsi);
7047 gcc_assert (stmt
7048 && gimple_code (stmt) == gimple_code (entry_stmt));
7049 e = split_block (entry_bb, stmt);
7050 gsi_remove (&gsi, true);
7051 entry_bb = e->dest;
7052 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7053
7054 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7055 if (exit_bb)
7056 {
7057 gsi = gsi_last_nondebug_bb (exit_bb);
7058 gcc_assert (!gsi_end_p (gsi)
7059 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7060 stmt = gimple_build_return (NULL);
7061 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7062 gsi_remove (&gsi, true);
7063 }
7064
7065 /* Make sure to generate early debug for the function before
7066 outlining anything. */
7067 if (! gimple_in_ssa_p (cfun))
7068 (*debug_hooks->early_global_decl) (cfun->decl);
7069
7070 /* Move the offloading region into CHILD_CFUN. */
7071
7072 block = gimple_block (entry_stmt);
7073
7074 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7075 if (exit_bb)
7076 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7077 /* When the OMP expansion process cannot guarantee an up-to-date
7078 	 loop tree, arrange for the child function to fix up loops.  */
7079 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7080 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7081
7082 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7083 num = vec_safe_length (child_cfun->local_decls);
7084 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7085 {
7086 t = (*child_cfun->local_decls)[srcidx];
7087 if (DECL_CONTEXT (t) == cfun->decl)
7088 continue;
7089 if (srcidx != dstidx)
7090 (*child_cfun->local_decls)[dstidx] = t;
7091 dstidx++;
7092 }
7093 if (dstidx != num)
7094 vec_safe_truncate (child_cfun->local_decls, dstidx);
7095
7096 /* Inform the callgraph about the new function. */
7097 child_cfun->curr_properties = cfun->curr_properties;
7098 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7099 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7100 cgraph_node *node = cgraph_node::get_create (child_fn);
7101 node->parallelized_function = 1;
7102 cgraph_node::add_new_function (child_fn, true);
7103
7104 /* Add the new function to the offload table. */
7105 if (ENABLE_OFFLOADING)
7106 {
7107 if (in_lto_p)
7108 DECL_PRESERVE_P (child_fn) = 1;
7109 vec_safe_push (offload_funcs, child_fn);
7110 }
7111
7112 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7113 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7114
7115 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7116 fixed in a following pass. */
7117 push_cfun (child_cfun);
7118 if (need_asm)
7119 assign_assembler_name_if_needed (child_fn);
7120 cgraph_edge::rebuild_edges ();
7121
7122 /* Some EH regions might become dead, see PR34608. If
7123 pass_cleanup_cfg isn't the first pass to happen with the
7124 new child, these dead EH edges might cause problems.
7125 Clean them up now. */
7126 if (flag_exceptions)
7127 {
7128 basic_block bb;
7129 bool changed = false;
7130
7131 FOR_EACH_BB_FN (bb, cfun)
7132 changed |= gimple_purge_dead_eh_edges (bb);
7133 if (changed)
7134 cleanup_tree_cfg ();
7135 }
7136 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7137 verify_loop_structure ();
7138 pop_cfun ();
7139
7140 if (dump_file && !gimple_in_ssa_p (cfun))
7141 {
7142 omp_any_child_fn_dumped = true;
7143 dump_function_header (dump_file, child_fn, dump_flags);
7144 dump_function_to_file (child_fn, dump_file, dump_flags);
7145 }
7146 }
7147
7148 /* Emit a library call to launch the offloading region, or do data
7149 transfers. */
7150 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7151 enum built_in_function start_ix;
7152 location_t clause_loc;
7153 unsigned int flags_i = 0;
7154
7155 switch (gimple_omp_target_kind (entry_stmt))
7156 {
7157 case GF_OMP_TARGET_KIND_REGION:
7158 start_ix = BUILT_IN_GOMP_TARGET;
7159 break;
7160 case GF_OMP_TARGET_KIND_DATA:
7161 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7162 break;
7163 case GF_OMP_TARGET_KIND_UPDATE:
7164 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7165 break;
7166 case GF_OMP_TARGET_KIND_ENTER_DATA:
7167 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7168 break;
7169 case GF_OMP_TARGET_KIND_EXIT_DATA:
7170 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7171 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7172 break;
7173 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7174 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7175 start_ix = BUILT_IN_GOACC_PARALLEL;
7176 break;
7177 case GF_OMP_TARGET_KIND_OACC_DATA:
7178 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7179 start_ix = BUILT_IN_GOACC_DATA_START;
7180 break;
7181 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7182 start_ix = BUILT_IN_GOACC_UPDATE;
7183 break;
7184 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7185 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7186 break;
7187 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7188 start_ix = BUILT_IN_GOACC_DECLARE;
7189 break;
7190 default:
7191 gcc_unreachable ();
7192 }
7193
7194 clauses = gimple_omp_target_clauses (entry_stmt);
7195
7196 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7197 library choose) and there is no conditional. */
7198 cond = NULL_TREE;
7199 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7200
7201 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7202 if (c)
7203 cond = OMP_CLAUSE_IF_EXPR (c);
7204
7205 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7206 if (c)
7207 {
7208 /* Even if we pass it to all library function calls, it is currently only
7209 defined/used for the OpenMP target ones. */
7210 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7211 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7212 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7213 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7214
7215 device = OMP_CLAUSE_DEVICE_ID (c);
7216 clause_loc = OMP_CLAUSE_LOCATION (c);
7217 }
7218 else
7219 clause_loc = gimple_location (entry_stmt);
7220
7221 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7222 if (c)
7223 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7224
7225 /* Ensure 'device' is of the correct type. */
7226 device = fold_convert_loc (clause_loc, integer_type_node, device);
7227
7228 /* If we found the clause 'if (cond)', build
7229 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7230 if (cond)
7231 {
7232 cond = gimple_boolify (cond);
7233
7234 basic_block cond_bb, then_bb, else_bb;
7235 edge e;
7236 tree tmp_var;
7237
7238 tmp_var = create_tmp_var (TREE_TYPE (device));
7239 if (offloaded)
7240 e = split_block_after_labels (new_bb);
7241 else
7242 {
7243 gsi = gsi_last_nondebug_bb (new_bb);
7244 gsi_prev (&gsi);
7245 e = split_block (new_bb, gsi_stmt (gsi));
7246 }
7247 cond_bb = e->src;
7248 new_bb = e->dest;
7249 remove_edge (e);
7250
7251 then_bb = create_empty_bb (cond_bb);
7252 else_bb = create_empty_bb (then_bb);
7253 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7254 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7255
7256 stmt = gimple_build_cond_empty (cond);
7257 gsi = gsi_last_bb (cond_bb);
7258 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7259
7260 gsi = gsi_start_bb (then_bb);
7261 stmt = gimple_build_assign (tmp_var, device);
7262 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7263
7264 gsi = gsi_start_bb (else_bb);
7265 stmt = gimple_build_assign (tmp_var,
7266 build_int_cst (integer_type_node,
7267 GOMP_DEVICE_HOST_FALLBACK));
7268 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7269
7270 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7271 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7272 add_bb_to_loop (then_bb, cond_bb->loop_father);
7273 add_bb_to_loop (else_bb, cond_bb->loop_father);
7274 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7275 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7276
7277 device = tmp_var;
7278 gsi = gsi_last_nondebug_bb (new_bb);
7279 }
7280 else
7281 {
7282 gsi = gsi_last_nondebug_bb (new_bb);
7283 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7284 true, GSI_SAME_STMT);
7285 }
7286
7287 t = gimple_omp_target_data_arg (entry_stmt);
7288 if (t == NULL)
7289 {
7290 t1 = size_zero_node;
7291 t2 = build_zero_cst (ptr_type_node);
7292 t3 = t2;
7293 t4 = t2;
7294 }
7295 else
7296 {
7297 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7298 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7299 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7300 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7301 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7302 }
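
  /* From here on we build the actual launch call.  For the OpenMP
     BUILT_IN_GOMP_TARGET case the result is roughly (the libgomp entry
     point name GOMP_target_ext is an assumption, not taken from this file):

	 GOMP_target_ext (device, &child_fn, t1, t2, t3, t4,
			  flags, depend, target_args);

     where t1 is the number of mapped items and t2/t3/t4 point to the
     host-address, size and map-kind arrays computed above; the data-only
     builtins stop after t4.  */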
7303
7304 gimple *g;
7305 bool tagging = false;
7306 /* The maximum number used by any start_ix, without varargs. */
7307 auto_vec<tree, 11> args;
7308 args.quick_push (device);
7309 if (offloaded)
7310 args.quick_push (build_fold_addr_expr (child_fn));
7311 args.quick_push (t1);
7312 args.quick_push (t2);
7313 args.quick_push (t3);
7314 args.quick_push (t4);
7315 switch (start_ix)
7316 {
7317 case BUILT_IN_GOACC_DATA_START:
7318 case BUILT_IN_GOACC_DECLARE:
7319 case BUILT_IN_GOMP_TARGET_DATA:
7320 break;
7321 case BUILT_IN_GOMP_TARGET:
7322 case BUILT_IN_GOMP_TARGET_UPDATE:
7323 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7324 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7325 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7326 if (c)
7327 depend = OMP_CLAUSE_DECL (c);
7328 else
7329 depend = build_int_cst (ptr_type_node, 0);
7330 args.quick_push (depend);
7331 if (start_ix == BUILT_IN_GOMP_TARGET)
7332 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7333 break;
7334 case BUILT_IN_GOACC_PARALLEL:
7335 oacc_set_fn_attrib (child_fn, clauses, &args);
7336 tagging = true;
7337 /* FALLTHRU */
7338 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7339 case BUILT_IN_GOACC_UPDATE:
7340 {
7341 tree t_async = NULL_TREE;
7342
7343 /* If present, use the value specified by the respective
7344 clause, making sure that is of the correct type. */
7345 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7346 if (c)
7347 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7348 integer_type_node,
7349 OMP_CLAUSE_ASYNC_EXPR (c));
7350 else if (!tagging)
7351 /* Default values for t_async. */
7352 t_async = fold_convert_loc (gimple_location (entry_stmt),
7353 integer_type_node,
7354 build_int_cst (integer_type_node,
7355 GOMP_ASYNC_SYNC));
7356 if (tagging && t_async)
7357 {
7358 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7359
7360 if (TREE_CODE (t_async) == INTEGER_CST)
7361 {
7362 		  /* See if we can pack the async arg into the tag's
7363 operand. */
7364 i_async = TREE_INT_CST_LOW (t_async);
7365 if (i_async < GOMP_LAUNCH_OP_MAX)
7366 t_async = NULL_TREE;
7367 else
7368 i_async = GOMP_LAUNCH_OP_MAX;
7369 }
7370 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7371 i_async));
7372 }
7373 if (t_async)
7374 args.safe_push (t_async);
7375
7376 /* Save the argument index, and ... */
7377 unsigned t_wait_idx = args.length ();
7378 unsigned num_waits = 0;
7379 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7380 if (!tagging || c)
7381 /* ... push a placeholder. */
7382 args.safe_push (integer_zero_node);
7383
7384 for (; c; c = OMP_CLAUSE_CHAIN (c))
7385 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7386 {
7387 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7388 integer_type_node,
7389 OMP_CLAUSE_WAIT_EXPR (c)));
7390 num_waits++;
7391 }
7392
7393 if (!tagging || num_waits)
7394 {
7395 tree len;
7396
7397 /* Now that we know the number, update the placeholder. */
7398 if (tagging)
7399 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7400 else
7401 len = build_int_cst (integer_type_node, num_waits);
7402 len = fold_convert_loc (gimple_location (entry_stmt),
7403 unsigned_type_node, len);
7404 args[t_wait_idx] = len;
7405 }
7406 }
7407 break;
7408 default:
7409 gcc_unreachable ();
7410 }
7411 if (tagging)
7412 /* Push terminal marker - zero. */
7413 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7414
7415 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7416 gimple_set_location (g, gimple_location (entry_stmt));
7417 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7418 if (!offloaded)
7419 {
7420 g = gsi_stmt (gsi);
7421 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7422 gsi_remove (&gsi, true);
7423 }
7424 if (data_region && region->exit)
7425 {
7426 gsi = gsi_last_nondebug_bb (region->exit);
7427 g = gsi_stmt (gsi);
7428 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7429 gsi_remove (&gsi, true);
7430 }
7431 }
7432
7433 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7434    the iteration variable derived from the thread number.  INTRA_GROUP means this
7435 is an expansion of a loop iterating over work-items within a separate
7436 iteration over groups. */
7437
7438 static void
7439 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7440 {
7441 gimple_stmt_iterator gsi;
7442 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7443 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7444 == GF_OMP_FOR_KIND_GRID_LOOP);
7445 size_t collapse = gimple_omp_for_collapse (for_stmt);
7446 struct omp_for_data_loop *loops
7447 = XALLOCAVEC (struct omp_for_data_loop,
7448 gimple_omp_for_collapse (for_stmt));
7449 struct omp_for_data fd;
7450
7451 remove_edge (BRANCH_EDGE (kfor->entry));
7452 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7453
7454 gcc_assert (kfor->cont);
7455 omp_extract_for_data (for_stmt, &fd, loops);
7456
7457 gsi = gsi_start_bb (body_bb);
7458
7459 for (size_t dim = 0; dim < collapse; dim++)
7460 {
7461 tree type, itype;
7462 itype = type = TREE_TYPE (fd.loops[dim].v);
7463 if (POINTER_TYPE_P (type))
7464 itype = signed_type_for (type);
7465
7466 tree n1 = fd.loops[dim].n1;
7467 tree step = fd.loops[dim].step;
7468 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7469 true, NULL_TREE, true, GSI_SAME_STMT);
7470 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7471 true, NULL_TREE, true, GSI_SAME_STMT);
7472 tree threadid;
7473 if (gimple_omp_for_grid_group_iter (for_stmt))
7474 {
7475 gcc_checking_assert (!intra_group);
7476 threadid = build_call_expr (builtin_decl_explicit
7477 (BUILT_IN_HSA_WORKGROUPID), 1,
7478 build_int_cstu (unsigned_type_node, dim));
7479 }
7480 else if (intra_group)
7481 threadid = build_call_expr (builtin_decl_explicit
7482 (BUILT_IN_HSA_WORKITEMID), 1,
7483 build_int_cstu (unsigned_type_node, dim));
7484 else
7485 threadid = build_call_expr (builtin_decl_explicit
7486 (BUILT_IN_HSA_WORKITEMABSID), 1,
7487 build_int_cstu (unsigned_type_node, dim));
7488 threadid = fold_convert (itype, threadid);
7489 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7490 true, GSI_SAME_STMT);
7491
7492 tree startvar = fd.loops[dim].v;
7493 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7494 if (POINTER_TYPE_P (type))
7495 t = fold_build_pointer_plus (n1, t);
7496 else
7497 t = fold_build2 (PLUS_EXPR, type, t, n1);
7498 t = fold_convert (type, t);
7499 t = force_gimple_operand_gsi (&gsi, t,
7500 DECL_P (startvar)
7501 && TREE_ADDRESSABLE (startvar),
7502 NULL_TREE, true, GSI_SAME_STMT);
7503 gassign *assign_stmt = gimple_build_assign (startvar, t);
7504 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7505 }
7506 /* Remove the omp for statement. */
7507 gsi = gsi_last_nondebug_bb (kfor->entry);
7508 gsi_remove (&gsi, true);
7509
7510 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7511 gsi = gsi_last_nondebug_bb (kfor->cont);
7512 gcc_assert (!gsi_end_p (gsi)
7513 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7514 gsi_remove (&gsi, true);
7515
7516 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7517 gsi = gsi_last_nondebug_bb (kfor->exit);
7518 gcc_assert (!gsi_end_p (gsi)
7519 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7520 if (intra_group)
7521 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7522 gsi_remove (&gsi, true);
7523
7524 /* Fixup the much simpler CFG. */
7525 remove_edge (find_edge (kfor->cont, body_bb));
7526
7527 if (kfor->cont != body_bb)
7528 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7529 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7530 }
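
/* The net effect of the expansion above for one dimension is roughly
   (illustrative; the thread id comes from the appropriate HSA builtin for
   the group, intra-group or absolute work-item id):

       V = N1 + STEP * <thread id in this dimension>;
       ... original loop body ...

   i.e. the loop control disappears and each work-item directly computes the
   single iteration it is responsible for.  */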
7531
7532 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7533 argument_decls. */
7534
7535 struct grid_arg_decl_map
7536 {
7537 tree old_arg;
7538 tree new_arg;
7539 };
7540
7541 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7542    pertaining to the kernel function.  */
7543
7544 static tree
7545 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7546 {
7547 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7548 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7549 tree t = *tp;
7550
7551 if (t == adm->old_arg)
7552 *tp = adm->new_arg;
7553 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7554 return NULL_TREE;
7555 }
7556
7557 /* If the TARGET region contains a kernel body for-loop, remove its region from
7558    the TARGET and expand it in HSA gridified kernel fashion.  */
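
/* The region shape this function looks for is roughly (an illustration of
   the walk below, not an exhaustive specification):

       GIMPLE_OMP_TARGET
	 GIMPLE_OMP_GRID_BODY
	   GIMPLE_OMP_FOR  (GF_OMP_FOR_KIND_GRID_LOOP)
	     [inner GRID_LOOPs when iterating over work-groups]

   A target without a GRID_BODY child is either skipped (non-REGION kinds
   such as the OpenACC ones) or just registered as an ordinary HSA kernel.  */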
7559
7560 static void
7561 grid_expand_target_grid_body (struct omp_region *target)
7562 {
7563 if (!hsa_gen_requested_p ())
7564 return;
7565
7566 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7567 struct omp_region **pp;
7568
7569 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7570 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7571 break;
7572
7573 struct omp_region *gpukernel = *pp;
7574
7575 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7576 if (!gpukernel)
7577 {
7578 /* HSA cannot handle OACC stuff. */
7579 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7580 return;
7581 gcc_checking_assert (orig_child_fndecl);
7582 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7583 OMP_CLAUSE__GRIDDIM_));
7584 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7585
7586 hsa_register_kernel (n);
7587 return;
7588 }
7589
7590 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7591 OMP_CLAUSE__GRIDDIM_));
7592 tree inside_block
7593 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7594 *pp = gpukernel->next;
7595 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7596 if ((*pp)->type == GIMPLE_OMP_FOR)
7597 break;
7598
7599 struct omp_region *kfor = *pp;
7600 gcc_assert (kfor);
7601 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7602 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7603 *pp = kfor->next;
7604 if (kfor->inner)
7605 {
7606 if (gimple_omp_for_grid_group_iter (for_stmt))
7607 {
7608 struct omp_region **next_pp;
7609 for (pp = &kfor->inner; *pp; pp = next_pp)
7610 {
7611 next_pp = &(*pp)->next;
7612 if ((*pp)->type != GIMPLE_OMP_FOR)
7613 continue;
7614 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7615 gcc_assert (gimple_omp_for_kind (inner)
7616 == GF_OMP_FOR_KIND_GRID_LOOP);
7617 grid_expand_omp_for_loop (*pp, true);
7618 *pp = (*pp)->next;
7619 next_pp = pp;
7620 }
7621 }
7622 expand_omp (kfor->inner);
7623 }
7624 if (gpukernel->inner)
7625 expand_omp (gpukernel->inner);
7626
7627 tree kern_fndecl = copy_node (orig_child_fndecl);
7628 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7629 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7630 tree tgtblock = gimple_block (tgt_stmt);
7631 tree fniniblock = make_node (BLOCK);
7632 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7633 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7634 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7635 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7636 DECL_INITIAL (kern_fndecl) = fniniblock;
7637 push_struct_function (kern_fndecl);
7638 cfun->function_end_locus = gimple_location (tgt_stmt);
7639 init_tree_ssa (cfun);
7640 pop_cfun ();
7641
7642 /* Make sure to generate early debug for the function before
7643 outlining anything. */
7644 if (! gimple_in_ssa_p (cfun))
7645 (*debug_hooks->early_global_decl) (cfun->decl);
7646
7647 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7648 gcc_assert (!DECL_CHAIN (old_parm_decl));
7649 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7650 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7651 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7652 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7653 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7654 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7655 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7656 kern_cfun->curr_properties = cfun->curr_properties;
7657
7658 grid_expand_omp_for_loop (kfor, false);
7659
7660 /* Remove the omp for statement. */
7661 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7662 gsi_remove (&gsi, true);
7663 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7664 return. */
7665 gsi = gsi_last_nondebug_bb (gpukernel->exit);
7666 gcc_assert (!gsi_end_p (gsi)
7667 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7668 gimple *ret_stmt = gimple_build_return (NULL);
7669 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7670 gsi_remove (&gsi, true);
7671
7672 /* Statements in the first BB in the target construct have been produced by
7673 target lowering and must be copied inside the GPUKERNEL, with the two
7674 exceptions of the first OMP statement and the OMP_DATA assignment
7675 statement. */
7676 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7677 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7678 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7679 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7680 !gsi_end_p (tsi); gsi_next (&tsi))
7681 {
7682 gimple *stmt = gsi_stmt (tsi);
7683 if (is_gimple_omp (stmt))
7684 break;
7685 if (sender
7686 && is_gimple_assign (stmt)
7687 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7688 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7689 continue;
7690 gimple *copy = gimple_copy (stmt);
7691 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7692 gimple_set_block (copy, fniniblock);
7693 }
7694
7695 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7696 gpukernel->exit, inside_block);
7697
7698 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7699 kcn->mark_force_output ();
7700 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7701
7702 hsa_register_kernel (kcn, orig_child);
7703
7704 cgraph_node::add_new_function (kern_fndecl, true);
7705 push_cfun (kern_cfun);
7706 cgraph_edge::rebuild_edges ();
7707
7708 /* Re-map any mention of the PARM_DECL of the original function to the
7709 PARM_DECL of the new one.
7710
7711 TODO: It would be great if lowering produced references into the GPU
7712 kernel decl straight away and we did not have to do this. */
7713 struct grid_arg_decl_map adm;
7714 adm.old_arg = old_parm_decl;
7715 adm.new_arg = new_parm_decl;
7716 basic_block bb;
7717 FOR_EACH_BB_FN (bb, kern_cfun)
7718 {
7719 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7720 {
7721 gimple *stmt = gsi_stmt (gsi);
7722 struct walk_stmt_info wi;
7723 memset (&wi, 0, sizeof (wi));
7724 wi.info = &adm;
7725 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7726 }
7727 }
7728 pop_cfun ();
7729
7730 return;
7731 }
7732
7733 /* Expand the parallel region tree rooted at REGION. Expansion
7734 proceeds in depth-first order. Innermost regions are expanded
7735 first. This way, parallel regions that require a new function to
7736 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7737 internal dependencies in their body. */
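
/* For example (illustrative), given

       #pragma omp parallel
       #pragma omp for
       for (...) ...

   the GIMPLE_OMP_FOR region nested in the GIMPLE_OMP_PARALLEL region is
   expanded first, and only then is the parallel region outlined into its
   child function.  */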
7738
7739 static void
7740 expand_omp (struct omp_region *region)
7741 {
7742 omp_any_child_fn_dumped = false;
7743 while (region)
7744 {
7745 location_t saved_location;
7746 gimple *inner_stmt = NULL;
7747
7748 /* First, determine whether this is a combined parallel+workshare
7749 region. */
7750 if (region->type == GIMPLE_OMP_PARALLEL)
7751 determine_parallel_type (region);
7752 else if (region->type == GIMPLE_OMP_TARGET)
7753 grid_expand_target_grid_body (region);
7754
7755 if (region->type == GIMPLE_OMP_FOR
7756 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7757 inner_stmt = last_stmt (region->inner->entry);
7758
7759 if (region->inner)
7760 expand_omp (region->inner);
7761
7762 saved_location = input_location;
7763 if (gimple_has_location (last_stmt (region->entry)))
7764 input_location = gimple_location (last_stmt (region->entry));
7765
7766 switch (region->type)
7767 {
7768 case GIMPLE_OMP_PARALLEL:
7769 case GIMPLE_OMP_TASK:
7770 expand_omp_taskreg (region);
7771 break;
7772
7773 case GIMPLE_OMP_FOR:
7774 expand_omp_for (region, inner_stmt);
7775 break;
7776
7777 case GIMPLE_OMP_SECTIONS:
7778 expand_omp_sections (region);
7779 break;
7780
7781 case GIMPLE_OMP_SECTION:
7782 /* Individual omp sections are handled together with their
7783 parent GIMPLE_OMP_SECTIONS region. */
7784 break;
7785
7786 case GIMPLE_OMP_SINGLE:
7787 expand_omp_single (region);
7788 break;
7789
7790 case GIMPLE_OMP_ORDERED:
7791 {
7792 gomp_ordered *ord_stmt
7793 = as_a <gomp_ordered *> (last_stmt (region->entry));
7794 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7795 OMP_CLAUSE_DEPEND))
7796 {
7797 /* We'll expand these when expanding corresponding
7798 worksharing region with ordered(n) clause. */
7799 gcc_assert (region->outer
7800 && region->outer->type == GIMPLE_OMP_FOR);
7801 region->ord_stmt = ord_stmt;
7802 break;
7803 }
7804 }
7805 /* FALLTHRU */
7806 case GIMPLE_OMP_MASTER:
7807 case GIMPLE_OMP_TASKGROUP:
7808 case GIMPLE_OMP_CRITICAL:
7809 case GIMPLE_OMP_TEAMS:
7810 expand_omp_synch (region);
7811 break;
7812
7813 case GIMPLE_OMP_ATOMIC_LOAD:
7814 expand_omp_atomic (region);
7815 break;
7816
7817 case GIMPLE_OMP_TARGET:
7818 expand_omp_target (region);
7819 break;
7820
7821 default:
7822 gcc_unreachable ();
7823 }
7824
7825 input_location = saved_location;
7826 region = region->next;
7827 }
7828 if (omp_any_child_fn_dumped)
7829 {
7830 if (dump_file)
7831 dump_function_header (dump_file, current_function_decl, dump_flags);
7832 omp_any_child_fn_dumped = false;
7833 }
7834 }
7835
7836 /* Helper for build_omp_regions. Scan the dominator tree starting at
7837 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7838    true, the function ends once a single tree is built (otherwise, a whole
7839    forest of OMP constructs may be built).  */
7840
7841 static void
7842 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7843 bool single_tree)
7844 {
7845 gimple_stmt_iterator gsi;
7846 gimple *stmt;
7847 basic_block son;
7848
7849 gsi = gsi_last_nondebug_bb (bb);
7850 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7851 {
7852 struct omp_region *region;
7853 enum gimple_code code;
7854
7855 stmt = gsi_stmt (gsi);
7856 code = gimple_code (stmt);
7857 if (code == GIMPLE_OMP_RETURN)
7858 {
7859 /* STMT is the return point out of region PARENT. Mark it
7860 as the exit point and make PARENT the immediately
7861 enclosing region. */
7862 gcc_assert (parent);
7863 region = parent;
7864 region->exit = bb;
7865 parent = parent->outer;
7866 }
7867 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7868 {
7869 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7870 GIMPLE_OMP_RETURN, but matches with
7871 GIMPLE_OMP_ATOMIC_LOAD. */
7872 gcc_assert (parent);
7873 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7874 region = parent;
7875 region->exit = bb;
7876 parent = parent->outer;
7877 }
7878 else if (code == GIMPLE_OMP_CONTINUE)
7879 {
7880 gcc_assert (parent);
7881 parent->cont = bb;
7882 }
7883 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7884 {
7885 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7886 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7887 }
7888 else
7889 {
7890 region = new_omp_region (bb, code, parent);
7891 /* Otherwise... */
7892 if (code == GIMPLE_OMP_TARGET)
7893 {
7894 switch (gimple_omp_target_kind (stmt))
7895 {
7896 case GF_OMP_TARGET_KIND_REGION:
7897 case GF_OMP_TARGET_KIND_DATA:
7898 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7899 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7900 case GF_OMP_TARGET_KIND_OACC_DATA:
7901 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7902 break;
7903 case GF_OMP_TARGET_KIND_UPDATE:
7904 case GF_OMP_TARGET_KIND_ENTER_DATA:
7905 case GF_OMP_TARGET_KIND_EXIT_DATA:
7906 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7907 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7908 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7909 /* ..., other than for those stand-alone directives... */
7910 region = NULL;
7911 break;
7912 default:
7913 gcc_unreachable ();
7914 }
7915 }
7916 else if (code == GIMPLE_OMP_ORDERED
7917 && omp_find_clause (gimple_omp_ordered_clauses
7918 (as_a <gomp_ordered *> (stmt)),
7919 OMP_CLAUSE_DEPEND))
7920 /* #pragma omp ordered depend is also just a stand-alone
7921 directive. */
7922 region = NULL;
7923 /* ..., this directive becomes the parent for a new region. */
7924 if (region)
7925 parent = region;
7926 }
7927 }
7928
7929 if (single_tree && !parent)
7930 return;
7931
7932 for (son = first_dom_son (CDI_DOMINATORS, bb);
7933 son;
7934 son = next_dom_son (CDI_DOMINATORS, son))
7935 build_omp_regions_1 (son, parent, single_tree);
7936 }
7937
7938 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7939 root_omp_region. */
7940
7941 static void
7942 build_omp_regions_root (basic_block root)
7943 {
7944 gcc_assert (root_omp_region == NULL);
7945 build_omp_regions_1 (root, NULL, true);
7946 gcc_assert (root_omp_region != NULL);
7947 }
7948
7949 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7950
7951 void
7952 omp_expand_local (basic_block head)
7953 {
7954 build_omp_regions_root (head);
7955 if (dump_file && (dump_flags & TDF_DETAILS))
7956 {
7957 fprintf (dump_file, "\nOMP region tree\n\n");
7958 dump_omp_region (dump_file, root_omp_region, 0);
7959 fprintf (dump_file, "\n");
7960 }
7961
7962 remove_exit_barriers (root_omp_region);
7963 expand_omp (root_omp_region);
7964
7965 omp_free_regions ();
7966 }
7967
7968 /* Scan the CFG and build a tree of OMP regions, storing its root in
7969    root_omp_region.  */
7970
7971 static void
7972 build_omp_regions (void)
7973 {
7974 gcc_assert (root_omp_region == NULL);
7975 calculate_dominance_info (CDI_DOMINATORS);
7976 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7977 }
7978
7979 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7980
7981 static unsigned int
7982 execute_expand_omp (void)
7983 {
7984 build_omp_regions ();
7985
7986 if (!root_omp_region)
7987 return 0;
7988
7989 if (dump_file)
7990 {
7991 fprintf (dump_file, "\nOMP region tree\n\n");
7992 dump_omp_region (dump_file, root_omp_region, 0);
7993 fprintf (dump_file, "\n");
7994 }
7995
7996 remove_exit_barriers (root_omp_region);
7997
7998 expand_omp (root_omp_region);
7999
8000 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8001 verify_loop_structure ();
8002 cleanup_tree_cfg ();
8003
8004 omp_free_regions ();
8005
8006 return 0;
8007 }
8008
8009 /* OMP expansion -- the default pass, run before creation of SSA form. */
8010
8011 namespace {
8012
8013 const pass_data pass_data_expand_omp =
8014 {
8015 GIMPLE_PASS, /* type */
8016 "ompexp", /* name */
8017 OPTGROUP_OMP, /* optinfo_flags */
8018 TV_NONE, /* tv_id */
8019 PROP_gimple_any, /* properties_required */
8020 PROP_gimple_eomp, /* properties_provided */
8021 0, /* properties_destroyed */
8022 0, /* todo_flags_start */
8023 0, /* todo_flags_finish */
8024 };
8025
8026 class pass_expand_omp : public gimple_opt_pass
8027 {
8028 public:
8029 pass_expand_omp (gcc::context *ctxt)
8030 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8031 {}
8032
8033 /* opt_pass methods: */
8034 virtual unsigned int execute (function *)
8035 {
8036 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8037 || flag_openmp_simd != 0)
8038 && !seen_error ());
8039
8040 /* This pass always runs, to provide PROP_gimple_eomp.
8041 But often, there is nothing to do. */
8042 if (!gate)
8043 return 0;
8044
8045 return execute_expand_omp ();
8046 }
8047
8048 }; // class pass_expand_omp
8049
8050 } // anon namespace
8051
8052 gimple_opt_pass *
8053 make_pass_expand_omp (gcc::context *ctxt)
8054 {
8055 return new pass_expand_omp (ctxt);
8056 }
8057
8058 namespace {
8059
8060 const pass_data pass_data_expand_omp_ssa =
8061 {
8062 GIMPLE_PASS, /* type */
8063 "ompexpssa", /* name */
8064 OPTGROUP_OMP, /* optinfo_flags */
8065 TV_NONE, /* tv_id */
8066 PROP_cfg | PROP_ssa, /* properties_required */
8067 PROP_gimple_eomp, /* properties_provided */
8068 0, /* properties_destroyed */
8069 0, /* todo_flags_start */
8070 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8071 };
8072
8073 class pass_expand_omp_ssa : public gimple_opt_pass
8074 {
8075 public:
8076 pass_expand_omp_ssa (gcc::context *ctxt)
8077 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8078 {}
8079
8080 /* opt_pass methods: */
8081 virtual bool gate (function *fun)
8082 {
8083 return !(fun->curr_properties & PROP_gimple_eomp);
8084 }
8085 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8086 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8087
8088 }; // class pass_expand_omp_ssa
8089
8090 } // anon namespace
8091
8092 gimple_opt_pass *
8093 make_pass_expand_omp_ssa (gcc::context *ctxt)
8094 {
8095 return new pass_expand_omp_ssa (ctxt);
8096 }
8097
8098 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8099 GIMPLE_* codes. */
8100
8101 bool
8102 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8103 int *region_idx)
8104 {
8105 gimple *last = last_stmt (bb);
8106 enum gimple_code code = gimple_code (last);
8107 struct omp_region *cur_region = *region;
8108 bool fallthru = false;
8109
8110 switch (code)
8111 {
8112 case GIMPLE_OMP_PARALLEL:
8113 case GIMPLE_OMP_TASK:
8114 case GIMPLE_OMP_FOR:
8115 case GIMPLE_OMP_SINGLE:
8116 case GIMPLE_OMP_TEAMS:
8117 case GIMPLE_OMP_MASTER:
8118 case GIMPLE_OMP_TASKGROUP:
8119 case GIMPLE_OMP_CRITICAL:
8120 case GIMPLE_OMP_SECTION:
8121 case GIMPLE_OMP_GRID_BODY:
8122 cur_region = new_omp_region (bb, code, cur_region);
8123 fallthru = true;
8124 break;
8125
8126 case GIMPLE_OMP_ORDERED:
8127 cur_region = new_omp_region (bb, code, cur_region);
8128 fallthru = true;
8129 if (omp_find_clause (gimple_omp_ordered_clauses
8130 (as_a <gomp_ordered *> (last)),
8131 OMP_CLAUSE_DEPEND))
8132 cur_region = cur_region->outer;
8133 break;
8134
8135 case GIMPLE_OMP_TARGET:
8136 cur_region = new_omp_region (bb, code, cur_region);
8137 fallthru = true;
8138 switch (gimple_omp_target_kind (last))
8139 {
8140 case GF_OMP_TARGET_KIND_REGION:
8141 case GF_OMP_TARGET_KIND_DATA:
8142 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8143 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8144 case GF_OMP_TARGET_KIND_OACC_DATA:
8145 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8146 break;
8147 case GF_OMP_TARGET_KIND_UPDATE:
8148 case GF_OMP_TARGET_KIND_ENTER_DATA:
8149 case GF_OMP_TARGET_KIND_EXIT_DATA:
8150 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8151 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8152 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8153 cur_region = cur_region->outer;
8154 break;
8155 default:
8156 gcc_unreachable ();
8157 }
8158 break;
8159
8160 case GIMPLE_OMP_SECTIONS:
8161 cur_region = new_omp_region (bb, code, cur_region);
8162 fallthru = true;
8163 break;
8164
8165 case GIMPLE_OMP_SECTIONS_SWITCH:
8166 fallthru = false;
8167 break;
8168
8169 case GIMPLE_OMP_ATOMIC_LOAD:
8170 case GIMPLE_OMP_ATOMIC_STORE:
8171 fallthru = true;
8172 break;
8173
8174 case GIMPLE_OMP_RETURN:
8175 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8176 somewhere other than the next block. This will be
8177 created later. */
8178 cur_region->exit = bb;
8179 if (cur_region->type == GIMPLE_OMP_TASK)
8180 /* Add an edge corresponding to not scheduling the task
8181 immediately. */
8182 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8183 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8184 cur_region = cur_region->outer;
8185 break;
8186
8187 case GIMPLE_OMP_CONTINUE:
8188 cur_region->cont = bb;
8189 switch (cur_region->type)
8190 {
8191 case GIMPLE_OMP_FOR:
8192 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8193 succs edges as abnormal to prevent splitting
8194 them. */
8195 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8196 /* Make the loopback edge. */
8197 make_edge (bb, single_succ (cur_region->entry),
8198 EDGE_ABNORMAL);
8199
8200 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8201 corresponds to the case that the body of the loop
8202 is not executed at all. */
8203 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8204 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8205 fallthru = false;
8206 break;
8207
8208 case GIMPLE_OMP_SECTIONS:
8209 /* Wire up the edges into and out of the nested sections. */
8210 {
8211 basic_block switch_bb = single_succ (cur_region->entry);
8212
8213 struct omp_region *i;
8214 for (i = cur_region->inner; i ; i = i->next)
8215 {
8216 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8217 make_edge (switch_bb, i->entry, 0);
8218 make_edge (i->exit, bb, EDGE_FALLTHRU);
8219 }
8220
8221 /* Make the loopback edge to the block with
8222 GIMPLE_OMP_SECTIONS_SWITCH. */
8223 make_edge (bb, switch_bb, 0);
8224
8225 /* Make the edge from the switch to exit. */
8226 make_edge (switch_bb, bb->next_bb, 0);
8227 fallthru = false;
8228 }
8229 break;
8230
8231 case GIMPLE_OMP_TASK:
8232 fallthru = true;
8233 break;
8234
8235 default:
8236 gcc_unreachable ();
8237 }
8238 break;
8239
8240 default:
8241 gcc_unreachable ();
8242 }
8243
8244 if (*region != cur_region)
8245 {
8246 *region = cur_region;
8247 if (cur_region)
8248 *region_idx = cur_region->entry->index;
8249 else
8250 *region_idx = 0;
8251 }
8252
8253 return fallthru;
8254 }
8255
8256 #include "gt-omp-expand.h"