1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
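/* For instance, a construct such as

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         body;
     }

   produces a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region; sibling regions at the same nesting level are
   chained through NEXT, and the outermost regions are chained off
   ROOT_OMP_REGION (see new_omp_region below).  */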
65
66 struct omp_region
67 {
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
106 };
107
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
110
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
115
116 /* Return true if REGION is a combined parallel+workshare region. */
117
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
120 {
121 return region->is_combined_parallel;
122 }
123
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
134
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
137
138 Is lowered into:
139
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
143
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
149
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
154
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
160
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
165
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
168 {
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
174
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
178
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
185
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
196
197 return true;
198 }
199
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
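/* For example, with a vectorization factor of 4 and a chunk size of 10 the
   code below computes (10 + 4 - 1) & -4 == 12, i.e. the chunk size is
   rounded up to a multiple of the vectorization factor; a chunk size of 0
   is left untouched, as is everything when the vectorization factor is
   known to be 1.  */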
202
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 {
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
208
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
212
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
218 }
219
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
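/* For a combined parallel loop these are the loop bounds N1 and N2, the
   step and, if present, the chunk size, all converted to long; for combined
   parallel sections it is just the number of sections.  */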
223
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 {
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
230
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 {
233 struct omp_for_data fd;
234 tree n1, n2;
235
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
239
240 if (gimple_omp_for_combined_into_p (for_stmt))
241 {
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
251 }
252
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
257
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
263
264 if (fd.chunk_size)
265 {
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
269 }
270
271 return ws_args;
272 }
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 {
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
283 }
284
285 gcc_unreachable ();
286 }
287
288 /* Discover whether REGION is a combined parallel+workshare region. */
289
290 static void
291 determine_parallel_type (struct omp_region *region)
292 {
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
295
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
300
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
306
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
313
314 /* Give up for task reductions on the parallel; while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
320
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
327 {
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
330
331 if (region->inner->type == GIMPLE_OMP_FOR)
332 {
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
349 return;
350 }
351 else if (region->inner->type == GIMPLE_OMP_SECTIONS
352 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
353 OMP_CLAUSE__REDUCTEMP_))
354 return;
355
356 region->is_combined_parallel = true;
357 region->inner->is_combined_parallel = true;
358 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
359 }
360 }
361
362 /* Debugging dumps for parallel regions. */
363 void dump_omp_region (FILE *, struct omp_region *, int);
364 void debug_omp_region (struct omp_region *);
365 void debug_all_omp_regions (void);
366
367 /* Dump the parallel region tree rooted at REGION. */
368
369 void
370 dump_omp_region (FILE *file, struct omp_region *region, int indent)
371 {
372 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
373 gimple_code_name[region->type]);
374
375 if (region->inner)
376 dump_omp_region (file, region->inner, indent + 4);
377
378 if (region->cont)
379 {
380 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
381 region->cont->index);
382 }
383
384 if (region->exit)
385 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
386 region->exit->index);
387 else
388 fprintf (file, "%*s[no exit marker]\n", indent, "");
389
390 if (region->next)
391 dump_omp_region (file, region->next, indent);
392 }
393
394 DEBUG_FUNCTION void
395 debug_omp_region (struct omp_region *region)
396 {
397 dump_omp_region (stderr, region, 0);
398 }
399
400 DEBUG_FUNCTION void
401 debug_all_omp_regions (void)
402 {
403 dump_omp_region (stderr, root_omp_region, 0);
404 }
405
406 /* Create a new parallel region of type TYPE starting at BB inside region PARENT. */
407
408 static struct omp_region *
409 new_omp_region (basic_block bb, enum gimple_code type,
410 struct omp_region *parent)
411 {
412 struct omp_region *region = XCNEW (struct omp_region);
413
414 region->outer = parent;
415 region->entry = bb;
416 region->type = type;
417
418 if (parent)
419 {
420 /* This is a nested region. Add it to the list of inner
421 regions in PARENT. */
422 region->next = parent->inner;
423 parent->inner = region;
424 }
425 else
426 {
427 /* This is a toplevel region. Add it to the list of toplevel
428 regions in ROOT_OMP_REGION. */
429 region->next = root_omp_region;
430 root_omp_region = region;
431 }
432
433 return region;
434 }
435
436 /* Release the memory associated with the region tree rooted at REGION. */
437
438 static void
439 free_omp_region_1 (struct omp_region *region)
440 {
441 struct omp_region *i, *n;
442
443 for (i = region->inner; i ; i = n)
444 {
445 n = i->next;
446 free_omp_region_1 (i);
447 }
448
449 free (region);
450 }
451
452 /* Release the memory for the entire omp region tree. */
453
454 void
455 omp_free_regions (void)
456 {
457 struct omp_region *r, *n;
458 for (r = root_omp_region; r ; r = n)
459 {
460 n = r->next;
461 free_omp_region_1 (r);
462 }
463 root_omp_region = NULL;
464 }
465
466 /* A convenience function to build an empty GIMPLE_COND with just the
467 condition. */
468
469 static gcond *
470 gimple_build_cond_empty (tree cond)
471 {
472 enum tree_code pred_code;
473 tree lhs, rhs;
474
475 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
476 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
477 }
478
479 /* Return true if a parallel REGION is within a declare target function or
480 within a target region and is not a part of a gridified target. */
481
482 static bool
483 parallel_needs_hsa_kernel_p (struct omp_region *region)
484 {
485 bool indirect = false;
486 for (region = region->outer; region; region = region->outer)
487 {
488 if (region->type == GIMPLE_OMP_PARALLEL)
489 indirect = true;
490 else if (region->type == GIMPLE_OMP_TARGET)
491 {
492 gomp_target *tgt_stmt
493 = as_a <gomp_target *> (last_stmt (region->entry));
494
495 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
496 OMP_CLAUSE__GRIDDIM_))
497 return indirect;
498 else
499 return true;
500 }
501 }
502
503 if (lookup_attribute ("omp declare target",
504 DECL_ATTRIBUTES (current_function_decl)))
505 return true;
506
507 return false;
508 }
509
510 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
511 Add CHILD_FNDECL to decl chain of the supercontext of the block
512 ENTRY_BLOCK - this is the block which originally contained the
513 code from which CHILD_FNDECL was created.
514
515 Together, these actions ensure that the debug info for the outlined
516 function will be emitted with the correct lexical scope. */
517
518 static void
519 adjust_context_and_scope (struct omp_region *region, tree entry_block,
520 tree child_fndecl)
521 {
522 tree parent_fndecl = NULL_TREE;
523 gimple *entry_stmt;
524 /* OMP expansion expands inner regions before outer ones, so if
525 we e.g. have an explicit task region nested in a parallel region, when
526 expanding the task region current_function_decl will be the original
527 source function, but we actually want to use as context the child
528 function of the parallel. */
529 for (region = region->outer;
530 region && parent_fndecl == NULL_TREE; region = region->outer)
531 switch (region->type)
532 {
533 case GIMPLE_OMP_PARALLEL:
534 case GIMPLE_OMP_TASK:
535 case GIMPLE_OMP_TEAMS:
536 entry_stmt = last_stmt (region->entry);
537 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
538 break;
539 case GIMPLE_OMP_TARGET:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl
542 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
543 break;
544 default:
545 break;
546 }
547
548 if (parent_fndecl == NULL_TREE)
549 parent_fndecl = current_function_decl;
550 DECL_CONTEXT (child_fndecl) = parent_fndecl;
551
552 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
553 {
554 tree b = BLOCK_SUPERCONTEXT (entry_block);
555 if (TREE_CODE (b) == BLOCK)
556 {
557 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
558 BLOCK_VARS (b) = child_fndecl;
559 }
560 }
561 }
562
563 /* Build the function calls to GOMP_parallel etc to actually
564 generate the parallel operation. REGION is the parallel region
565 being expanded. BB is the block where to insert the code. WS_ARGS
566 will be set if this is a call to a combined parallel+workshare
567 construct; it contains the list of additional arguments needed by
568 the workshare construct. */
569
570 static void
571 expand_parallel_call (struct omp_region *region, basic_block bb,
572 gomp_parallel *entry_stmt,
573 vec<tree, va_gc> *ws_args)
574 {
575 tree t, t1, t2, val, cond, c, clauses, flags;
576 gimple_stmt_iterator gsi;
577 gimple *stmt;
578 enum built_in_function start_ix;
579 int start_ix2;
580 location_t clause_loc;
581 vec<tree, va_gc> *args;
582
583 clauses = gimple_omp_parallel_clauses (entry_stmt);
584
585 /* Determine what flavor of GOMP_parallel we will be
586 emitting. */
587 start_ix = BUILT_IN_GOMP_PARALLEL;
588 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
589 if (rtmp)
590 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
591 else if (is_combined_parallel (region))
592 {
593 switch (region->inner->type)
594 {
595 case GIMPLE_OMP_FOR:
596 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
597 switch (region->inner->sched_kind)
598 {
599 case OMP_CLAUSE_SCHEDULE_RUNTIME:
600 if ((region->inner->sched_modifiers
601 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
602 start_ix2 = 6;
603 else if ((region->inner->sched_modifiers
604 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
605 start_ix2 = 7;
606 else
607 start_ix2 = 3;
608 break;
609 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
610 case OMP_CLAUSE_SCHEDULE_GUIDED:
611 if ((region->inner->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
613 {
614 start_ix2 = 3 + region->inner->sched_kind;
615 break;
616 }
617 /* FALLTHRU */
618 default:
619 start_ix2 = region->inner->sched_kind;
620 break;
621 }
622 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
623 start_ix = (enum built_in_function) start_ix2;
624 break;
625 case GIMPLE_OMP_SECTIONS:
626 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
627 break;
628 default:
629 gcc_unreachable ();
630 }
631 }
632
633 /* By default, the value of NUM_THREADS is zero (selected at run time)
634 and there is no conditional. */
635 cond = NULL_TREE;
636 val = build_int_cst (unsigned_type_node, 0);
637 flags = build_int_cst (unsigned_type_node, 0);
638
639 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
640 if (c)
641 cond = OMP_CLAUSE_IF_EXPR (c);
642
643 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
644 if (c)
645 {
646 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
647 clause_loc = OMP_CLAUSE_LOCATION (c);
648 }
649 else
650 clause_loc = gimple_location (entry_stmt);
651
652 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
653 if (c)
654 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
655
656 /* Ensure 'val' is of the correct type. */
657 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
658
659 /* If we found the clause 'if (cond)', build either
660 (cond == 0) or (cond ? val : 1u). */
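/* E.g. for '#pragma omp parallel if (flag) num_threads (n)' the value
   passed to the runtime becomes 'flag ? n : 1u', so a false IF clause
   forces a single thread; without a NUM_THREADS clause VAL is 0 and the
   cheaper 'flag == 0' form is used, yielding 1 when FLAG is false and 0
   (runtime default) when it is true.  */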
661 if (cond)
662 {
663 cond = gimple_boolify (cond);
664
665 if (integer_zerop (val))
666 val = fold_build2_loc (clause_loc,
667 EQ_EXPR, unsigned_type_node, cond,
668 build_int_cst (TREE_TYPE (cond), 0));
669 else
670 {
671 basic_block cond_bb, then_bb, else_bb;
672 edge e, e_then, e_else;
673 tree tmp_then, tmp_else, tmp_join, tmp_var;
674
675 tmp_var = create_tmp_var (TREE_TYPE (val));
676 if (gimple_in_ssa_p (cfun))
677 {
678 tmp_then = make_ssa_name (tmp_var);
679 tmp_else = make_ssa_name (tmp_var);
680 tmp_join = make_ssa_name (tmp_var);
681 }
682 else
683 {
684 tmp_then = tmp_var;
685 tmp_else = tmp_var;
686 tmp_join = tmp_var;
687 }
688
689 e = split_block_after_labels (bb);
690 cond_bb = e->src;
691 bb = e->dest;
692 remove_edge (e);
693
694 then_bb = create_empty_bb (cond_bb);
695 else_bb = create_empty_bb (then_bb);
696 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
697 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
698
699 stmt = gimple_build_cond_empty (cond);
700 gsi = gsi_start_bb (cond_bb);
701 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
702
703 gsi = gsi_start_bb (then_bb);
704 expand_omp_build_assign (&gsi, tmp_then, val, true);
705
706 gsi = gsi_start_bb (else_bb);
707 expand_omp_build_assign (&gsi, tmp_else,
708 build_int_cst (unsigned_type_node, 1),
709 true);
710
711 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
712 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
713 add_bb_to_loop (then_bb, cond_bb->loop_father);
714 add_bb_to_loop (else_bb, cond_bb->loop_father);
715 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
716 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
717
718 if (gimple_in_ssa_p (cfun))
719 {
720 gphi *phi = create_phi_node (tmp_join, bb);
721 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
722 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
723 }
724
725 val = tmp_join;
726 }
727
728 gsi = gsi_start_bb (bb);
729 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
730 false, GSI_CONTINUE_LINKING);
731 }
732
733 gsi = gsi_last_nondebug_bb (bb);
734 t = gimple_omp_parallel_data_arg (entry_stmt);
735 if (t == NULL)
736 t1 = null_pointer_node;
737 else
738 t1 = build_fold_addr_expr (t);
739 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
740 t2 = build_fold_addr_expr (child_fndecl);
741
742 vec_alloc (args, 4 + vec_safe_length (ws_args));
743 args->quick_push (t2);
744 args->quick_push (t1);
745 args->quick_push (val);
746 if (ws_args)
747 args->splice (*ws_args);
748 args->quick_push (flags);
749
750 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
751 builtin_decl_explicit (start_ix), args);
752
753 if (rtmp)
754 {
755 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
756 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
757 fold_convert (type,
758 fold_convert (pointer_sized_int_node, t)));
759 }
760 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
761 false, GSI_CONTINUE_LINKING);
762
763 if (hsa_gen_requested_p ()
764 && parallel_needs_hsa_kernel_p (region))
765 {
766 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
767 hsa_register_kernel (child_cnode);
768 }
769 }
770
771 /* Build the function call to GOMP_task to actually
772 generate the task operation. BB is the block where to insert the code. */
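/* For instance, '#pragma omp task untied mergeable' with no DEPEND clause
   passes GOMP_TASK_FLAG_UNTIED | GOMP_TASK_FLAG_MERGEABLE in the flags
   argument built below; taskloops additionally encode grainsize/num_tasks
   handling and pass the iteration bounds and step to GOMP_taskloop.  */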
773
774 static void
775 expand_task_call (struct omp_region *region, basic_block bb,
776 gomp_task *entry_stmt)
777 {
778 tree t1, t2, t3;
779 gimple_stmt_iterator gsi;
780 location_t loc = gimple_location (entry_stmt);
781
782 tree clauses = gimple_omp_task_clauses (entry_stmt);
783
784 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
785 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
786 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
787 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
788 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
789 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
790
791 unsigned int iflags
792 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
793 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
794 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
795
796 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
797 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
798 tree num_tasks = NULL_TREE;
799 bool ull = false;
800 if (taskloop_p)
801 {
802 gimple *g = last_stmt (region->outer->entry);
803 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
804 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
805 struct omp_for_data fd;
806 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
807 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
808 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
809 OMP_CLAUSE__LOOPTEMP_);
810 startvar = OMP_CLAUSE_DECL (startvar);
811 endvar = OMP_CLAUSE_DECL (endvar);
812 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
813 if (fd.loop.cond_code == LT_EXPR)
814 iflags |= GOMP_TASK_FLAG_UP;
815 tree tclauses = gimple_omp_for_clauses (g);
816 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
817 if (num_tasks)
818 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
819 else
820 {
821 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
822 if (num_tasks)
823 {
824 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
825 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
826 }
827 else
828 num_tasks = integer_zero_node;
829 }
830 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
831 if (ifc == NULL_TREE)
832 iflags |= GOMP_TASK_FLAG_IF;
833 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
834 iflags |= GOMP_TASK_FLAG_NOGROUP;
835 ull = fd.iter_type == long_long_unsigned_type_node;
836 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
837 iflags |= GOMP_TASK_FLAG_REDUCTION;
838 }
839 else if (priority)
840 iflags |= GOMP_TASK_FLAG_PRIORITY;
841
842 tree flags = build_int_cst (unsigned_type_node, iflags);
843
844 tree cond = boolean_true_node;
845 if (ifc)
846 {
847 if (taskloop_p)
848 {
849 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
850 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
851 build_int_cst (unsigned_type_node,
852 GOMP_TASK_FLAG_IF),
853 build_int_cst (unsigned_type_node, 0));
854 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
855 flags, t);
856 }
857 else
858 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
859 }
860
861 if (finalc)
862 {
863 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
864 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
865 build_int_cst (unsigned_type_node,
866 GOMP_TASK_FLAG_FINAL),
867 build_int_cst (unsigned_type_node, 0));
868 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
869 }
870 if (depend)
871 depend = OMP_CLAUSE_DECL (depend);
872 else
873 depend = build_int_cst (ptr_type_node, 0);
874 if (priority)
875 priority = fold_convert (integer_type_node,
876 OMP_CLAUSE_PRIORITY_EXPR (priority));
877 else
878 priority = integer_zero_node;
879
880 gsi = gsi_last_nondebug_bb (bb);
881 tree t = gimple_omp_task_data_arg (entry_stmt);
882 if (t == NULL)
883 t2 = null_pointer_node;
884 else
885 t2 = build_fold_addr_expr_loc (loc, t);
886 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
887 t = gimple_omp_task_copy_fn (entry_stmt);
888 if (t == NULL)
889 t3 = null_pointer_node;
890 else
891 t3 = build_fold_addr_expr_loc (loc, t);
892
893 if (taskloop_p)
894 t = build_call_expr (ull
895 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
896 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
897 11, t1, t2, t3,
898 gimple_omp_task_arg_size (entry_stmt),
899 gimple_omp_task_arg_align (entry_stmt), flags,
900 num_tasks, priority, startvar, endvar, step);
901 else
902 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
903 9, t1, t2, t3,
904 gimple_omp_task_arg_size (entry_stmt),
905 gimple_omp_task_arg_align (entry_stmt), cond, flags,
906 depend, priority);
907
908 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
909 false, GSI_CONTINUE_LINKING);
910 }
911
912 /* Build the function call to GOMP_taskwait_depend to actually
913 generate the taskwait operation. BB is the block where to insert the
914 code. */
915
916 static void
917 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
918 {
919 tree clauses = gimple_omp_task_clauses (entry_stmt);
920 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
921 if (depend == NULL_TREE)
922 return;
923
924 depend = OMP_CLAUSE_DECL (depend);
925
926 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
927 tree t
928 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
929 1, depend);
930
931 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
932 false, GSI_CONTINUE_LINKING);
933 }
934
935 /* Build the function call to GOMP_teams_reg to actually
936 generate the host teams operation. BB is the block where to insert
937 the code. */
938
939 static void
940 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
941 {
942 tree clauses = gimple_omp_teams_clauses (entry_stmt);
943 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
944 if (num_teams == NULL_TREE)
945 num_teams = build_int_cst (unsigned_type_node, 0);
946 else
947 {
948 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
949 num_teams = fold_convert (unsigned_type_node, num_teams);
950 }
951 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
952 if (thread_limit == NULL_TREE)
953 thread_limit = build_int_cst (unsigned_type_node, 0);
954 else
955 {
956 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
957 thread_limit = fold_convert (unsigned_type_node, thread_limit);
958 }
959
960 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
961 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
962 if (t == NULL)
963 t1 = null_pointer_node;
964 else
965 t1 = build_fold_addr_expr (t);
966 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
967 tree t2 = build_fold_addr_expr (child_fndecl);
968
969 vec<tree, va_gc> *args;
970 vec_alloc (args, 5);
971 args->quick_push (t2);
972 args->quick_push (t1);
973 args->quick_push (num_teams);
974 args->quick_push (thread_limit);
975 /* For future extensibility. */
976 args->quick_push (build_zero_cst (unsigned_type_node));
977
978 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
979 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
980 args);
981
982 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
983 false, GSI_CONTINUE_LINKING);
984 }
985
986 /* Chain all the DECLs in V together by their DECL_CHAIN fields. */
987
988 static tree
989 vec2chain (vec<tree, va_gc> *v)
990 {
991 tree chain = NULL_TREE, t;
992 unsigned ix;
993
994 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
995 {
996 DECL_CHAIN (t) = chain;
997 chain = t;
998 }
999
1000 return chain;
1001 }
1002
1003 /* Remove barriers in REGION->EXIT's block. Note that this is only
1004 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1005 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1006 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1007 removed. */
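/* For instance, in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         body;
     }

   the implicit barrier at the end of the worksharing loop is immediately
   followed by the one implied by the end of the parallel region, so the
   former can be turned into a nowait return, provided the parallel body
   has no addressable locals that a queued task might still be using.  */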
1008
1009 static void
1010 remove_exit_barrier (struct omp_region *region)
1011 {
1012 gimple_stmt_iterator gsi;
1013 basic_block exit_bb;
1014 edge_iterator ei;
1015 edge e;
1016 gimple *stmt;
1017 int any_addressable_vars = -1;
1018
1019 exit_bb = region->exit;
1020
1021 /* If the parallel region doesn't return, we don't have REGION->EXIT
1022 block at all. */
1023 if (! exit_bb)
1024 return;
1025
1026 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1027 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1028 statements that can appear in between are extremely limited -- no
1029 memory operations at all. Here, we allow nothing at all, so the
1030 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1031 gsi = gsi_last_nondebug_bb (exit_bb);
1032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1033 gsi_prev_nondebug (&gsi);
1034 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1035 return;
1036
1037 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1038 {
1039 gsi = gsi_last_nondebug_bb (e->src);
1040 if (gsi_end_p (gsi))
1041 continue;
1042 stmt = gsi_stmt (gsi);
1043 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1044 && !gimple_omp_return_nowait_p (stmt))
1045 {
1046 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1047 in many cases. If there could be tasks queued, the barrier
1048 might be needed to let the tasks run before some local
1049 variable of the parallel that the task uses as shared
1050 runs out of scope. The task can be spawned either
1051 from within current function (this would be easy to check)
1052 or from some function it calls and gets passed an address
1053 of such a variable. */
1054 if (any_addressable_vars < 0)
1055 {
1056 gomp_parallel *parallel_stmt
1057 = as_a <gomp_parallel *> (last_stmt (region->entry));
1058 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1059 tree local_decls, block, decl;
1060 unsigned ix;
1061
1062 any_addressable_vars = 0;
1063 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1064 if (TREE_ADDRESSABLE (decl))
1065 {
1066 any_addressable_vars = 1;
1067 break;
1068 }
1069 for (block = gimple_block (stmt);
1070 !any_addressable_vars
1071 && block
1072 && TREE_CODE (block) == BLOCK;
1073 block = BLOCK_SUPERCONTEXT (block))
1074 {
1075 for (local_decls = BLOCK_VARS (block);
1076 local_decls;
1077 local_decls = DECL_CHAIN (local_decls))
1078 if (TREE_ADDRESSABLE (local_decls))
1079 {
1080 any_addressable_vars = 1;
1081 break;
1082 }
1083 if (block == gimple_block (parallel_stmt))
1084 break;
1085 }
1086 }
1087 if (!any_addressable_vars)
1088 gimple_omp_return_set_nowait (stmt);
1089 }
1090 }
1091 }
1092
1093 static void
1094 remove_exit_barriers (struct omp_region *region)
1095 {
1096 if (region->type == GIMPLE_OMP_PARALLEL)
1097 remove_exit_barrier (region);
1098
1099 if (region->inner)
1100 {
1101 region = region->inner;
1102 remove_exit_barriers (region);
1103 while (region->next)
1104 {
1105 region = region->next;
1106 remove_exit_barriers (region);
1107 }
1108 }
1109 }
1110
1111 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1112 calls. These can't be declared as const functions, but
1113 within one parallel body they are constant, so they can be
1114 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1115 which are declared const. Similarly for a task body, except
1116 that in an untied task omp_get_thread_num () can change at any task
1117 scheduling point. */
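/* E.g. two omp_get_num_threads () calls in the same parallel body return
   the same value, so rewriting them to the const
   __builtin_omp_get_num_threads () allows later optimizations to CSE them
   into a single call.  */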
1118
1119 static void
1120 optimize_omp_library_calls (gimple *entry_stmt)
1121 {
1122 basic_block bb;
1123 gimple_stmt_iterator gsi;
1124 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1125 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1126 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1127 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1128 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1129 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1130 OMP_CLAUSE_UNTIED) != NULL);
1131
1132 FOR_EACH_BB_FN (bb, cfun)
1133 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1134 {
1135 gimple *call = gsi_stmt (gsi);
1136 tree decl;
1137
1138 if (is_gimple_call (call)
1139 && (decl = gimple_call_fndecl (call))
1140 && DECL_EXTERNAL (decl)
1141 && TREE_PUBLIC (decl)
1142 && DECL_INITIAL (decl) == NULL)
1143 {
1144 tree built_in;
1145
1146 if (DECL_NAME (decl) == thr_num_id)
1147 {
1148 /* In #pragma omp task untied omp_get_thread_num () can change
1149 during the execution of the task region. */
1150 if (untied_task)
1151 continue;
1152 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1153 }
1154 else if (DECL_NAME (decl) == num_thr_id)
1155 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1156 else
1157 continue;
1158
1159 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1160 || gimple_call_num_args (call) != 0)
1161 continue;
1162
1163 if (flag_exceptions && !TREE_NOTHROW (decl))
1164 continue;
1165
1166 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1167 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1168 TREE_TYPE (TREE_TYPE (built_in))))
1169 continue;
1170
1171 gimple_call_set_fndecl (call, built_in);
1172 }
1173 }
1174 }
1175
1176 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1177 regimplified. */
1178
1179 static tree
1180 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1181 {
1182 tree t = *tp;
1183
1184 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1185 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1186 return t;
1187
1188 if (TREE_CODE (t) == ADDR_EXPR)
1189 recompute_tree_invariant_for_addr_expr (t);
1190
1191 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1192 return NULL_TREE;
1193 }
1194
1195 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1196
1197 static void
1198 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1199 bool after)
1200 {
1201 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1202 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1203 !after, after ? GSI_CONTINUE_LINKING
1204 : GSI_SAME_STMT);
1205 gimple *stmt = gimple_build_assign (to, from);
1206 if (after)
1207 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1208 else
1209 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1210 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1211 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1212 {
1213 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1214 gimple_regimplify_operands (stmt, &gsi);
1215 }
1216 }
1217
1218 /* Expand the OpenMP parallel or task directive starting at REGION. */
1219
1220 static void
1221 expand_omp_taskreg (struct omp_region *region)
1222 {
1223 basic_block entry_bb, exit_bb, new_bb;
1224 struct function *child_cfun;
1225 tree child_fn, block, t;
1226 gimple_stmt_iterator gsi;
1227 gimple *entry_stmt, *stmt;
1228 edge e;
1229 vec<tree, va_gc> *ws_args;
1230
1231 entry_stmt = last_stmt (region->entry);
1232 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1233 && gimple_omp_task_taskwait_p (entry_stmt))
1234 {
1235 new_bb = region->entry;
1236 gsi = gsi_last_nondebug_bb (region->entry);
1237 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1238 gsi_remove (&gsi, true);
1239 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1240 return;
1241 }
1242
1243 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1244 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1245
1246 entry_bb = region->entry;
1247 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1248 exit_bb = region->cont;
1249 else
1250 exit_bb = region->exit;
1251
1252 if (is_combined_parallel (region))
1253 ws_args = region->ws_args;
1254 else
1255 ws_args = NULL;
1256
1257 if (child_cfun->cfg)
1258 {
1259 /* Due to inlining, it may happen that we have already outlined
1260 the region, in which case all we need to do is make the
1261 sub-graph unreachable and emit the parallel call. */
1262 edge entry_succ_e, exit_succ_e;
1263
1264 entry_succ_e = single_succ_edge (entry_bb);
1265
1266 gsi = gsi_last_nondebug_bb (entry_bb);
1267 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1268 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1269 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1270 gsi_remove (&gsi, true);
1271
1272 new_bb = entry_bb;
1273 if (exit_bb)
1274 {
1275 exit_succ_e = single_succ_edge (exit_bb);
1276 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1277 }
1278 remove_edge_and_dominated_blocks (entry_succ_e);
1279 }
1280 else
1281 {
1282 unsigned srcidx, dstidx, num;
1283
1284 /* If the parallel region needs data sent from the parent
1285 function, then the very first statement (except possible
1286 tree profile counter updates) of the parallel body
1287 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1288 &.OMP_DATA_O is passed as an argument to the child function,
1289 we need to replace it with the argument as seen by the child
1290 function.
1291
1292 In most cases, this will end up being the identity assignment
1293 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1294 a function call that has been inlined, the original PARM_DECL
1295 .OMP_DATA_I may have been converted into a different local
1296 variable, in which case we need to keep the assignment. */
1297 if (gimple_omp_taskreg_data_arg (entry_stmt))
1298 {
1299 basic_block entry_succ_bb
1300 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1301 : FALLTHRU_EDGE (entry_bb)->dest;
1302 tree arg;
1303 gimple *parcopy_stmt = NULL;
1304
1305 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1306 {
1307 gimple *stmt;
1308
1309 gcc_assert (!gsi_end_p (gsi));
1310 stmt = gsi_stmt (gsi);
1311 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1312 continue;
1313
1314 if (gimple_num_ops (stmt) == 2)
1315 {
1316 tree arg = gimple_assign_rhs1 (stmt);
1317
1318 /* We're ignoring the subcode because we're
1319 effectively doing a STRIP_NOPS. */
1320
1321 if (TREE_CODE (arg) == ADDR_EXPR
1322 && (TREE_OPERAND (arg, 0)
1323 == gimple_omp_taskreg_data_arg (entry_stmt)))
1324 {
1325 parcopy_stmt = stmt;
1326 break;
1327 }
1328 }
1329 }
1330
1331 gcc_assert (parcopy_stmt != NULL);
1332 arg = DECL_ARGUMENTS (child_fn);
1333
1334 if (!gimple_in_ssa_p (cfun))
1335 {
1336 if (gimple_assign_lhs (parcopy_stmt) == arg)
1337 gsi_remove (&gsi, true);
1338 else
1339 {
1340 /* ?? Is setting the subcode really necessary ?? */
1341 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1342 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1343 }
1344 }
1345 else
1346 {
1347 tree lhs = gimple_assign_lhs (parcopy_stmt);
1348 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1349 /* We'd like to set the rhs to the default def in the child_fn,
1350 but it's too early to create ssa names in the child_fn.
1351 Instead, we set the rhs to the parm. In
1352 move_sese_region_to_fn, we introduce a default def for the
1353 parm, map the parm to its default def, and once we encounter
1354 this stmt, replace the parm with the default def. */
1355 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 update_stmt (parcopy_stmt);
1357 }
1358 }
1359
1360 /* Declare local variables needed in CHILD_CFUN. */
1361 block = DECL_INITIAL (child_fn);
1362 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1363 /* The gimplifier could record temporaries in parallel/task block
1364 rather than in containing function's local_decls chain,
1365 which would mean cgraph missed finalizing them. Do it now. */
1366 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1367 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1368 varpool_node::finalize_decl (t);
1369 DECL_SAVED_TREE (child_fn) = NULL;
1370 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1371 gimple_set_body (child_fn, NULL);
1372 TREE_USED (block) = 1;
1373
1374 /* Reset DECL_CONTEXT on function arguments. */
1375 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1376 DECL_CONTEXT (t) = child_fn;
1377
1378 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1379 so that it can be moved to the child function. */
1380 gsi = gsi_last_nondebug_bb (entry_bb);
1381 stmt = gsi_stmt (gsi);
1382 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1383 || gimple_code (stmt) == GIMPLE_OMP_TASK
1384 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1385 e = split_block (entry_bb, stmt);
1386 gsi_remove (&gsi, true);
1387 entry_bb = e->dest;
1388 edge e2 = NULL;
1389 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1390 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1391 else
1392 {
1393 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1394 gcc_assert (e2->dest == region->exit);
1395 remove_edge (BRANCH_EDGE (entry_bb));
1396 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1397 gsi = gsi_last_nondebug_bb (region->exit);
1398 gcc_assert (!gsi_end_p (gsi)
1399 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1400 gsi_remove (&gsi, true);
1401 }
1402
1403 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1404 if (exit_bb)
1405 {
1406 gsi = gsi_last_nondebug_bb (exit_bb);
1407 gcc_assert (!gsi_end_p (gsi)
1408 && (gimple_code (gsi_stmt (gsi))
1409 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1410 stmt = gimple_build_return (NULL);
1411 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1412 gsi_remove (&gsi, true);
1413 }
1414
1415 /* Move the parallel region into CHILD_CFUN. */
1416
1417 if (gimple_in_ssa_p (cfun))
1418 {
1419 init_tree_ssa (child_cfun);
1420 init_ssa_operands (child_cfun);
1421 child_cfun->gimple_df->in_ssa_p = true;
1422 block = NULL_TREE;
1423 }
1424 else
1425 block = gimple_block (entry_stmt);
1426
1427 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1428 if (exit_bb)
1429 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1430 if (e2)
1431 {
1432 basic_block dest_bb = e2->dest;
1433 if (!exit_bb)
1434 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1435 remove_edge (e2);
1436 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1437 }
1438 /* When the OMP expansion process cannot guarantee an up-to-date
1439 loop tree, arrange for the child function to fix up loops. */
1440 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1441 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1442
1443 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1444 num = vec_safe_length (child_cfun->local_decls);
1445 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1446 {
1447 t = (*child_cfun->local_decls)[srcidx];
1448 if (DECL_CONTEXT (t) == cfun->decl)
1449 continue;
1450 if (srcidx != dstidx)
1451 (*child_cfun->local_decls)[dstidx] = t;
1452 dstidx++;
1453 }
1454 if (dstidx != num)
1455 vec_safe_truncate (child_cfun->local_decls, dstidx);
1456
1457 /* Inform the callgraph about the new function. */
1458 child_cfun->curr_properties = cfun->curr_properties;
1459 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1460 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1461 cgraph_node *node = cgraph_node::get_create (child_fn);
1462 node->parallelized_function = 1;
1463 cgraph_node::add_new_function (child_fn, true);
1464
1465 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1466 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1467
1468 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1469 fixed in a following pass. */
1470 push_cfun (child_cfun);
1471 if (need_asm)
1472 assign_assembler_name_if_needed (child_fn);
1473
1474 if (optimize)
1475 optimize_omp_library_calls (entry_stmt);
1476 update_max_bb_count ();
1477 cgraph_edge::rebuild_edges ();
1478
1479 /* Some EH regions might become dead, see PR34608. If
1480 pass_cleanup_cfg isn't the first pass to happen with the
1481 new child, these dead EH edges might cause problems.
1482 Clean them up now. */
1483 if (flag_exceptions)
1484 {
1485 basic_block bb;
1486 bool changed = false;
1487
1488 FOR_EACH_BB_FN (bb, cfun)
1489 changed |= gimple_purge_dead_eh_edges (bb);
1490 if (changed)
1491 cleanup_tree_cfg ();
1492 }
1493 if (gimple_in_ssa_p (cfun))
1494 update_ssa (TODO_update_ssa);
1495 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1496 verify_loop_structure ();
1497 pop_cfun ();
1498
1499 if (dump_file && !gimple_in_ssa_p (cfun))
1500 {
1501 omp_any_child_fn_dumped = true;
1502 dump_function_header (dump_file, child_fn, dump_flags);
1503 dump_function_to_file (child_fn, dump_file, dump_flags);
1504 }
1505 }
1506
1507 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1508
1509 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1510 expand_parallel_call (region, new_bb,
1511 as_a <gomp_parallel *> (entry_stmt), ws_args);
1512 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1513 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1514 else
1515 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1516 if (gimple_in_ssa_p (cfun))
1517 update_ssa (TODO_update_ssa_only_virtuals);
1518 }
1519
1520 /* Information about members of an OpenACC collapsed loop nest. */
1521
1522 struct oacc_collapse
1523 {
1524 tree base; /* Base value. */
1525 tree iters; /* Number of steps. */
1526 tree step; /* Step size. */
1527 tree tile; /* Tile increment (if tiled). */
1528 tree outer; /* Tile iterator var. */
1529 };
1530
1531 /* Helper for expand_oacc_for. Determine collapsed loop information.
1532 Fill in COUNTS array. Emit any initialization code before GSI.
1533 Return the calculated outer loop bound of BOUND_TYPE. */
1534
1535 static tree
1536 expand_oacc_collapse_init (const struct omp_for_data *fd,
1537 gimple_stmt_iterator *gsi,
1538 oacc_collapse *counts, tree bound_type,
1539 location_t loc)
1540 {
1541 tree tiling = fd->tiling;
1542 tree total = build_int_cst (bound_type, 1);
1543 int ix;
1544
1545 gcc_assert (integer_onep (fd->loop.step));
1546 gcc_assert (integer_zerop (fd->loop.n1));
1547
1548 /* When tiling, the first operand of the tile clause applies to the
1549 innermost loop, and we work outwards from there. Seems
1550 backwards, but whatever. */
1551 for (ix = fd->collapse; ix--;)
1552 {
1553 const omp_for_data_loop *loop = &fd->loops[ix];
1554
1555 tree iter_type = TREE_TYPE (loop->v);
1556 tree diff_type = iter_type;
1557 tree plus_type = iter_type;
1558
1559 gcc_assert (loop->cond_code == fd->loop.cond_code);
1560
1561 if (POINTER_TYPE_P (iter_type))
1562 plus_type = sizetype;
1563 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1564 diff_type = signed_type_for (diff_type);
1565 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1566 diff_type = integer_type_node;
1567
1568 if (tiling)
1569 {
1570 tree num = build_int_cst (integer_type_node, fd->collapse);
1571 tree loop_no = build_int_cst (integer_type_node, ix);
1572 tree tile = TREE_VALUE (tiling);
1573 gcall *call
1574 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1575 /* gwv-outer=*/integer_zero_node,
1576 /* gwv-inner=*/integer_zero_node);
1577
1578 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1579 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1580 gimple_call_set_lhs (call, counts[ix].tile);
1581 gimple_set_location (call, loc);
1582 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1583
1584 tiling = TREE_CHAIN (tiling);
1585 }
1586 else
1587 {
1588 counts[ix].tile = NULL;
1589 counts[ix].outer = loop->v;
1590 }
1591
1592 tree b = loop->n1;
1593 tree e = loop->n2;
1594 tree s = loop->step;
1595 bool up = loop->cond_code == LT_EXPR;
1596 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1597 bool negating;
1598 tree expr;
1599
1600 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1601 true, GSI_SAME_STMT);
1602 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1603 true, GSI_SAME_STMT);
1604
1605 /* Convert the step, avoiding possible unsigned->signed overflow. */
1606 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1607 if (negating)
1608 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1609 s = fold_convert (diff_type, s);
1610 if (negating)
1611 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1612 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614
1615 /* Determine the range, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (iter_type);
1617 expr = fold_build2 (MINUS_EXPR, plus_type,
1618 fold_convert (plus_type, negating ? b : e),
1619 fold_convert (plus_type, negating ? e : b));
1620 expr = fold_convert (diff_type, expr);
1621 if (negating)
1622 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1623 tree range = force_gimple_operand_gsi
1624 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1625
1626 /* Determine number of iterations. */
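/* I.e. iters = (range - dir + step) / step; e.g. an upward loop with
   n1 == 0, n2 == 10 and step == 3 iterates over 0, 3, 6, 9 and yields
   (10 - 1 + 3) / 3 == 4.  */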
1627 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1628 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1629 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1630
1631 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1633
1634 counts[ix].base = b;
1635 counts[ix].iters = iters;
1636 counts[ix].step = s;
1637
1638 total = fold_build2 (MULT_EXPR, bound_type, total,
1639 fold_convert (bound_type, iters));
1640 }
1641
1642 return total;
1643 }
1644
1645 /* Emit initializers for collapsed loop members. INNER is true if
1646 this is for the element loop of a TILE. IVAR is the outer
1647 loop iteration variable, from which collapsed loop iteration values
1648 are calculated. COUNTS array has been initialized by
1649 expand_oacc_collapse_init. */
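/* For example, with collapse (2), 4 outer iterations and 5 inner ones, a
   linear value IVAR == 13 is decomposed below into inner index 13 % 5 == 3
   and outer index 13 / 5 == 2; each loop variable is then recovered as
   base (or the tile iterator) plus index * step.  */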
1650
1651 static void
1652 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1653 gimple_stmt_iterator *gsi,
1654 const oacc_collapse *counts, tree ivar)
1655 {
1656 tree ivar_type = TREE_TYPE (ivar);
1657
1658 /* The most rapidly changing iteration variable is the innermost
1659 one. */
1660 for (int ix = fd->collapse; ix--;)
1661 {
1662 const omp_for_data_loop *loop = &fd->loops[ix];
1663 const oacc_collapse *collapse = &counts[ix];
1664 tree v = inner ? loop->v : collapse->outer;
1665 tree iter_type = TREE_TYPE (v);
1666 tree diff_type = TREE_TYPE (collapse->step);
1667 tree plus_type = iter_type;
1668 enum tree_code plus_code = PLUS_EXPR;
1669 tree expr;
1670
1671 if (POINTER_TYPE_P (iter_type))
1672 {
1673 plus_code = POINTER_PLUS_EXPR;
1674 plus_type = sizetype;
1675 }
1676
1677 expr = ivar;
1678 if (ix)
1679 {
1680 tree mod = fold_convert (ivar_type, collapse->iters);
1681 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1682 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1683 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1684 true, GSI_SAME_STMT);
1685 }
1686
1687 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1688 collapse->step);
1689 expr = fold_build2 (plus_code, iter_type,
1690 inner ? collapse->outer : collapse->base,
1691 fold_convert (plus_type, expr));
1692 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 gassign *ass = gimple_build_assign (v, expr);
1695 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1696 }
1697 }
1698
1699 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1700 of the combined collapse > 1 loop constructs, generate code like:
1701 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1702 if (cond3 is <)
1703 adj = STEP3 - 1;
1704 else
1705 adj = STEP3 + 1;
1706 count3 = (adj + N32 - N31) / STEP3;
1707 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1708 if (cond2 is <)
1709 adj = STEP2 - 1;
1710 else
1711 adj = STEP2 + 1;
1712 count2 = (adj + N22 - N21) / STEP2;
1713 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1714 if (cond1 is <)
1715 adj = STEP1 - 1;
1716 else
1717 adj = STEP1 + 1;
1718 count1 = (adj + N12 - N11) / STEP1;
1719 count = count1 * count2 * count3;
1720 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1721 count = 0;
1722 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1723 of the combined loop constructs, just initialize COUNTS array
1724 from the _looptemp_ clauses. */
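/* As a concrete instance of the formula above: with N11 == 0, N12 == 10,
   STEP1 == 3 and cond1 '<', adj == 2 and count1 == (2 + 10 - 0) / 3 == 4,
   matching the four iterations 0, 3, 6, 9.  */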
1725
1726 /* NOTE: It *could* be better to moosh all of the BBs together,
1727 creating one larger BB with all the computation and the unexpected
1728 jump at the end. I.e.
1729
1730 bool zero3, zero2, zero1, zero;
1731
1732 zero3 = N32 c3 N31;
1733 count3 = (N32 - N31) /[cl] STEP3;
1734 zero2 = N22 c2 N21;
1735 count2 = (N22 - N21) /[cl] STEP2;
1736 zero1 = N12 c1 N11;
1737 count1 = (N12 - N11) /[cl] STEP1;
1738 zero = zero3 || zero2 || zero1;
1739 count = count1 * count2 * count3;
1740 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1741
1742 After all, we expect zero to be false, and thus we expect to have to
1743 evaluate all of the comparison expressions, so short-circuiting
1744 oughtn't be a win. Since the condition isn't protecting a
1745 denominator, we're not concerned about divide-by-zero, so we can
1746 fully evaluate count even if a numerator turned out to be wrong.
1747
1748 It seems like putting this all together would create much better
1749 scheduling opportunities, and less pressure on the chip's branch
1750 predictor. */
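/* As a concrete (hypothetical) example of the count computation above:
   for (i = 0; i < 10; i += 3) has cond <, so adj = STEP - 1 = 2 and
   count = (2 + 10 - 0) / 3 = 4, matching the iterations 0, 3, 6, 9.
   For a downward loop for (i = 10; i > 0; i -= 3), adj = STEP + 1 = -2
   and count = (-2 + 0 - 10) / -3 = 4, matching 10, 7, 4, 1.  */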
1751
1752 static void
1753 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1754 basic_block &entry_bb, tree *counts,
1755 basic_block &zero_iter1_bb, int &first_zero_iter1,
1756 basic_block &zero_iter2_bb, int &first_zero_iter2,
1757 basic_block &l2_dom_bb)
1758 {
1759 tree t, type = TREE_TYPE (fd->loop.v);
1760 edge e, ne;
1761 int i;
1762
1763 /* Collapsed loops need work for expansion into SSA form. */
1764 gcc_assert (!gimple_in_ssa_p (cfun));
1765
1766 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1767 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1768 {
1769 gcc_assert (fd->ordered == 0);
1770 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1771 isn't supposed to be handled, as the inner loop doesn't
1772 use it. */
1773 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1774 OMP_CLAUSE__LOOPTEMP_);
1775 gcc_assert (innerc);
1776 for (i = 0; i < fd->collapse; i++)
1777 {
1778 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1779 OMP_CLAUSE__LOOPTEMP_);
1780 gcc_assert (innerc);
1781 if (i)
1782 counts[i] = OMP_CLAUSE_DECL (innerc);
1783 else
1784 counts[0] = NULL_TREE;
1785 }
1786 return;
1787 }
1788
1789 for (i = fd->collapse; i < fd->ordered; i++)
1790 {
1791 tree itype = TREE_TYPE (fd->loops[i].v);
1792 counts[i] = NULL_TREE;
1793 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1794 fold_convert (itype, fd->loops[i].n1),
1795 fold_convert (itype, fd->loops[i].n2));
1796 if (t && integer_zerop (t))
1797 {
1798 for (i = fd->collapse; i < fd->ordered; i++)
1799 counts[i] = build_int_cst (type, 0);
1800 break;
1801 }
1802 }
1803 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1804 {
1805 tree itype = TREE_TYPE (fd->loops[i].v);
1806
1807 if (i >= fd->collapse && counts[i])
1808 continue;
1809 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1810 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1811 fold_convert (itype, fd->loops[i].n1),
1812 fold_convert (itype, fd->loops[i].n2)))
1813 == NULL_TREE || !integer_onep (t)))
1814 {
1815 gcond *cond_stmt;
1816 tree n1, n2;
1817 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1818 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1819 true, GSI_SAME_STMT);
1820 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1821 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1822 true, GSI_SAME_STMT);
1823 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1824 NULL_TREE, NULL_TREE);
1825 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1826 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1827 expand_omp_regimplify_p, NULL, NULL)
1828 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1829 expand_omp_regimplify_p, NULL, NULL))
1830 {
1831 *gsi = gsi_for_stmt (cond_stmt);
1832 gimple_regimplify_operands (cond_stmt, gsi);
1833 }
1834 e = split_block (entry_bb, cond_stmt);
1835 basic_block &zero_iter_bb
1836 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1837 int &first_zero_iter
1838 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1839 if (zero_iter_bb == NULL)
1840 {
1841 gassign *assign_stmt;
1842 first_zero_iter = i;
1843 zero_iter_bb = create_empty_bb (entry_bb);
1844 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1845 *gsi = gsi_after_labels (zero_iter_bb);
1846 if (i < fd->collapse)
1847 assign_stmt = gimple_build_assign (fd->loop.n2,
1848 build_zero_cst (type));
1849 else
1850 {
1851 counts[i] = create_tmp_reg (type, ".count");
1852 assign_stmt
1853 = gimple_build_assign (counts[i], build_zero_cst (type));
1854 }
1855 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1856 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1857 entry_bb);
1858 }
1859 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1860 ne->probability = profile_probability::very_unlikely ();
1861 e->flags = EDGE_TRUE_VALUE;
1862 e->probability = ne->probability.invert ();
1863 if (l2_dom_bb == NULL)
1864 l2_dom_bb = entry_bb;
1865 entry_bb = e->dest;
1866 *gsi = gsi_last_nondebug_bb (entry_bb);
1867 }
1868
1869 if (POINTER_TYPE_P (itype))
1870 itype = signed_type_for (itype);
1871 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1872 ? -1 : 1));
1873 t = fold_build2 (PLUS_EXPR, itype,
1874 fold_convert (itype, fd->loops[i].step), t);
1875 t = fold_build2 (PLUS_EXPR, itype, t,
1876 fold_convert (itype, fd->loops[i].n2));
1877 t = fold_build2 (MINUS_EXPR, itype, t,
1878 fold_convert (itype, fd->loops[i].n1));
1879 /* ??? We could probably use CEIL_DIV_EXPR instead of
1880 TRUNC_DIV_EXPR plus the manual adjustment above. Then again,
1881 perhaps we can't generate the same code in the end because
1882 generically we don't know that the values involved must be
1883 negative for GT. */
1884 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1885 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1886 fold_build1 (NEGATE_EXPR, itype, t),
1887 fold_build1 (NEGATE_EXPR, itype,
1888 fold_convert (itype,
1889 fd->loops[i].step)));
1890 else
1891 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1892 fold_convert (itype, fd->loops[i].step));
1893 t = fold_convert (type, t);
1894 if (TREE_CODE (t) == INTEGER_CST)
1895 counts[i] = t;
1896 else
1897 {
1898 if (i < fd->collapse || i != first_zero_iter2)
1899 counts[i] = create_tmp_reg (type, ".count");
1900 expand_omp_build_assign (gsi, counts[i], t);
1901 }
1902 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1903 {
1904 if (i == 0)
1905 t = counts[0];
1906 else
1907 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1908 expand_omp_build_assign (gsi, fd->loop.n2, t);
1909 }
1910 }
1911 }
1912
1913 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1914 T = V;
1915 V3 = N31 + (T % count3) * STEP3;
1916 T = T / count3;
1917 V2 = N21 + (T % count2) * STEP2;
1918 T = T / count2;
1919 V1 = N11 + T * STEP1;
1920 if this loop doesn't have an inner loop construct combined with it.
1921 If it does have an inner loop construct combined with it and the
1922 iteration count isn't known constant, store values from counts array
1923 into its _looptemp_ temporaries instead. */
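/* For example (hypothetical values): with count3 = 4, count2 = 3 and
   V = 7, the code above yields V3 = N31 + (7 % 4) * STEP3 = N31 + 3 * STEP3,
   T = 7 / 4 = 1, V2 = N21 + (1 % 3) * STEP2 = N21 + STEP2, T = 1 / 3 = 0
   and V1 = N11; i.e. logical iteration 7 = 0 * (3 * 4) + 1 * 4 + 3.  */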
1924
1925 static void
1926 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1927 tree *counts, gimple *inner_stmt, tree startvar)
1928 {
1929 int i;
1930 if (gimple_omp_for_combined_p (fd->for_stmt))
1931 {
1932 /* If fd->loop.n2 is constant, then no propagation of the counts
1933 is needed, they are constant. */
1934 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1935 return;
1936
1937 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1938 ? gimple_omp_taskreg_clauses (inner_stmt)
1939 : gimple_omp_for_clauses (inner_stmt);
1940 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1941 isn't supposed to be handled, as the inner loop doesn't
1942 use it. */
1943 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1944 gcc_assert (innerc);
1945 for (i = 0; i < fd->collapse; i++)
1946 {
1947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1948 OMP_CLAUSE__LOOPTEMP_);
1949 gcc_assert (innerc);
1950 if (i)
1951 {
1952 tree tem = OMP_CLAUSE_DECL (innerc);
1953 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1954 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1955 false, GSI_CONTINUE_LINKING);
1956 gassign *stmt = gimple_build_assign (tem, t);
1957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1958 }
1959 }
1960 return;
1961 }
1962
1963 tree type = TREE_TYPE (fd->loop.v);
1964 tree tem = create_tmp_reg (type, ".tem");
1965 gassign *stmt = gimple_build_assign (tem, startvar);
1966 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1967
1968 for (i = fd->collapse - 1; i >= 0; i--)
1969 {
1970 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1971 itype = vtype;
1972 if (POINTER_TYPE_P (vtype))
1973 itype = signed_type_for (vtype);
1974 if (i != 0)
1975 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1976 else
1977 t = tem;
1978 t = fold_convert (itype, t);
1979 t = fold_build2 (MULT_EXPR, itype, t,
1980 fold_convert (itype, fd->loops[i].step));
1981 if (POINTER_TYPE_P (vtype))
1982 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1983 else
1984 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1985 t = force_gimple_operand_gsi (gsi, t,
1986 DECL_P (fd->loops[i].v)
1987 && TREE_ADDRESSABLE (fd->loops[i].v),
1988 NULL_TREE, false,
1989 GSI_CONTINUE_LINKING);
1990 stmt = gimple_build_assign (fd->loops[i].v, t);
1991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1992 if (i != 0)
1993 {
1994 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1995 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1996 false, GSI_CONTINUE_LINKING);
1997 stmt = gimple_build_assign (tem, t);
1998 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1999 }
2000 }
2001 }
2002
2003 /* Helper function for expand_omp_for_*. Generate code like:
2004 L10:
2005 V3 += STEP3;
2006 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2007 L11:
2008 V3 = N31;
2009 V2 += STEP2;
2010 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2011 L12:
2012 V2 = N21;
2013 V1 += STEP1;
2014 goto BODY_BB; */
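/* In other words, the generated blocks behave like an odometer: the
   innermost variable advances on every iteration, and whenever it runs
   past its bound it is reset to its start value and the next outer
   variable advances instead.  E.g. (hypothetically) for collapse(2)
   with trip counts 2 and 3, (V1, V2) visits (N11, N21), (N11, N21 + STEP2),
   (N11, N21 + 2*STEP2), (N11 + STEP1, N21), ...  */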
2015
2016 static basic_block
2017 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2018 basic_block body_bb)
2019 {
2020 basic_block last_bb, bb, collapse_bb = NULL;
2021 int i;
2022 gimple_stmt_iterator gsi;
2023 edge e;
2024 tree t;
2025 gimple *stmt;
2026
2027 last_bb = cont_bb;
2028 for (i = fd->collapse - 1; i >= 0; i--)
2029 {
2030 tree vtype = TREE_TYPE (fd->loops[i].v);
2031
2032 bb = create_empty_bb (last_bb);
2033 add_bb_to_loop (bb, last_bb->loop_father);
2034 gsi = gsi_start_bb (bb);
2035
2036 if (i < fd->collapse - 1)
2037 {
2038 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2039 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2040
2041 t = fd->loops[i + 1].n1;
2042 t = force_gimple_operand_gsi (&gsi, t,
2043 DECL_P (fd->loops[i + 1].v)
2044 && TREE_ADDRESSABLE (fd->loops[i
2045 + 1].v),
2046 NULL_TREE, false,
2047 GSI_CONTINUE_LINKING);
2048 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2049 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2050 }
2051 else
2052 collapse_bb = bb;
2053
2054 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2055
2056 if (POINTER_TYPE_P (vtype))
2057 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2058 else
2059 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2060 t = force_gimple_operand_gsi (&gsi, t,
2061 DECL_P (fd->loops[i].v)
2062 && TREE_ADDRESSABLE (fd->loops[i].v),
2063 NULL_TREE, false, GSI_CONTINUE_LINKING);
2064 stmt = gimple_build_assign (fd->loops[i].v, t);
2065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2066
2067 if (i > 0)
2068 {
2069 t = fd->loops[i].n2;
2070 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2071 false, GSI_CONTINUE_LINKING);
2072 tree v = fd->loops[i].v;
2073 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2074 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2077 stmt = gimple_build_cond_empty (t);
2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2080 expand_omp_regimplify_p, NULL, NULL)
2081 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2082 expand_omp_regimplify_p, NULL, NULL))
2083 gimple_regimplify_operands (stmt, &gsi);
2084 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2085 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2086 }
2087 else
2088 make_edge (bb, body_bb, EDGE_FALLTHRU);
2089 last_bb = bb;
2090 }
2091
2092 return collapse_bb;
2093 }
2094
2095 /* Expand #pragma omp ordered depend(source). */
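/* For instance (a hypothetical doacross loop):

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++)
       {
	 #pragma omp ordered depend(sink: i - 1)
	 a[i] += a[i - 1];
	 #pragma omp ordered depend(source)
       }

   The depend(source) point is expanded here into a call to
   GOMP_doacross_post (or GOMP_doacross_ull_post for unsigned long long
   iterators) passing the address of the current iteration counter
   array.  */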
2096
2097 static void
2098 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2099 tree *counts, location_t loc)
2100 {
2101 enum built_in_function source_ix
2102 = fd->iter_type == long_integer_type_node
2103 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2104 gimple *g
2105 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2106 build_fold_addr_expr (counts[fd->ordered]));
2107 gimple_set_location (g, loc);
2108 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2109 }
2110
2111 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
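/* Continuing the hypothetical example above, the depend(sink: i - 1)
   clause becomes a call to GOMP_doacross_wait (or GOMP_doacross_ull_wait),
   guarded by a runtime check that the waited-for iteration actually lies
   inside the iteration space; the call arguments are the sink iteration
   numbers, with each offset divided by the loop step where necessary.  */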
2112
2113 static void
2114 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2115 tree *counts, tree c, location_t loc)
2116 {
2117 auto_vec<tree, 10> args;
2118 enum built_in_function sink_ix
2119 = fd->iter_type == long_integer_type_node
2120 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2121 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2122 int i;
2123 gimple_stmt_iterator gsi2 = *gsi;
2124 bool warned_step = false;
2125
2126 for (i = 0; i < fd->ordered; i++)
2127 {
2128 tree step = NULL_TREE;
2129 off = TREE_PURPOSE (deps);
2130 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2131 {
2132 step = TREE_OPERAND (off, 1);
2133 off = TREE_OPERAND (off, 0);
2134 }
2135 if (!integer_zerop (off))
2136 {
2137 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2138 || fd->loops[i].cond_code == GT_EXPR);
2139 bool forward = fd->loops[i].cond_code == LT_EXPR;
2140 if (step)
2141 {
2142 /* Non-simple Fortran DO loops. If step is variable,
2143 we don't even know the direction at compile time, so
2144 we can't warn. */
2145 if (TREE_CODE (step) != INTEGER_CST)
2146 break;
2147 forward = tree_int_cst_sgn (step) != -1;
2148 }
2149 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2150 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2151 "waiting for lexically later iteration");
2152 break;
2153 }
2154 deps = TREE_CHAIN (deps);
2155 }
2156 /* If all offsets corresponding to the collapsed loops are zero,
2157 this depend clause can be ignored. FIXME: but there is still a
2158 flush needed. We need to emit one __sync_synchronize () for it
2159 though (perhaps conditionally)? Solve this together with the
2160 conservative dependence folding optimization.
2161 if (i >= fd->collapse)
2162 return; */
2163
2164 deps = OMP_CLAUSE_DECL (c);
2165 gsi_prev (&gsi2);
2166 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2167 edge e2 = split_block_after_labels (e1->dest);
2168
2169 gsi2 = gsi_after_labels (e1->dest);
2170 *gsi = gsi_last_bb (e1->src);
2171 for (i = 0; i < fd->ordered; i++)
2172 {
2173 tree itype = TREE_TYPE (fd->loops[i].v);
2174 tree step = NULL_TREE;
2175 tree orig_off = NULL_TREE;
2176 if (POINTER_TYPE_P (itype))
2177 itype = sizetype;
2178 if (i)
2179 deps = TREE_CHAIN (deps);
2180 off = TREE_PURPOSE (deps);
2181 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2182 {
2183 step = TREE_OPERAND (off, 1);
2184 off = TREE_OPERAND (off, 0);
2185 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2186 && integer_onep (fd->loops[i].step)
2187 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2188 }
2189 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2190 if (step)
2191 {
2192 off = fold_convert_loc (loc, itype, off);
2193 orig_off = off;
2194 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2195 }
2196
2197 if (integer_zerop (off))
2198 t = boolean_true_node;
2199 else
2200 {
2201 tree a;
2202 tree co = fold_convert_loc (loc, itype, off);
2203 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2204 {
2205 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2206 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2207 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2208 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2209 co);
2210 }
2211 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2212 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2213 fd->loops[i].v, co);
2214 else
2215 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2216 fd->loops[i].v, co);
2217 if (step)
2218 {
2219 tree t1, t2;
2220 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2221 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2222 fd->loops[i].n1);
2223 else
2224 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2225 fd->loops[i].n2);
2226 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2227 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2228 fd->loops[i].n2);
2229 else
2230 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2231 fd->loops[i].n1);
2232 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2233 step, build_int_cst (TREE_TYPE (step), 0));
2234 if (TREE_CODE (step) != INTEGER_CST)
2235 {
2236 t1 = unshare_expr (t1);
2237 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2238 false, GSI_CONTINUE_LINKING);
2239 t2 = unshare_expr (t2);
2240 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2241 false, GSI_CONTINUE_LINKING);
2242 }
2243 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2244 t, t2, t1);
2245 }
2246 else if (fd->loops[i].cond_code == LT_EXPR)
2247 {
2248 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2249 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2250 fd->loops[i].n1);
2251 else
2252 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2253 fd->loops[i].n2);
2254 }
2255 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2256 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2257 fd->loops[i].n2);
2258 else
2259 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2260 fd->loops[i].n1);
2261 }
2262 if (cond)
2263 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2264 else
2265 cond = t;
2266
2267 off = fold_convert_loc (loc, itype, off);
2268
2269 if (step
2270 || (fd->loops[i].cond_code == LT_EXPR
2271 ? !integer_onep (fd->loops[i].step)
2272 : !integer_minus_onep (fd->loops[i].step)))
2273 {
2274 if (step == NULL_TREE
2275 && TYPE_UNSIGNED (itype)
2276 && fd->loops[i].cond_code == GT_EXPR)
2277 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2278 fold_build1_loc (loc, NEGATE_EXPR, itype,
2279 s));
2280 else
2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2282 orig_off ? orig_off : off, s);
2283 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2284 build_int_cst (itype, 0));
2285 if (integer_zerop (t) && !warned_step)
2286 {
2287 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2288 "refers to iteration never in the iteration "
2289 "space");
2290 warned_step = true;
2291 }
2292 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2293 cond, t);
2294 }
2295
2296 if (i <= fd->collapse - 1 && fd->collapse > 1)
2297 t = fd->loop.v;
2298 else if (counts[i])
2299 t = counts[i];
2300 else
2301 {
2302 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2303 fd->loops[i].v, fd->loops[i].n1);
2304 t = fold_convert_loc (loc, fd->iter_type, t);
2305 }
2306 if (step)
2307 /* We have divided off by step already earlier. */;
2308 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2309 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2310 fold_build1_loc (loc, NEGATE_EXPR, itype,
2311 s));
2312 else
2313 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2314 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2315 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2316 off = fold_convert_loc (loc, fd->iter_type, off);
2317 if (i <= fd->collapse - 1 && fd->collapse > 1)
2318 {
2319 if (i)
2320 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2321 off);
2322 if (i < fd->collapse - 1)
2323 {
2324 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2325 counts[i]);
2326 continue;
2327 }
2328 }
2329 off = unshare_expr (off);
2330 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2331 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2332 true, GSI_SAME_STMT);
2333 args.safe_push (t);
2334 }
2335 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2336 gimple_set_location (g, loc);
2337 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2338
2339 cond = unshare_expr (cond);
2340 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2341 GSI_CONTINUE_LINKING);
2342 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2343 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2344 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2345 e1->probability = e3->probability.invert ();
2346 e1->flags = EDGE_TRUE_VALUE;
2347 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2348
2349 *gsi = gsi_after_labels (e2->dest);
2350 }
2351
2352 /* Expand all #pragma omp ordered depend(source) and
2353 #pragma omp ordered depend(sink:...) constructs in the current
2354 #pragma omp for ordered(n) region. */
2355
2356 static void
2357 expand_omp_ordered_source_sink (struct omp_region *region,
2358 struct omp_for_data *fd, tree *counts,
2359 basic_block cont_bb)
2360 {
2361 struct omp_region *inner;
2362 int i;
2363 for (i = fd->collapse - 1; i < fd->ordered; i++)
2364 if (i == fd->collapse - 1 && fd->collapse > 1)
2365 counts[i] = NULL_TREE;
2366 else if (i >= fd->collapse && !cont_bb)
2367 counts[i] = build_zero_cst (fd->iter_type);
2368 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2369 && integer_onep (fd->loops[i].step))
2370 counts[i] = NULL_TREE;
2371 else
2372 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2373 tree atype
2374 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2375 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2376 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2377
2378 for (inner = region->inner; inner; inner = inner->next)
2379 if (inner->type == GIMPLE_OMP_ORDERED)
2380 {
2381 gomp_ordered *ord_stmt = inner->ord_stmt;
2382 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2383 location_t loc = gimple_location (ord_stmt);
2384 tree c;
2385 for (c = gimple_omp_ordered_clauses (ord_stmt);
2386 c; c = OMP_CLAUSE_CHAIN (c))
2387 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2388 break;
2389 if (c)
2390 expand_omp_ordered_source (&gsi, fd, counts, loc);
2391 for (c = gimple_omp_ordered_clauses (ord_stmt);
2392 c; c = OMP_CLAUSE_CHAIN (c))
2393 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2394 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2395 gsi_remove (&gsi, true);
2396 }
2397 }
2398
2399 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2400 collapsed. */
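/* E.g. (hypothetically) for #pragma omp for ordered(2) without collapse,
   only the outermost loop is workshared; the inner loop is re-created
   here around the body, and on every inner iteration the corresponding
   element of the counts[fd->ordered] array is updated so that
   depend(source)/depend(sink) can refer to the inner iteration
   counter.  */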
2401
2402 static basic_block
2403 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2404 basic_block cont_bb, basic_block body_bb,
2405 bool ordered_lastprivate)
2406 {
2407 if (fd->ordered == fd->collapse)
2408 return cont_bb;
2409
2410 if (!cont_bb)
2411 {
2412 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2413 for (int i = fd->collapse; i < fd->ordered; i++)
2414 {
2415 tree type = TREE_TYPE (fd->loops[i].v);
2416 tree n1 = fold_convert (type, fd->loops[i].n1);
2417 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2418 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2419 size_int (i - fd->collapse + 1),
2420 NULL_TREE, NULL_TREE);
2421 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2422 }
2423 return NULL;
2424 }
2425
2426 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2427 {
2428 tree t, type = TREE_TYPE (fd->loops[i].v);
2429 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2430 expand_omp_build_assign (&gsi, fd->loops[i].v,
2431 fold_convert (type, fd->loops[i].n1));
2432 if (counts[i])
2433 expand_omp_build_assign (&gsi, counts[i],
2434 build_zero_cst (fd->iter_type));
2435 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2436 size_int (i - fd->collapse + 1),
2437 NULL_TREE, NULL_TREE);
2438 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2439 if (!gsi_end_p (gsi))
2440 gsi_prev (&gsi);
2441 else
2442 gsi = gsi_last_bb (body_bb);
2443 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2444 basic_block new_body = e1->dest;
2445 if (body_bb == cont_bb)
2446 cont_bb = new_body;
2447 edge e2 = NULL;
2448 basic_block new_header;
2449 if (EDGE_COUNT (cont_bb->preds) > 0)
2450 {
2451 gsi = gsi_last_bb (cont_bb);
2452 if (POINTER_TYPE_P (type))
2453 t = fold_build_pointer_plus (fd->loops[i].v,
2454 fold_convert (sizetype,
2455 fd->loops[i].step));
2456 else
2457 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2458 fold_convert (type, fd->loops[i].step));
2459 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2460 if (counts[i])
2461 {
2462 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2463 build_int_cst (fd->iter_type, 1));
2464 expand_omp_build_assign (&gsi, counts[i], t);
2465 t = counts[i];
2466 }
2467 else
2468 {
2469 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2470 fd->loops[i].v, fd->loops[i].n1);
2471 t = fold_convert (fd->iter_type, t);
2472 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2473 true, GSI_SAME_STMT);
2474 }
2475 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2476 size_int (i - fd->collapse + 1),
2477 NULL_TREE, NULL_TREE);
2478 expand_omp_build_assign (&gsi, aref, t);
2479 gsi_prev (&gsi);
2480 e2 = split_block (cont_bb, gsi_stmt (gsi));
2481 new_header = e2->dest;
2482 }
2483 else
2484 new_header = cont_bb;
2485 gsi = gsi_after_labels (new_header);
2486 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2487 true, GSI_SAME_STMT);
2488 tree n2
2489 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2490 true, NULL_TREE, true, GSI_SAME_STMT);
2491 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2492 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2493 edge e3 = split_block (new_header, gsi_stmt (gsi));
2494 cont_bb = e3->dest;
2495 remove_edge (e1);
2496 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2497 e3->flags = EDGE_FALSE_VALUE;
2498 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2499 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2500 e1->probability = e3->probability.invert ();
2501
2502 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2503 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2504
2505 if (e2)
2506 {
2507 struct loop *loop = alloc_loop ();
2508 loop->header = new_header;
2509 loop->latch = e2->src;
2510 add_loop (loop, body_bb->loop_father);
2511 }
2512 }
2513
2514 /* If there are any lastprivate clauses and it is possible some loops
2515 might have zero iterations, ensure all the decls are initialized,
2516 otherwise we could crash evaluating C++ class iterators with lastprivate
2517 clauses. */
2518 bool need_inits = false;
2519 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2520 if (need_inits)
2521 {
2522 tree type = TREE_TYPE (fd->loops[i].v);
2523 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2524 expand_omp_build_assign (&gsi, fd->loops[i].v,
2525 fold_convert (type, fd->loops[i].n1));
2526 }
2527 else
2528 {
2529 tree type = TREE_TYPE (fd->loops[i].v);
2530 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2531 boolean_type_node,
2532 fold_convert (type, fd->loops[i].n1),
2533 fold_convert (type, fd->loops[i].n2));
2534 if (!integer_onep (this_cond))
2535 need_inits = true;
2536 }
2537
2538 return cont_bb;
2539 }
2540
2541 /* A subroutine of expand_omp_for. Generate code for a parallel
2542 loop with any schedule. Given parameters:
2543
2544 for (V = N1; V cond N2; V += STEP) BODY;
2545
2546 where COND is "<" or ">", we generate pseudocode
2547
2548 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2549 if (more) goto L0; else goto L3;
2550 L0:
2551 V = istart0;
2552 iend = iend0;
2553 L1:
2554 BODY;
2555 V += STEP;
2556 if (V cond iend) goto L1; else goto L2;
2557 L2:
2558 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2559 L3:
2560
2561 If this is a combined omp parallel loop, instead of the call to
2562 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2563 If this is gimple_omp_for_combined_p loop, then instead of assigning
2564 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2565 inner GIMPLE_OMP_FOR and V += STEP; and
2566 if (V cond iend) goto L1; else goto L2; are removed.
2567
2568 For collapsed loops, given parameters:
2569 collapse(3)
2570 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2571 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2572 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2573 BODY;
2574
2575 we generate pseudocode
2576
2577 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2578 if (cond3 is <)
2579 adj = STEP3 - 1;
2580 else
2581 adj = STEP3 + 1;
2582 count3 = (adj + N32 - N31) / STEP3;
2583 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2584 if (cond2 is <)
2585 adj = STEP2 - 1;
2586 else
2587 adj = STEP2 + 1;
2588 count2 = (adj + N22 - N21) / STEP2;
2589 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2590 if (cond1 is <)
2591 adj = STEP1 - 1;
2592 else
2593 adj = STEP1 + 1;
2594 count1 = (adj + N12 - N11) / STEP1;
2595 count = count1 * count2 * count3;
2596 goto Z1;
2597 Z0:
2598 count = 0;
2599 Z1:
2600 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2601 if (more) goto L0; else goto L3;
2602 L0:
2603 V = istart0;
2604 T = V;
2605 V3 = N31 + (T % count3) * STEP3;
2606 T = T / count3;
2607 V2 = N21 + (T % count2) * STEP2;
2608 T = T / count2;
2609 V1 = N11 + T * STEP1;
2610 iend = iend0;
2611 L1:
2612 BODY;
2613 V += 1;
2614 if (V < iend) goto L10; else goto L2;
2615 L10:
2616 V3 += STEP3;
2617 if (V3 cond3 N32) goto L1; else goto L11;
2618 L11:
2619 V3 = N31;
2620 V2 += STEP2;
2621 if (V2 cond2 N22) goto L1; else goto L12;
2622 L12:
2623 V2 = N21;
2624 V1 += STEP1;
2625 goto L1;
2626 L2:
2627 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2628 L3:
2629
2630 */
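/* As a concrete (hypothetical) instance of the pseudocode above, for
   #pragma omp for schedule(dynamic, 4) on for (i = 0; i < n; i++),
   START_FN/NEXT_FN would be the GOMP_loop_dynamic_start/_next builtins
   and the generated control flow corresponds roughly to:

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 for (i = istart0; i < iend0; i++)
	   BODY;
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */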
2631
2632 static void
2633 expand_omp_for_generic (struct omp_region *region,
2634 struct omp_for_data *fd,
2635 enum built_in_function start_fn,
2636 enum built_in_function next_fn,
2637 tree sched_arg,
2638 gimple *inner_stmt)
2639 {
2640 tree type, istart0, iend0, iend;
2641 tree t, vmain, vback, bias = NULL_TREE;
2642 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2643 basic_block l2_bb = NULL, l3_bb = NULL;
2644 gimple_stmt_iterator gsi;
2645 gassign *assign_stmt;
2646 bool in_combined_parallel = is_combined_parallel (region);
2647 bool broken_loop = region->cont == NULL;
2648 edge e, ne;
2649 tree *counts = NULL;
2650 int i;
2651 bool ordered_lastprivate = false;
2652
2653 gcc_assert (!broken_loop || !in_combined_parallel);
2654 gcc_assert (fd->iter_type == long_integer_type_node
2655 || !in_combined_parallel);
2656
2657 entry_bb = region->entry;
2658 cont_bb = region->cont;
2659 collapse_bb = NULL;
2660 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2661 gcc_assert (broken_loop
2662 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2663 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2664 l1_bb = single_succ (l0_bb);
2665 if (!broken_loop)
2666 {
2667 l2_bb = create_empty_bb (cont_bb);
2668 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2669 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2670 == l1_bb));
2671 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2672 }
2673 else
2674 l2_bb = NULL;
2675 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2676 exit_bb = region->exit;
2677
2678 gsi = gsi_last_nondebug_bb (entry_bb);
2679
2680 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2681 if (fd->ordered
2682 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2683 OMP_CLAUSE_LASTPRIVATE))
2684 ordered_lastprivate = true;
2685 tree reductions = NULL_TREE;
2686 tree mem = NULL_TREE;
2687 if (sched_arg)
2688 {
2689 if (fd->have_reductemp)
2690 {
2691 tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2692 OMP_CLAUSE__REDUCTEMP_);
2693 reductions = OMP_CLAUSE_DECL (c);
2694 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2695 gimple *g = SSA_NAME_DEF_STMT (reductions);
2696 reductions = gimple_assign_rhs1 (g);
2697 OMP_CLAUSE_DECL (c) = reductions;
2698 entry_bb = gimple_bb (g);
2699 edge e = split_block (entry_bb, g);
2700 if (region->entry == entry_bb)
2701 region->entry = e->dest;
2702 gsi = gsi_last_bb (entry_bb);
2703 }
2704 else
2705 reductions = null_pointer_node;
2706 /* For now. */
2707 mem = null_pointer_node;
2708 }
2709 if (fd->collapse > 1 || fd->ordered)
2710 {
2711 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2712 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2713
2714 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2715 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2716 zero_iter1_bb, first_zero_iter1,
2717 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2718
2719 if (zero_iter1_bb)
2720 {
2721 /* Some counts[i] vars might be uninitialized if
2722 some loop has zero iterations. But the body shouldn't
2723 be executed in that case, so just avoid uninit warnings. */
2724 for (i = first_zero_iter1;
2725 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2726 if (SSA_VAR_P (counts[i]))
2727 TREE_NO_WARNING (counts[i]) = 1;
2728 gsi_prev (&gsi);
2729 e = split_block (entry_bb, gsi_stmt (gsi));
2730 entry_bb = e->dest;
2731 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2732 gsi = gsi_last_nondebug_bb (entry_bb);
2733 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2734 get_immediate_dominator (CDI_DOMINATORS,
2735 zero_iter1_bb));
2736 }
2737 if (zero_iter2_bb)
2738 {
2739 /* Some counts[i] vars might be uninitialized if
2740 some loop has zero iterations. But the body shouldn't
2741 be executed in that case, so just avoid uninit warnings. */
2742 for (i = first_zero_iter2; i < fd->ordered; i++)
2743 if (SSA_VAR_P (counts[i]))
2744 TREE_NO_WARNING (counts[i]) = 1;
2745 if (zero_iter1_bb)
2746 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2747 else
2748 {
2749 gsi_prev (&gsi);
2750 e = split_block (entry_bb, gsi_stmt (gsi));
2751 entry_bb = e->dest;
2752 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2753 gsi = gsi_last_nondebug_bb (entry_bb);
2754 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2755 get_immediate_dominator
2756 (CDI_DOMINATORS, zero_iter2_bb));
2757 }
2758 }
2759 if (fd->collapse == 1)
2760 {
2761 counts[0] = fd->loop.n2;
2762 fd->loop = fd->loops[0];
2763 }
2764 }
2765
2766 type = TREE_TYPE (fd->loop.v);
2767 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2768 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2769 TREE_ADDRESSABLE (istart0) = 1;
2770 TREE_ADDRESSABLE (iend0) = 1;
2771
2772 /* See if we need to bias by LLONG_MIN. */
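/* The iteration space is handed to the unsigned long long runtime
   entry points, so a signed range that can cross zero would wrap when
   reinterpreted as unsigned.  Adding bias = (unsigned long long)
   LLONG_MIN shifts every signed value into [0, ULLONG_MAX] while
   preserving the order; the bias is subtracted again below when
   istart0/iend0 are converted back to the loop's type.  */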
2773 if (fd->iter_type == long_long_unsigned_type_node
2774 && TREE_CODE (type) == INTEGER_TYPE
2775 && !TYPE_UNSIGNED (type)
2776 && fd->ordered == 0)
2777 {
2778 tree n1, n2;
2779
2780 if (fd->loop.cond_code == LT_EXPR)
2781 {
2782 n1 = fd->loop.n1;
2783 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2784 }
2785 else
2786 {
2787 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2788 n2 = fd->loop.n1;
2789 }
2790 if (TREE_CODE (n1) != INTEGER_CST
2791 || TREE_CODE (n2) != INTEGER_CST
2792 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2793 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2794 }
2795
2796 gimple_stmt_iterator gsif = gsi;
2797 gsi_prev (&gsif);
2798
2799 tree arr = NULL_TREE;
2800 if (in_combined_parallel)
2801 {
2802 gcc_assert (fd->ordered == 0);
2803 /* In a combined parallel loop, emit a call to
2804 GOMP_loop_foo_next. */
2805 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2806 build_fold_addr_expr (istart0),
2807 build_fold_addr_expr (iend0));
2808 }
2809 else
2810 {
2811 tree t0, t1, t2, t3, t4;
2812 /* If this is not a combined parallel loop, emit a call to
2813 GOMP_loop_foo_start in ENTRY_BB. */
2814 t4 = build_fold_addr_expr (iend0);
2815 t3 = build_fold_addr_expr (istart0);
2816 if (fd->ordered)
2817 {
2818 t0 = build_int_cst (unsigned_type_node,
2819 fd->ordered - fd->collapse + 1);
2820 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2821 fd->ordered
2822 - fd->collapse + 1),
2823 ".omp_counts");
2824 DECL_NAMELESS (arr) = 1;
2825 TREE_ADDRESSABLE (arr) = 1;
2826 TREE_STATIC (arr) = 1;
2827 vec<constructor_elt, va_gc> *v;
2828 vec_alloc (v, fd->ordered - fd->collapse + 1);
2829 int idx;
2830
2831 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2832 {
2833 tree c;
2834 if (idx == 0 && fd->collapse > 1)
2835 c = fd->loop.n2;
2836 else
2837 c = counts[idx + fd->collapse - 1];
2838 tree purpose = size_int (idx);
2839 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2840 if (TREE_CODE (c) != INTEGER_CST)
2841 TREE_STATIC (arr) = 0;
2842 }
2843
2844 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2845 if (!TREE_STATIC (arr))
2846 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2847 void_type_node, arr),
2848 true, NULL_TREE, true, GSI_SAME_STMT);
2849 t1 = build_fold_addr_expr (arr);
2850 t2 = NULL_TREE;
2851 }
2852 else
2853 {
2854 t2 = fold_convert (fd->iter_type, fd->loop.step);
2855 t1 = fd->loop.n2;
2856 t0 = fd->loop.n1;
2857 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2858 {
2859 tree innerc
2860 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2861 OMP_CLAUSE__LOOPTEMP_);
2862 gcc_assert (innerc);
2863 t0 = OMP_CLAUSE_DECL (innerc);
2864 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2865 OMP_CLAUSE__LOOPTEMP_);
2866 gcc_assert (innerc);
2867 t1 = OMP_CLAUSE_DECL (innerc);
2868 }
2869 if (POINTER_TYPE_P (TREE_TYPE (t0))
2870 && TYPE_PRECISION (TREE_TYPE (t0))
2871 != TYPE_PRECISION (fd->iter_type))
2872 {
2873 /* Avoid casting pointers to integer of a different size. */
2874 tree itype = signed_type_for (type);
2875 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2876 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2877 }
2878 else
2879 {
2880 t1 = fold_convert (fd->iter_type, t1);
2881 t0 = fold_convert (fd->iter_type, t0);
2882 }
2883 if (bias)
2884 {
2885 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2886 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2887 }
2888 }
2889 if (fd->iter_type == long_integer_type_node || fd->ordered)
2890 {
2891 if (fd->chunk_size)
2892 {
2893 t = fold_convert (fd->iter_type, fd->chunk_size);
2894 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2895 if (sched_arg)
2896 {
2897 if (fd->ordered)
2898 t = build_call_expr (builtin_decl_explicit (start_fn),
2899 8, t0, t1, sched_arg, t, t3, t4,
2900 reductions, mem);
2901 else
2902 t = build_call_expr (builtin_decl_explicit (start_fn),
2903 9, t0, t1, t2, sched_arg, t, t3, t4,
2904 reductions, mem);
2905 }
2906 else if (fd->ordered)
2907 t = build_call_expr (builtin_decl_explicit (start_fn),
2908 5, t0, t1, t, t3, t4);
2909 else
2910 t = build_call_expr (builtin_decl_explicit (start_fn),
2911 6, t0, t1, t2, t, t3, t4);
2912 }
2913 else if (fd->ordered)
2914 t = build_call_expr (builtin_decl_explicit (start_fn),
2915 4, t0, t1, t3, t4);
2916 else
2917 t = build_call_expr (builtin_decl_explicit (start_fn),
2918 5, t0, t1, t2, t3, t4);
2919 }
2920 else
2921 {
2922 tree t5;
2923 tree c_bool_type;
2924 tree bfn_decl;
2925
2926 /* The GOMP_loop_ull_*start functions have an additional boolean
2927 argument, true for < loops and false for > loops.
2928 In Fortran, the C bool type can be different from
2929 boolean_type_node. */
2930 bfn_decl = builtin_decl_explicit (start_fn);
2931 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2932 t5 = build_int_cst (c_bool_type,
2933 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2934 if (fd->chunk_size)
2935 {
2936 tree bfn_decl = builtin_decl_explicit (start_fn);
2937 t = fold_convert (fd->iter_type, fd->chunk_size);
2938 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2939 if (sched_arg)
2940 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2941 t, t3, t4, reductions, mem);
2942 else
2943 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2944 }
2945 else
2946 t = build_call_expr (builtin_decl_explicit (start_fn),
2947 6, t5, t0, t1, t2, t3, t4);
2948 }
2949 }
2950 if (TREE_TYPE (t) != boolean_type_node)
2951 t = fold_build2 (NE_EXPR, boolean_type_node,
2952 t, build_int_cst (TREE_TYPE (t), 0));
2953 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2954 true, GSI_SAME_STMT);
2955 if (arr && !TREE_STATIC (arr))
2956 {
2957 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2958 TREE_THIS_VOLATILE (clobber) = 1;
2959 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2960 GSI_SAME_STMT);
2961 }
2962 if (fd->have_reductemp)
2963 {
2964 gimple *g = gsi_stmt (gsi);
2965 gsi_remove (&gsi, true);
2966 release_ssa_name (gimple_assign_lhs (g));
2967
2968 entry_bb = region->entry;
2969 gsi = gsi_last_nondebug_bb (entry_bb);
2970
2971 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2972 }
2973 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2974
2975 /* Remove the GIMPLE_OMP_FOR statement. */
2976 gsi_remove (&gsi, true);
2977
2978 if (gsi_end_p (gsif))
2979 gsif = gsi_after_labels (gsi_bb (gsif));
2980 gsi_next (&gsif);
2981
2982 /* Iteration setup for sequential loop goes in L0_BB. */
2983 tree startvar = fd->loop.v;
2984 tree endvar = NULL_TREE;
2985
2986 if (gimple_omp_for_combined_p (fd->for_stmt))
2987 {
2988 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2989 && gimple_omp_for_kind (inner_stmt)
2990 == GF_OMP_FOR_KIND_SIMD);
2991 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2992 OMP_CLAUSE__LOOPTEMP_);
2993 gcc_assert (innerc);
2994 startvar = OMP_CLAUSE_DECL (innerc);
2995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2996 OMP_CLAUSE__LOOPTEMP_);
2997 gcc_assert (innerc);
2998 endvar = OMP_CLAUSE_DECL (innerc);
2999 }
3000
3001 gsi = gsi_start_bb (l0_bb);
3002 t = istart0;
3003 if (fd->ordered && fd->collapse == 1)
3004 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3005 fold_convert (fd->iter_type, fd->loop.step));
3006 else if (bias)
3007 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3008 if (fd->ordered && fd->collapse == 1)
3009 {
3010 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3011 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3012 fd->loop.n1, fold_convert (sizetype, t));
3013 else
3014 {
3015 t = fold_convert (TREE_TYPE (startvar), t);
3016 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3017 fd->loop.n1, t);
3018 }
3019 }
3020 else
3021 {
3022 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3023 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3024 t = fold_convert (TREE_TYPE (startvar), t);
3025 }
3026 t = force_gimple_operand_gsi (&gsi, t,
3027 DECL_P (startvar)
3028 && TREE_ADDRESSABLE (startvar),
3029 NULL_TREE, false, GSI_CONTINUE_LINKING);
3030 assign_stmt = gimple_build_assign (startvar, t);
3031 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3032
3033 t = iend0;
3034 if (fd->ordered && fd->collapse == 1)
3035 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3036 fold_convert (fd->iter_type, fd->loop.step));
3037 else if (bias)
3038 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3039 if (fd->ordered && fd->collapse == 1)
3040 {
3041 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3042 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3043 fd->loop.n1, fold_convert (sizetype, t));
3044 else
3045 {
3046 t = fold_convert (TREE_TYPE (startvar), t);
3047 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, t);
3049 }
3050 }
3051 else
3052 {
3053 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3054 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3055 t = fold_convert (TREE_TYPE (startvar), t);
3056 }
3057 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3058 false, GSI_CONTINUE_LINKING);
3059 if (endvar)
3060 {
3061 assign_stmt = gimple_build_assign (endvar, iend);
3062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3063 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3064 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3065 else
3066 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 }
3069 /* Handle linear clause adjustments. */
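/* E.g. (hypothetically) for #pragma omp for linear(x:2), each thread
   first records the incoming value of x and then, before executing its
   chunk, sets x = incoming + ((startvar - N1) / STEP) * 2, i.e. the
   privatized variable is advanced by the linear step times the number
   of logical iterations that precede the chunk.  */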
3070 tree itercnt = NULL_TREE;
3071 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3072 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3073 c; c = OMP_CLAUSE_CHAIN (c))
3074 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3075 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3076 {
3077 tree d = OMP_CLAUSE_DECL (c);
3078 bool is_ref = omp_is_reference (d);
3079 tree t = d, a, dest;
3080 if (is_ref)
3081 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3082 tree type = TREE_TYPE (t);
3083 if (POINTER_TYPE_P (type))
3084 type = sizetype;
3085 dest = unshare_expr (t);
3086 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3087 expand_omp_build_assign (&gsif, v, t);
3088 if (itercnt == NULL_TREE)
3089 {
3090 itercnt = startvar;
3091 tree n1 = fd->loop.n1;
3092 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3093 {
3094 itercnt
3095 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3096 itercnt);
3097 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3098 }
3099 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3100 itercnt, n1);
3101 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3102 itercnt, fd->loop.step);
3103 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3104 NULL_TREE, false,
3105 GSI_CONTINUE_LINKING);
3106 }
3107 a = fold_build2 (MULT_EXPR, type,
3108 fold_convert (type, itercnt),
3109 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3110 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3111 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3112 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3113 false, GSI_CONTINUE_LINKING);
3114 assign_stmt = gimple_build_assign (dest, t);
3115 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3116 }
3117 if (fd->collapse > 1)
3118 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3119
3120 if (fd->ordered)
3121 {
3122 /* Until now, the counts array contained the number of iterations
3123 (or a variable holding it) for the ith loop. From now on, we need
3124 those counts only for the collapsed loops, and only from the 2nd
3125 collapsed one up to the last. Move them one element earlier;
3126 we'll use counts[fd->collapse - 1] for the first source/sink
3127 iteration counter and so on, and counts[fd->ordered]
3128 as the array holding the current counter values for
3129 depend(source). */
3130 if (fd->collapse > 1)
3131 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3132 if (broken_loop)
3133 {
3134 int i;
3135 for (i = fd->collapse; i < fd->ordered; i++)
3136 {
3137 tree type = TREE_TYPE (fd->loops[i].v);
3138 tree this_cond
3139 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3140 fold_convert (type, fd->loops[i].n1),
3141 fold_convert (type, fd->loops[i].n2));
3142 if (!integer_onep (this_cond))
3143 break;
3144 }
3145 if (i < fd->ordered)
3146 {
3147 cont_bb
3148 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3149 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3150 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3151 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3152 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3153 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3154 make_edge (cont_bb, l1_bb, 0);
3155 l2_bb = create_empty_bb (cont_bb);
3156 broken_loop = false;
3157 }
3158 }
3159 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3160 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3161 ordered_lastprivate);
3162 if (counts[fd->collapse - 1])
3163 {
3164 gcc_assert (fd->collapse == 1);
3165 gsi = gsi_last_bb (l0_bb);
3166 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3167 istart0, true);
3168 gsi = gsi_last_bb (cont_bb);
3169 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3170 build_int_cst (fd->iter_type, 1));
3171 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3172 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3173 size_zero_node, NULL_TREE, NULL_TREE);
3174 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3175 t = counts[fd->collapse - 1];
3176 }
3177 else if (fd->collapse > 1)
3178 t = fd->loop.v;
3179 else
3180 {
3181 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3182 fd->loops[0].v, fd->loops[0].n1);
3183 t = fold_convert (fd->iter_type, t);
3184 }
3185 gsi = gsi_last_bb (l0_bb);
3186 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3187 size_zero_node, NULL_TREE, NULL_TREE);
3188 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3189 false, GSI_CONTINUE_LINKING);
3190 expand_omp_build_assign (&gsi, aref, t, true);
3191 }
3192
3193 if (!broken_loop)
3194 {
3195 /* Code to control the increment and predicate for the sequential
3196 loop goes in the CONT_BB. */
3197 gsi = gsi_last_nondebug_bb (cont_bb);
3198 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3199 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3200 vmain = gimple_omp_continue_control_use (cont_stmt);
3201 vback = gimple_omp_continue_control_def (cont_stmt);
3202
3203 if (!gimple_omp_for_combined_p (fd->for_stmt))
3204 {
3205 if (POINTER_TYPE_P (type))
3206 t = fold_build_pointer_plus (vmain, fd->loop.step);
3207 else
3208 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3209 t = force_gimple_operand_gsi (&gsi, t,
3210 DECL_P (vback)
3211 && TREE_ADDRESSABLE (vback),
3212 NULL_TREE, true, GSI_SAME_STMT);
3213 assign_stmt = gimple_build_assign (vback, t);
3214 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3215
3216 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3217 {
3218 tree tem;
3219 if (fd->collapse > 1)
3220 tem = fd->loop.v;
3221 else
3222 {
3223 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3224 fd->loops[0].v, fd->loops[0].n1);
3225 tem = fold_convert (fd->iter_type, tem);
3226 }
3227 tree aref = build4 (ARRAY_REF, fd->iter_type,
3228 counts[fd->ordered], size_zero_node,
3229 NULL_TREE, NULL_TREE);
3230 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3231 true, GSI_SAME_STMT);
3232 expand_omp_build_assign (&gsi, aref, tem);
3233 }
3234
3235 t = build2 (fd->loop.cond_code, boolean_type_node,
3236 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3237 iend);
3238 gcond *cond_stmt = gimple_build_cond_empty (t);
3239 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3240 }
3241
3242 /* Remove GIMPLE_OMP_CONTINUE. */
3243 gsi_remove (&gsi, true);
3244
3245 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3246 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3247
3248 /* Emit code to get the next parallel iteration in L2_BB. */
3249 gsi = gsi_start_bb (l2_bb);
3250
3251 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3252 build_fold_addr_expr (istart0),
3253 build_fold_addr_expr (iend0));
3254 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3255 false, GSI_CONTINUE_LINKING);
3256 if (TREE_TYPE (t) != boolean_type_node)
3257 t = fold_build2 (NE_EXPR, boolean_type_node,
3258 t, build_int_cst (TREE_TYPE (t), 0));
3259 gcond *cond_stmt = gimple_build_cond_empty (t);
3260 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3261 }
3262
3263 /* Add the loop cleanup function. */
3264 gsi = gsi_last_nondebug_bb (exit_bb);
3265 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3266 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3267 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3268 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3269 else
3270 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3271 gcall *call_stmt = gimple_build_call (t, 0);
3272 if (fd->ordered)
3273 {
3274 tree arr = counts[fd->ordered];
3275 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3276 TREE_THIS_VOLATILE (clobber) = 1;
3277 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3278 GSI_SAME_STMT);
3279 }
3280 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3281 {
3282 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3283 if (fd->have_reductemp)
3284 {
3285 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3286 gimple_call_lhs (call_stmt));
3287 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3288 }
3289 }
3290 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3291 gsi_remove (&gsi, true);
3292
3293 /* Connect the new blocks. */
3294 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3295 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3296
3297 if (!broken_loop)
3298 {
3299 gimple_seq phis;
3300
3301 e = find_edge (cont_bb, l3_bb);
3302 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3303
3304 phis = phi_nodes (l3_bb);
3305 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3306 {
3307 gimple *phi = gsi_stmt (gsi);
3308 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3309 PHI_ARG_DEF_FROM_EDGE (phi, e));
3310 }
3311 remove_edge (e);
3312
3313 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3314 e = find_edge (cont_bb, l1_bb);
3315 if (e == NULL)
3316 {
3317 e = BRANCH_EDGE (cont_bb);
3318 gcc_assert (single_succ (e->dest) == l1_bb);
3319 }
3320 if (gimple_omp_for_combined_p (fd->for_stmt))
3321 {
3322 remove_edge (e);
3323 e = NULL;
3324 }
3325 else if (fd->collapse > 1)
3326 {
3327 remove_edge (e);
3328 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3329 }
3330 else
3331 e->flags = EDGE_TRUE_VALUE;
3332 if (e)
3333 {
3334 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3335 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3336 }
3337 else
3338 {
3339 e = find_edge (cont_bb, l2_bb);
3340 e->flags = EDGE_FALLTHRU;
3341 }
3342 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3343
3344 if (gimple_in_ssa_p (cfun))
3345 {
3346 /* Add phis to the outer loop that connect to the phis in the inner,
3347 original loop, and move the loop entry value of the inner phi to
3348 the loop entry value of the outer phi. */
3349 gphi_iterator psi;
3350 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3351 {
3352 location_t locus;
3353 gphi *nphi;
3354 gphi *exit_phi = psi.phi ();
3355
3356 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3357 continue;
3358
3359 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3360 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3361
3362 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3363 edge latch_to_l1 = find_edge (latch, l1_bb);
3364 gphi *inner_phi
3365 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3366
3367 tree t = gimple_phi_result (exit_phi);
3368 tree new_res = copy_ssa_name (t, NULL);
3369 nphi = create_phi_node (new_res, l0_bb);
3370
3371 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3372 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3373 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3374 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3375 add_phi_arg (nphi, t, entry_to_l0, locus);
3376
3377 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3378 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3379
3380 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3381 }
3382 }
3383
3384 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3385 recompute_dominator (CDI_DOMINATORS, l2_bb));
3386 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3387 recompute_dominator (CDI_DOMINATORS, l3_bb));
3388 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3389 recompute_dominator (CDI_DOMINATORS, l0_bb));
3390 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3391 recompute_dominator (CDI_DOMINATORS, l1_bb));
3392
3393 /* We enter expand_omp_for_generic with a loop. This original loop may
3394 have its own loop struct, or it may be part of an outer loop struct
3395 (which may be the fake loop). */
3396 struct loop *outer_loop = entry_bb->loop_father;
3397 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3398
3399 add_bb_to_loop (l2_bb, outer_loop);
3400
3401 /* We've added a new loop around the original loop. Allocate the
3402 corresponding loop struct. */
3403 struct loop *new_loop = alloc_loop ();
3404 new_loop->header = l0_bb;
3405 new_loop->latch = l2_bb;
3406 add_loop (new_loop, outer_loop);
3407
3408 /* Allocate a loop structure for the original loop unless we already
3409 had one. */
3410 if (!orig_loop_has_loop_struct
3411 && !gimple_omp_for_combined_p (fd->for_stmt))
3412 {
3413 struct loop *orig_loop = alloc_loop ();
3414 orig_loop->header = l1_bb;
3415 /* The loop may have multiple latches. */
3416 add_loop (orig_loop, new_loop);
3417 }
3418 }
3419 }
3420
3421 /* A subroutine of expand_omp_for. Generate code for a parallel
3422 loop with static schedule and no specified chunk size. Given
3423 parameters:
3424
3425 for (V = N1; V cond N2; V += STEP) BODY;
3426
3427 where COND is "<" or ">", we generate pseudocode
3428
3429 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3430 if (cond is <)
3431 adj = STEP - 1;
3432 else
3433 adj = STEP + 1;
3434 if ((__typeof (V)) -1 > 0 && cond is >)
3435 n = -(adj + N2 - N1) / -STEP;
3436 else
3437 n = (adj + N2 - N1) / STEP;
3438 q = n / nthreads;
3439 tt = n % nthreads;
3440 if (threadid < tt) goto L3; else goto L4;
3441 L3:
3442 tt = 0;
3443 q = q + 1;
3444 L4:
3445 s0 = q * threadid + tt;
3446 e0 = s0 + q;
3447 V = s0 * STEP + N1;
3448 if (s0 >= e0) goto L2; else goto L0;
3449 L0:
3450 e = e0 * STEP + N1;
3451 L1:
3452 BODY;
3453 V += STEP;
3454 if (V cond e) goto L1;
3455 L2:
3456 */
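/* Hedged illustration only, not used by the pass: the per-thread range
   computed by the pseudocode above, written out as plain C.

     static void
     static_nochunk_range (long n, long nthreads, long threadid,
			   long *s0, long *e0)
     {
       long q = n / nthreads;
       long tt = n % nthreads;
       if (threadid < tt)
	 {
	   tt = 0;		-- L3: the first TT threads take one extra
	   q++;
	 }
       *s0 = q * threadid + tt;	-- L4
       *e0 = *s0 + q;
     }

   E.g. n = 10 and nthreads = 4 give q = 2, tt = 2 and the per-thread
   ranges [0,3) [3,6) [6,8) [8,10), covering the iteration space exactly
   once.  */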
3457
3458 static void
3459 expand_omp_for_static_nochunk (struct omp_region *region,
3460 struct omp_for_data *fd,
3461 gimple *inner_stmt)
3462 {
3463 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3464 tree type, itype, vmain, vback;
3465 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3466 basic_block body_bb, cont_bb, collapse_bb = NULL;
3467 basic_block fin_bb;
3468 gimple_stmt_iterator gsi;
3469 edge ep;
3470 bool broken_loop = region->cont == NULL;
3471 tree *counts = NULL;
3472 tree n1, n2, step;
3473 tree reductions = NULL_TREE;
3474
3475 itype = type = TREE_TYPE (fd->loop.v);
3476 if (POINTER_TYPE_P (type))
3477 itype = signed_type_for (type);
3478
3479 entry_bb = region->entry;
3480 cont_bb = region->cont;
3481 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3482 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3483 gcc_assert (broken_loop
3484 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3485 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3486 body_bb = single_succ (seq_start_bb);
3487 if (!broken_loop)
3488 {
3489 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3490 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3491 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3492 }
3493 exit_bb = region->exit;
3494
3495 /* Iteration space partitioning goes in ENTRY_BB. */
3496 gsi = gsi_last_nondebug_bb (entry_bb);
3497 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3498
3499 if (fd->collapse > 1)
3500 {
3501 int first_zero_iter = -1, dummy = -1;
3502 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3503
3504 counts = XALLOCAVEC (tree, fd->collapse);
3505 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3506 fin_bb, first_zero_iter,
3507 dummy_bb, dummy, l2_dom_bb);
3508 t = NULL_TREE;
3509 }
3510 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3511 t = integer_one_node;
3512 else
3513 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3514 fold_convert (type, fd->loop.n1),
3515 fold_convert (type, fd->loop.n2));
3516 if (fd->collapse == 1
3517 && TYPE_UNSIGNED (type)
3518 && (t == NULL_TREE || !integer_onep (t)))
3519 {
3520 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3521 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3522 true, GSI_SAME_STMT);
3523 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3524 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3525 true, GSI_SAME_STMT);
3526 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3527 NULL_TREE, NULL_TREE);
3528 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3529 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3530 expand_omp_regimplify_p, NULL, NULL)
3531 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3532 expand_omp_regimplify_p, NULL, NULL))
3533 {
3534 gsi = gsi_for_stmt (cond_stmt);
3535 gimple_regimplify_operands (cond_stmt, &gsi);
3536 }
3537 ep = split_block (entry_bb, cond_stmt);
3538 ep->flags = EDGE_TRUE_VALUE;
3539 entry_bb = ep->dest;
3540 ep->probability = profile_probability::very_likely ();
3541 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3542 ep->probability = profile_probability::very_unlikely ();
3543 if (gimple_in_ssa_p (cfun))
3544 {
3545 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3546 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3547 !gsi_end_p (gpi); gsi_next (&gpi))
3548 {
3549 gphi *phi = gpi.phi ();
3550 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3551 ep, UNKNOWN_LOCATION);
3552 }
3553 }
3554 gsi = gsi_last_bb (entry_bb);
3555 }
3556
3557 if (fd->have_reductemp)
3558 {
3559 tree t1 = build_int_cst (long_integer_type_node, 0);
3560 tree t2 = build_int_cst (long_integer_type_node, 1);
3561 tree t3 = build_int_cstu (long_integer_type_node,
3562 (HOST_WIDE_INT_1U << 31) + 1);
3563 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3564 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3565 reductions = OMP_CLAUSE_DECL (clauses);
3566 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3567 gimple *g = SSA_NAME_DEF_STMT (reductions);
3568 reductions = gimple_assign_rhs1 (g);
3569 OMP_CLAUSE_DECL (clauses) = reductions;
3570 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
3571 tree t
3572 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3573 9, t1, t2, t2, t3, t1, null_pointer_node,
3574 null_pointer_node, reductions, null_pointer_node);
3575 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3576 true, GSI_SAME_STMT);
3577 gsi_remove (&gsi2, true);
3578 release_ssa_name (gimple_assign_lhs (g));
3579 }
3580 switch (gimple_omp_for_kind (fd->for_stmt))
3581 {
3582 case GF_OMP_FOR_KIND_FOR:
3583 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3584 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3585 break;
3586 case GF_OMP_FOR_KIND_DISTRIBUTE:
3587 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3588 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3589 break;
3590 default:
3591 gcc_unreachable ();
3592 }
3593 nthreads = build_call_expr (nthreads, 0);
3594 nthreads = fold_convert (itype, nthreads);
3595 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3596 true, GSI_SAME_STMT);
3597 threadid = build_call_expr (threadid, 0);
3598 threadid = fold_convert (itype, threadid);
3599 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3600 true, GSI_SAME_STMT);
3601
3602 n1 = fd->loop.n1;
3603 n2 = fd->loop.n2;
3604 step = fd->loop.step;
3605 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3606 {
3607 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3608 OMP_CLAUSE__LOOPTEMP_);
3609 gcc_assert (innerc);
3610 n1 = OMP_CLAUSE_DECL (innerc);
3611 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3612 OMP_CLAUSE__LOOPTEMP_);
3613 gcc_assert (innerc);
3614 n2 = OMP_CLAUSE_DECL (innerc);
3615 }
3616 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3617 true, NULL_TREE, true, GSI_SAME_STMT);
3618 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3619 true, NULL_TREE, true, GSI_SAME_STMT);
3620 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3621 true, NULL_TREE, true, GSI_SAME_STMT);
3622
3623 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3624 t = fold_build2 (PLUS_EXPR, itype, step, t);
3625 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3626 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3627 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3628 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3629 fold_build1 (NEGATE_EXPR, itype, t),
3630 fold_build1 (NEGATE_EXPR, itype, step));
3631 else
3632 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3633 t = fold_convert (itype, t);
3634 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3635
3636 q = create_tmp_reg (itype, "q");
3637 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3638 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3639 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3640
3641 tt = create_tmp_reg (itype, "tt");
3642 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3643 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3644 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3645
3646 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3647 gcond *cond_stmt = gimple_build_cond_empty (t);
3648 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3649
3650 second_bb = split_block (entry_bb, cond_stmt)->dest;
3651 gsi = gsi_last_nondebug_bb (second_bb);
3652 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3653
3654 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3655 GSI_SAME_STMT);
3656 gassign *assign_stmt
3657 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3658 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3659
3660 third_bb = split_block (second_bb, assign_stmt)->dest;
3661 gsi = gsi_last_nondebug_bb (third_bb);
3662 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3663
3664 t = build2 (MULT_EXPR, itype, q, threadid);
3665 t = build2 (PLUS_EXPR, itype, t, tt);
3666 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3667
3668 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3669 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3670
3671 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3672 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3673
3674 /* Remove the GIMPLE_OMP_FOR statement. */
3675 gsi_remove (&gsi, true);
3676
3677 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3678 gsi = gsi_start_bb (seq_start_bb);
3679
3680 tree startvar = fd->loop.v;
3681 tree endvar = NULL_TREE;
3682
3683 if (gimple_omp_for_combined_p (fd->for_stmt))
3684 {
3685 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3686 ? gimple_omp_parallel_clauses (inner_stmt)
3687 : gimple_omp_for_clauses (inner_stmt);
3688 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3689 gcc_assert (innerc);
3690 startvar = OMP_CLAUSE_DECL (innerc);
3691 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3692 OMP_CLAUSE__LOOPTEMP_);
3693 gcc_assert (innerc);
3694 endvar = OMP_CLAUSE_DECL (innerc);
3695 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3696 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3697 {
3698 int i;
3699 for (i = 1; i < fd->collapse; i++)
3700 {
3701 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3702 OMP_CLAUSE__LOOPTEMP_);
3703 gcc_assert (innerc);
3704 }
3705 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3706 OMP_CLAUSE__LOOPTEMP_);
3707 if (innerc)
3708 {
3709 /* If needed (distribute parallel for with lastprivate),
3710 propagate down the total number of iterations. */
3711 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3712 fd->loop.n2);
3713 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3714 GSI_CONTINUE_LINKING);
3715 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3716 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3717 }
3718 }
3719 }
3720 t = fold_convert (itype, s0);
3721 t = fold_build2 (MULT_EXPR, itype, t, step);
3722 if (POINTER_TYPE_P (type))
3723 {
3724 t = fold_build_pointer_plus (n1, t);
3725 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3726 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3727 t = fold_convert (signed_type_for (type), t);
3728 }
3729 else
3730 t = fold_build2 (PLUS_EXPR, type, t, n1);
3731 t = fold_convert (TREE_TYPE (startvar), t);
3732 t = force_gimple_operand_gsi (&gsi, t,
3733 DECL_P (startvar)
3734 && TREE_ADDRESSABLE (startvar),
3735 NULL_TREE, false, GSI_CONTINUE_LINKING);
3736 assign_stmt = gimple_build_assign (startvar, t);
3737 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3738
3739 t = fold_convert (itype, e0);
3740 t = fold_build2 (MULT_EXPR, itype, t, step);
3741 if (POINTER_TYPE_P (type))
3742 {
3743 t = fold_build_pointer_plus (n1, t);
3744 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3745 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3746 t = fold_convert (signed_type_for (type), t);
3747 }
3748 else
3749 t = fold_build2 (PLUS_EXPR, type, t, n1);
3750 t = fold_convert (TREE_TYPE (startvar), t);
3751 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3752 false, GSI_CONTINUE_LINKING);
3753 if (endvar)
3754 {
3755 assign_stmt = gimple_build_assign (endvar, e);
3756 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3757 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3758 assign_stmt = gimple_build_assign (fd->loop.v, e);
3759 else
3760 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3761 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3762 }
3763 /* Handle linear clause adjustments. */
3764 tree itercnt = NULL_TREE;
3765 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3766 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3767 c; c = OMP_CLAUSE_CHAIN (c))
3768 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3769 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3770 {
3771 tree d = OMP_CLAUSE_DECL (c);
3772 bool is_ref = omp_is_reference (d);
3773 tree t = d, a, dest;
3774 if (is_ref)
3775 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3776 if (itercnt == NULL_TREE)
3777 {
3778 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3779 {
3780 itercnt = fold_build2 (MINUS_EXPR, itype,
3781 fold_convert (itype, n1),
3782 fold_convert (itype, fd->loop.n1));
3783 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3784 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3785 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3786 NULL_TREE, false,
3787 GSI_CONTINUE_LINKING);
3788 }
3789 else
3790 itercnt = s0;
3791 }
3792 tree type = TREE_TYPE (t);
3793 if (POINTER_TYPE_P (type))
3794 type = sizetype;
3795 a = fold_build2 (MULT_EXPR, type,
3796 fold_convert (type, itercnt),
3797 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3798 dest = unshare_expr (t);
3799 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3800 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3801 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3802 false, GSI_CONTINUE_LINKING);
3803 assign_stmt = gimple_build_assign (dest, t);
3804 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3805 }
3806 if (fd->collapse > 1)
3807 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3808
3809 if (!broken_loop)
3810 {
3811 /* The code controlling the sequential loop replaces the
3812 GIMPLE_OMP_CONTINUE. */
3813 gsi = gsi_last_nondebug_bb (cont_bb);
3814 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3815 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3816 vmain = gimple_omp_continue_control_use (cont_stmt);
3817 vback = gimple_omp_continue_control_def (cont_stmt);
3818
3819 if (!gimple_omp_for_combined_p (fd->for_stmt))
3820 {
3821 if (POINTER_TYPE_P (type))
3822 t = fold_build_pointer_plus (vmain, step);
3823 else
3824 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3825 t = force_gimple_operand_gsi (&gsi, t,
3826 DECL_P (vback)
3827 && TREE_ADDRESSABLE (vback),
3828 NULL_TREE, true, GSI_SAME_STMT);
3829 assign_stmt = gimple_build_assign (vback, t);
3830 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3831
3832 t = build2 (fd->loop.cond_code, boolean_type_node,
3833 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3834 ? t : vback, e);
3835 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3836 }
3837
3838 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3839 gsi_remove (&gsi, true);
3840
3841 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3842 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3843 }
3844
3845 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3846 gsi = gsi_last_nondebug_bb (exit_bb);
3847 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3848 {
3849 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3850 if (fd->have_reductemp)
3851 {
3852 tree fn;
3853 if (t)
3854 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3855 else
3856 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3857 gcall *g = gimple_build_call (fn, 0);
3858 if (t)
3859 {
3860 gimple_call_set_lhs (g, t);
3861 gsi_insert_after (&gsi, gimple_build_assign (reductions,
3862 NOP_EXPR, t),
3863 GSI_SAME_STMT);
3864 }
3865 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3866 }
3867 else
3868 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3869 }
3870 gsi_remove (&gsi, true);
3871
3872 /* Connect all the blocks. */
3873 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3874 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3875 ep = find_edge (entry_bb, second_bb);
3876 ep->flags = EDGE_TRUE_VALUE;
3877 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3878 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3879 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3880
3881 if (!broken_loop)
3882 {
3883 ep = find_edge (cont_bb, body_bb);
3884 if (ep == NULL)
3885 {
3886 ep = BRANCH_EDGE (cont_bb);
3887 gcc_assert (single_succ (ep->dest) == body_bb);
3888 }
3889 if (gimple_omp_for_combined_p (fd->for_stmt))
3890 {
3891 remove_edge (ep);
3892 ep = NULL;
3893 }
3894 else if (fd->collapse > 1)
3895 {
3896 remove_edge (ep);
3897 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3898 }
3899 else
3900 ep->flags = EDGE_TRUE_VALUE;
3901 find_edge (cont_bb, fin_bb)->flags
3902 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3903 }
3904
3905 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3906 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3907 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3908
3909 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3910 recompute_dominator (CDI_DOMINATORS, body_bb));
3911 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3912 recompute_dominator (CDI_DOMINATORS, fin_bb));
3913
3914 struct loop *loop = body_bb->loop_father;
3915 if (loop != entry_bb->loop_father)
3916 {
3917 gcc_assert (broken_loop || loop->header == body_bb);
3918 gcc_assert (broken_loop
3919 || loop->latch == region->cont
3920 || single_pred (loop->latch) == region->cont);
3921 return;
3922 }
3923
3924 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3925 {
3926 loop = alloc_loop ();
3927 loop->header = body_bb;
3928 if (collapse_bb == NULL)
3929 loop->latch = cont_bb;
3930 add_loop (loop, body_bb->loop_father);
3931 }
3932 }
3933
3934 /* Return the PHI node in E->DEST that has ARG as its argument on edge E, or NULL if there is none.  */
3935
3936 static gphi *
3937 find_phi_with_arg_on_edge (tree arg, edge e)
3938 {
3939 basic_block bb = e->dest;
3940
3941 for (gphi_iterator gpi = gsi_start_phis (bb);
3942 !gsi_end_p (gpi);
3943 gsi_next (&gpi))
3944 {
3945 gphi *phi = gpi.phi ();
3946 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3947 return phi;
3948 }
3949
3950 return NULL;
3951 }
3952
3953 /* A subroutine of expand_omp_for. Generate code for a parallel
3954 loop with static schedule and a specified chunk size. Given
3955 parameters:
3956
3957 for (V = N1; V cond N2; V += STEP) BODY;
3958
3959 where COND is "<" or ">", we generate pseudocode
3960
3961 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3962 if (cond is <)
3963 adj = STEP - 1;
3964 else
3965 adj = STEP + 1;
3966 if ((__typeof (V)) -1 > 0 && cond is >)
3967 n = -(adj + N2 - N1) / -STEP;
3968 else
3969 n = (adj + N2 - N1) / STEP;
3970 trip = 0;
3971 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3972 here so that V is defined
3973 if the loop is not entered
3974 L0:
3975 s0 = (trip * nthreads + threadid) * CHUNK;
3976 e0 = min (s0 + CHUNK, n);
3977 if (s0 < n) goto L1; else goto L4;
3978 L1:
3979 V = s0 * STEP + N1;
3980 e = e0 * STEP + N1;
3981 L2:
3982 BODY;
3983 V += STEP;
3984 if (V cond e) goto L2; else goto L3;
3985 L3:
3986 trip += 1;
3987 goto L0;
3988 L4:
3989 */
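/* Hedged illustration only, not used by the pass: one round of chunk
   dispatch from the pseudocode above, written out as plain C.

     static void
     static_chunk_range (long n, long nthreads, long threadid,
			 long chunk, long trip, long *s0, long *e0)
     {
       *s0 = (trip * nthreads + threadid) * chunk;
       *e0 = (*s0 + chunk < n) ? *s0 + chunk : n;
     }

   E.g. n = 10, nthreads = 4, chunk = 2: trip 0 hands out [0,2) [2,4)
   [4,6) [6,8); on trip 1 thread 0 gets [8,10) while the other threads
   see s0 >= n and leave the dispatch loop (L4).  */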
3990
3991 static void
3992 expand_omp_for_static_chunk (struct omp_region *region,
3993 struct omp_for_data *fd, gimple *inner_stmt)
3994 {
3995 tree n, s0, e0, e, t;
3996 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3997 tree type, itype, vmain, vback, vextra;
3998 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3999 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4000 gimple_stmt_iterator gsi;
4001 edge se;
4002 bool broken_loop = region->cont == NULL;
4003 tree *counts = NULL;
4004 tree n1, n2, step;
4005 tree reductions = NULL_TREE;
4006
4007 itype = type = TREE_TYPE (fd->loop.v);
4008 if (POINTER_TYPE_P (type))
4009 itype = signed_type_for (type);
4010
4011 entry_bb = region->entry;
4012 se = split_block (entry_bb, last_stmt (entry_bb));
4013 entry_bb = se->src;
4014 iter_part_bb = se->dest;
4015 cont_bb = region->cont;
4016 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4017 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4018 gcc_assert (broken_loop
4019 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4020 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4021 body_bb = single_succ (seq_start_bb);
4022 if (!broken_loop)
4023 {
4024 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4025 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4026 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4027 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4028 }
4029 exit_bb = region->exit;
4030
4031 /* Trip and adjustment setup goes in ENTRY_BB. */
4032 gsi = gsi_last_nondebug_bb (entry_bb);
4033 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4034
4035 if (fd->collapse > 1)
4036 {
4037 int first_zero_iter = -1, dummy = -1;
4038 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4039
4040 counts = XALLOCAVEC (tree, fd->collapse);
4041 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4042 fin_bb, first_zero_iter,
4043 dummy_bb, dummy, l2_dom_bb);
4044 t = NULL_TREE;
4045 }
4046 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4047 t = integer_one_node;
4048 else
4049 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4050 fold_convert (type, fd->loop.n1),
4051 fold_convert (type, fd->loop.n2));
4052 if (fd->collapse == 1
4053 && TYPE_UNSIGNED (type)
4054 && (t == NULL_TREE || !integer_onep (t)))
4055 {
4056 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4057 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4058 true, GSI_SAME_STMT);
4059 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4060 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4061 true, GSI_SAME_STMT);
4062 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4063 NULL_TREE, NULL_TREE);
4064 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4065 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4066 expand_omp_regimplify_p, NULL, NULL)
4067 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4068 expand_omp_regimplify_p, NULL, NULL))
4069 {
4070 gsi = gsi_for_stmt (cond_stmt);
4071 gimple_regimplify_operands (cond_stmt, &gsi);
4072 }
4073 se = split_block (entry_bb, cond_stmt);
4074 se->flags = EDGE_TRUE_VALUE;
4075 entry_bb = se->dest;
4076 se->probability = profile_probability::very_likely ();
4077 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4078 se->probability = profile_probability::very_unlikely ();
4079 if (gimple_in_ssa_p (cfun))
4080 {
4081 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4082 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4083 !gsi_end_p (gpi); gsi_next (&gpi))
4084 {
4085 gphi *phi = gpi.phi ();
4086 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4087 se, UNKNOWN_LOCATION);
4088 }
4089 }
4090 gsi = gsi_last_bb (entry_bb);
4091 }
4092
4093 if (fd->have_reductemp)
4094 {
4095 tree t1 = build_int_cst (long_integer_type_node, 0);
4096 tree t2 = build_int_cst (long_integer_type_node, 1);
4097 tree t3 = build_int_cstu (long_integer_type_node,
4098 (HOST_WIDE_INT_1U << 31) + 1);
4099 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4100 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4101 reductions = OMP_CLAUSE_DECL (clauses);
4102 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4103 gimple *g = SSA_NAME_DEF_STMT (reductions);
4104 reductions = gimple_assign_rhs1 (g);
4105 OMP_CLAUSE_DECL (clauses) = reductions;
4106 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4107 tree t
4108 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4109 9, t1, t2, t2, t3, t1, null_pointer_node,
4110 null_pointer_node, reductions, null_pointer_node);
4111 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4112 true, GSI_SAME_STMT);
4113 gsi_remove (&gsi2, true);
4114 release_ssa_name (gimple_assign_lhs (g));
4115 }
4116 switch (gimple_omp_for_kind (fd->for_stmt))
4117 {
4118 case GF_OMP_FOR_KIND_FOR:
4119 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4120 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4121 break;
4122 case GF_OMP_FOR_KIND_DISTRIBUTE:
4123 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4124 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4125 break;
4126 default:
4127 gcc_unreachable ();
4128 }
4129 nthreads = build_call_expr (nthreads, 0);
4130 nthreads = fold_convert (itype, nthreads);
4131 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4132 true, GSI_SAME_STMT);
4133 threadid = build_call_expr (threadid, 0);
4134 threadid = fold_convert (itype, threadid);
4135 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4136 true, GSI_SAME_STMT);
4137
4138 n1 = fd->loop.n1;
4139 n2 = fd->loop.n2;
4140 step = fd->loop.step;
4141 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4142 {
4143 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4144 OMP_CLAUSE__LOOPTEMP_);
4145 gcc_assert (innerc);
4146 n1 = OMP_CLAUSE_DECL (innerc);
4147 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4148 OMP_CLAUSE__LOOPTEMP_);
4149 gcc_assert (innerc);
4150 n2 = OMP_CLAUSE_DECL (innerc);
4151 }
4152 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4153 true, NULL_TREE, true, GSI_SAME_STMT);
4154 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4155 true, NULL_TREE, true, GSI_SAME_STMT);
4156 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4157 true, NULL_TREE, true, GSI_SAME_STMT);
4158 tree chunk_size = fold_convert (itype, fd->chunk_size);
4159 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4160 chunk_size
4161 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4162 GSI_SAME_STMT);
4163
4164 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4165 t = fold_build2 (PLUS_EXPR, itype, step, t);
4166 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4167 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4168 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4169 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4170 fold_build1 (NEGATE_EXPR, itype, t),
4171 fold_build1 (NEGATE_EXPR, itype, step));
4172 else
4173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4174 t = fold_convert (itype, t);
4175 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4176 true, GSI_SAME_STMT);
4177
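  /* TRIP counts how many chunks this thread has dispatched so far.  In SSA
     form three names are needed: TRIP_INIT (set to 0 in ENTRY_BB), TRIP_MAIN
     (the value used in ITER_PART_BB) and TRIP_BACK (the incremented value
     computed in TRIP_UPDATE_BB); a PHI node created later in ITER_PART_BB
     merges TRIP_INIT and TRIP_BACK.  */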
4178 trip_var = create_tmp_reg (itype, ".trip");
4179 if (gimple_in_ssa_p (cfun))
4180 {
4181 trip_init = make_ssa_name (trip_var);
4182 trip_main = make_ssa_name (trip_var);
4183 trip_back = make_ssa_name (trip_var);
4184 }
4185 else
4186 {
4187 trip_init = trip_var;
4188 trip_main = trip_var;
4189 trip_back = trip_var;
4190 }
4191
4192 gassign *assign_stmt
4193 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4194 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4195
4196 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4197 t = fold_build2 (MULT_EXPR, itype, t, step);
4198 if (POINTER_TYPE_P (type))
4199 t = fold_build_pointer_plus (n1, t);
4200 else
4201 t = fold_build2 (PLUS_EXPR, type, t, n1);
4202 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4203 true, GSI_SAME_STMT);
4204
4205 /* Remove the GIMPLE_OMP_FOR. */
4206 gsi_remove (&gsi, true);
4207
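  /* Remember the spot where the GIMPLE_OMP_FOR used to be; the linear
     clause handling below uses GSIF to emit per-thread setup in ENTRY_BB,
     ahead of the chunk dispatch loop.  */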
4208 gimple_stmt_iterator gsif = gsi;
4209
4210 /* Iteration space partitioning goes in ITER_PART_BB. */
4211 gsi = gsi_last_bb (iter_part_bb);
4212
4213 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4214 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4215 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4216 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4217 false, GSI_CONTINUE_LINKING);
4218
4219 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4220 t = fold_build2 (MIN_EXPR, itype, t, n);
4221 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4222 false, GSI_CONTINUE_LINKING);
4223
4224 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4225 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4226
4227 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4228 gsi = gsi_start_bb (seq_start_bb);
4229
4230 tree startvar = fd->loop.v;
4231 tree endvar = NULL_TREE;
4232
4233 if (gimple_omp_for_combined_p (fd->for_stmt))
4234 {
4235 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4236 ? gimple_omp_parallel_clauses (inner_stmt)
4237 : gimple_omp_for_clauses (inner_stmt);
4238 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4239 gcc_assert (innerc);
4240 startvar = OMP_CLAUSE_DECL (innerc);
4241 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4242 OMP_CLAUSE__LOOPTEMP_);
4243 gcc_assert (innerc);
4244 endvar = OMP_CLAUSE_DECL (innerc);
4245 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4246 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4247 {
4248 int i;
4249 for (i = 1; i < fd->collapse; i++)
4250 {
4251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4252 OMP_CLAUSE__LOOPTEMP_);
4253 gcc_assert (innerc);
4254 }
4255 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4256 OMP_CLAUSE__LOOPTEMP_);
4257 if (innerc)
4258 {
4259 /* If needed (distribute parallel for with lastprivate),
4260 propagate down the total number of iterations. */
4261 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4262 fd->loop.n2);
4263 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4264 GSI_CONTINUE_LINKING);
4265 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4266 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4267 }
4268 }
4269 }
4270
4271 t = fold_convert (itype, s0);
4272 t = fold_build2 (MULT_EXPR, itype, t, step);
4273 if (POINTER_TYPE_P (type))
4274 {
4275 t = fold_build_pointer_plus (n1, t);
4276 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4277 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4278 t = fold_convert (signed_type_for (type), t);
4279 }
4280 else
4281 t = fold_build2 (PLUS_EXPR, type, t, n1);
4282 t = fold_convert (TREE_TYPE (startvar), t);
4283 t = force_gimple_operand_gsi (&gsi, t,
4284 DECL_P (startvar)
4285 && TREE_ADDRESSABLE (startvar),
4286 NULL_TREE, false, GSI_CONTINUE_LINKING);
4287 assign_stmt = gimple_build_assign (startvar, t);
4288 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4289
4290 t = fold_convert (itype, e0);
4291 t = fold_build2 (MULT_EXPR, itype, t, step);
4292 if (POINTER_TYPE_P (type))
4293 {
4294 t = fold_build_pointer_plus (n1, t);
4295 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4296 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4297 t = fold_convert (signed_type_for (type), t);
4298 }
4299 else
4300 t = fold_build2 (PLUS_EXPR, type, t, n1);
4301 t = fold_convert (TREE_TYPE (startvar), t);
4302 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4303 false, GSI_CONTINUE_LINKING);
4304 if (endvar)
4305 {
4306 assign_stmt = gimple_build_assign (endvar, e);
4307 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4308 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4309 assign_stmt = gimple_build_assign (fd->loop.v, e);
4310 else
4311 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4312 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4313 }
4314 /* Handle linear clause adjustments. */
4315 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4316 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4317 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4318 c; c = OMP_CLAUSE_CHAIN (c))
4319 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4320 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4321 {
4322 tree d = OMP_CLAUSE_DECL (c);
4323 bool is_ref = omp_is_reference (d);
4324 tree t = d, a, dest;
4325 if (is_ref)
4326 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4327 tree type = TREE_TYPE (t);
4328 if (POINTER_TYPE_P (type))
4329 type = sizetype;
4330 dest = unshare_expr (t);
4331 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4332 expand_omp_build_assign (&gsif, v, t);
4333 if (itercnt == NULL_TREE)
4334 {
4335 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4336 {
4337 itercntbias
4338 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4339 fold_convert (itype, fd->loop.n1));
4340 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4341 itercntbias, step);
4342 itercntbias
4343 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4344 NULL_TREE, true,
4345 GSI_SAME_STMT);
4346 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4347 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4348 NULL_TREE, false,
4349 GSI_CONTINUE_LINKING);
4350 }
4351 else
4352 itercnt = s0;
4353 }
4354 a = fold_build2 (MULT_EXPR, type,
4355 fold_convert (type, itercnt),
4356 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4357 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4358 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4359 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4360 false, GSI_CONTINUE_LINKING);
4361 assign_stmt = gimple_build_assign (dest, t);
4362 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4363 }
4364 if (fd->collapse > 1)
4365 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4366
4367 if (!broken_loop)
4368 {
4369 /* The code controlling the sequential loop goes in CONT_BB,
4370 replacing the GIMPLE_OMP_CONTINUE. */
4371 gsi = gsi_last_nondebug_bb (cont_bb);
4372 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4373 vmain = gimple_omp_continue_control_use (cont_stmt);
4374 vback = gimple_omp_continue_control_def (cont_stmt);
4375
4376 if (!gimple_omp_for_combined_p (fd->for_stmt))
4377 {
4378 if (POINTER_TYPE_P (type))
4379 t = fold_build_pointer_plus (vmain, step);
4380 else
4381 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4382 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4383 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4384 true, GSI_SAME_STMT);
4385 assign_stmt = gimple_build_assign (vback, t);
4386 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4387
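	  /* With a chunk size of 1, E0 is always S0 + 1, so the body runs
	     exactly once per dispatched chunk; emit a constant-false
	     condition (0 == 1) so the sequential back edge is never
	     taken.  */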
4388 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4389 t = build2 (EQ_EXPR, boolean_type_node,
4390 build_int_cst (itype, 0),
4391 build_int_cst (itype, 1));
4392 else
4393 t = build2 (fd->loop.cond_code, boolean_type_node,
4394 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4395 ? t : vback, e);
4396 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4397 }
4398
4399 /* Remove GIMPLE_OMP_CONTINUE. */
4400 gsi_remove (&gsi, true);
4401
4402 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4403 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4404
4405 /* Trip update code goes into TRIP_UPDATE_BB. */
4406 gsi = gsi_start_bb (trip_update_bb);
4407
4408 t = build_int_cst (itype, 1);
4409 t = build2 (PLUS_EXPR, itype, trip_main, t);
4410 assign_stmt = gimple_build_assign (trip_back, t);
4411 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4412 }
4413
4414 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4415 gsi = gsi_last_nondebug_bb (exit_bb);
4416 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4417 {
4418 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4419 if (fd->have_reductemp)
4420 {
4421 tree fn;
4422 if (t)
4423 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4424 else
4425 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4426 gcall *g = gimple_build_call (fn, 0);
4427 if (t)
4428 {
4429 gimple_call_set_lhs (g, t);
4430 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4431 NOP_EXPR, t),
4432 GSI_SAME_STMT);
4433 }
4434 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4435 }
4436 else
4437 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4438 }
4439 gsi_remove (&gsi, true);
4440
4441 /* Connect the new blocks. */
4442 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4443 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4444
4445 if (!broken_loop)
4446 {
4447 se = find_edge (cont_bb, body_bb);
4448 if (se == NULL)
4449 {
4450 se = BRANCH_EDGE (cont_bb);
4451 gcc_assert (single_succ (se->dest) == body_bb);
4452 }
4453 if (gimple_omp_for_combined_p (fd->for_stmt))
4454 {
4455 remove_edge (se);
4456 se = NULL;
4457 }
4458 else if (fd->collapse > 1)
4459 {
4460 remove_edge (se);
4461 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4462 }
4463 else
4464 se->flags = EDGE_TRUE_VALUE;
4465 find_edge (cont_bb, trip_update_bb)->flags
4466 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4467
4468 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4469 iter_part_bb);
4470 }
4471
4472 if (gimple_in_ssa_p (cfun))
4473 {
4474 gphi_iterator psi;
4475 gphi *phi;
4476 edge re, ene;
4477 edge_var_map *vm;
4478 size_t i;
4479
4480 gcc_assert (fd->collapse == 1 && !broken_loop);
4481
4482 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4483 remove arguments of the phi nodes in fin_bb. We need to create
4484 appropriate phi nodes in iter_part_bb instead. */
4485 se = find_edge (iter_part_bb, fin_bb);
4486 re = single_succ_edge (trip_update_bb);
4487 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4488 ene = single_succ_edge (entry_bb);
4489
4490 psi = gsi_start_phis (fin_bb);
4491 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4492 gsi_next (&psi), ++i)
4493 {
4494 gphi *nphi;
4495 location_t locus;
4496
4497 phi = psi.phi ();
4498 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4499 redirect_edge_var_map_def (vm), 0))
4500 continue;
4501
4502 t = gimple_phi_result (phi);
4503 gcc_assert (t == redirect_edge_var_map_result (vm));
4504
4505 if (!single_pred_p (fin_bb))
4506 t = copy_ssa_name (t, phi);
4507
4508 nphi = create_phi_node (t, iter_part_bb);
4509
4510 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4511 locus = gimple_phi_arg_location_from_edge (phi, se);
4512
4513 /* A special case: fd->loop.v is not yet computed in
4514 iter_part_bb, so we need to use vextra instead.  */
4515 if (t == fd->loop.v)
4516 t = vextra;
4517 add_phi_arg (nphi, t, ene, locus);
4518 locus = redirect_edge_var_map_location (vm);
4519 tree back_arg = redirect_edge_var_map_def (vm);
4520 add_phi_arg (nphi, back_arg, re, locus);
4521 edge ce = find_edge (cont_bb, body_bb);
4522 if (ce == NULL)
4523 {
4524 ce = BRANCH_EDGE (cont_bb);
4525 gcc_assert (single_succ (ce->dest) == body_bb);
4526 ce = single_succ_edge (ce->dest);
4527 }
4528 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4529 gcc_assert (inner_loop_phi != NULL);
4530 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4531 find_edge (seq_start_bb, body_bb), locus);
4532
4533 if (!single_pred_p (fin_bb))
4534 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4535 }
4536 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4537 redirect_edge_var_map_clear (re);
4538 if (single_pred_p (fin_bb))
4539 while (1)
4540 {
4541 psi = gsi_start_phis (fin_bb);
4542 if (gsi_end_p (psi))
4543 break;
4544 remove_phi_node (&psi, false);
4545 }
4546
4547 /* Make phi node for trip. */
4548 phi = create_phi_node (trip_main, iter_part_bb);
4549 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4550 UNKNOWN_LOCATION);
4551 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4552 UNKNOWN_LOCATION);
4553 }
4554
4555 if (!broken_loop)
4556 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4557 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4558 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4559 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4560 recompute_dominator (CDI_DOMINATORS, fin_bb));
4561 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4562 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4563 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4564 recompute_dominator (CDI_DOMINATORS, body_bb));
4565
4566 if (!broken_loop)
4567 {
4568 struct loop *loop = body_bb->loop_father;
4569 struct loop *trip_loop = alloc_loop ();
4570 trip_loop->header = iter_part_bb;
4571 trip_loop->latch = trip_update_bb;
4572 add_loop (trip_loop, iter_part_bb->loop_father);
4573
4574 if (loop != entry_bb->loop_father)
4575 {
4576 gcc_assert (loop->header == body_bb);
4577 gcc_assert (loop->latch == region->cont
4578 || single_pred (loop->latch) == region->cont);
4579 trip_loop->inner = loop;
4580 return;
4581 }
4582
4583 if (!gimple_omp_for_combined_p (fd->for_stmt))
4584 {
4585 loop = alloc_loop ();
4586 loop->header = body_bb;
4587 if (collapse_bb == NULL)
4588 loop->latch = cont_bb;
4589 add_loop (loop, trip_loop);
4590 }
4591 }
4592 }
4593
4594 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4595 loop. Given parameters:
4596
4597 for (V = N1; V cond N2; V += STEP) BODY;
4598
4599 where COND is "<" or ">", we generate pseudocode
4600
4601 V = N1;
4602 goto L1;
4603 L0:
4604 BODY;
4605 V += STEP;
4606 L1:
4607 if (V cond N2) goto L0; else goto L2;
4608 L2:
4609
4610 For collapsed loops, given parameters:
4611 collapse(3)
4612 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4613 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4614 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4615 BODY;
4616
4617 we generate pseudocode
4618
4619 if (cond3 is <)
4620 adj = STEP3 - 1;
4621 else
4622 adj = STEP3 + 1;
4623 count3 = (adj + N32 - N31) / STEP3;
4624 if (cond2 is <)
4625 adj = STEP2 - 1;
4626 else
4627 adj = STEP2 + 1;
4628 count2 = (adj + N22 - N21) / STEP2;
4629 if (cond1 is <)
4630 adj = STEP1 - 1;
4631 else
4632 adj = STEP1 + 1;
4633 count1 = (adj + N12 - N11) / STEP1;
4634 count = count1 * count2 * count3;
4635 V = 0;
4636 V1 = N11;
4637 V2 = N21;
4638 V3 = N31;
4639 goto L1;
4640 L0:
4641 BODY;
4642 V += 1;
4643 V3 += STEP3;
4644 V2 += (V3 cond3 N32) ? 0 : STEP2;
4645 V3 = (V3 cond3 N32) ? V3 : N31;
4646 V1 += (V2 cond2 N22) ? 0 : STEP1;
4647 V2 = (V2 cond2 N22) ? V2 : N21;
4648 L1:
4649 if (V < count) goto L0; else goto L2;
4650 L2:
4651
4652 */
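/* Hedged two-level illustration of the collapse scheme above: for

     collapse(2)
     for (V1 = 0; V1 < 2; V1++)
       for (V2 = 0; V2 < 3; V2++)
	 BODY;

   count1 = 2, count2 = 3 and count = 6, so the single linear counter V
   runs 0..5 while (V1, V2) visits (0,0) (0,1) (0,2) (1,0) (1,1) (1,2):
   after each BODY, V2 += 1, and once V2 reaches 3 it wraps back to 0
   while V1 absorbs the outer step instead.  */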
4653
4654 static void
4655 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4656 {
4657 tree type, t;
4658 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4659 gimple_stmt_iterator gsi;
4660 gimple *stmt;
4661 gcond *cond_stmt;
4662 bool broken_loop = region->cont == NULL;
4663 edge e, ne;
4664 tree *counts = NULL;
4665 int i;
4666 int safelen_int = INT_MAX;
4667 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4668 OMP_CLAUSE_SAFELEN);
4669 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4670 OMP_CLAUSE__SIMDUID_);
4671 tree n1, n2;
4672
4673 if (safelen)
4674 {
4675 poly_uint64 val;
4676 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4677 if (!poly_int_tree_p (safelen, &val))
4678 safelen_int = 0;
4679 else
4680 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4681 if (safelen_int == 1)
4682 safelen_int = 0;
4683 }
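  /* E.g. '#pragma omp simd safelen(8)' yields safelen_int == 8; safelen(1)
     allows no concurrent iterations at all, so it is treated the same as
     having no usable safelen (no vectorization hint on the loop).  */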
4684 type = TREE_TYPE (fd->loop.v);
4685 entry_bb = region->entry;
4686 cont_bb = region->cont;
4687 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4688 gcc_assert (broken_loop
4689 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4690 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4691 if (!broken_loop)
4692 {
4693 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4694 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4695 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4696 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4697 }
4698 else
4699 {
4700 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4701 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4702 l2_bb = single_succ (l1_bb);
4703 }
4704 exit_bb = region->exit;
4705 l2_dom_bb = NULL;
4706
4707 gsi = gsi_last_nondebug_bb (entry_bb);
4708
4709 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4710 /* Handling SSA form is not needed here right now. */
4711 gcc_assert (!gimple_in_ssa_p (cfun));
4712 if (fd->collapse > 1)
4713 {
4714 int first_zero_iter = -1, dummy = -1;
4715 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4716
4717 counts = XALLOCAVEC (tree, fd->collapse);
4718 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4719 zero_iter_bb, first_zero_iter,
4720 dummy_bb, dummy, l2_dom_bb);
4721 }
4722 if (l2_dom_bb == NULL)
4723 l2_dom_bb = l1_bb;
4724
4725 n1 = fd->loop.n1;
4726 n2 = fd->loop.n2;
4727 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4728 {
4729 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4730 OMP_CLAUSE__LOOPTEMP_);
4731 gcc_assert (innerc);
4732 n1 = OMP_CLAUSE_DECL (innerc);
4733 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4734 OMP_CLAUSE__LOOPTEMP_);
4735 gcc_assert (innerc);
4736 n2 = OMP_CLAUSE_DECL (innerc);
4737 }
4738 tree step = fd->loop.step;
4739
4740 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4741 OMP_CLAUSE__SIMT_);
4742 if (is_simt)
4743 {
4744 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4745 is_simt = safelen_int > 1;
4746 }
4747 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
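  /* For SIMT execution each lane starts at its own offset N1 + STEP * LANE
     and then advances by STEP * VF, so the lanes interleave over the
     iteration space; SIMT_MAXLANE, when set, limits how many lanes may
     enter the loop (collapsed loops and small safelen values).  */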
4748 if (is_simt)
4749 {
4750 simt_lane = create_tmp_var (unsigned_type_node);
4751 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4752 gimple_call_set_lhs (g, simt_lane);
4753 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4754 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4755 fold_convert (TREE_TYPE (step), simt_lane));
4756 n1 = fold_convert (type, n1);
4757 if (POINTER_TYPE_P (type))
4758 n1 = fold_build_pointer_plus (n1, offset);
4759 else
4760 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4761
4762 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4763 if (fd->collapse > 1)
4764 simt_maxlane = build_one_cst (unsigned_type_node);
4765 else if (safelen_int < omp_max_simt_vf ())
4766 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4767 tree vf
4768 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4769 unsigned_type_node, 0);
4770 if (simt_maxlane)
4771 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4772 vf = fold_convert (TREE_TYPE (step), vf);
4773 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4774 }
4775
4776 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4777 if (fd->collapse > 1)
4778 {
4779 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4780 {
4781 gsi_prev (&gsi);
4782 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4783 gsi_next (&gsi);
4784 }
4785 else
4786 for (i = 0; i < fd->collapse; i++)
4787 {
4788 tree itype = TREE_TYPE (fd->loops[i].v);
4789 if (POINTER_TYPE_P (itype))
4790 itype = signed_type_for (itype);
4791 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4792 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4793 }
4794 }
4795
4796 /* Remove the GIMPLE_OMP_FOR statement. */
4797 gsi_remove (&gsi, true);
4798
4799 if (!broken_loop)
4800 {
4801 /* Code to control the increment goes in the CONT_BB. */
4802 gsi = gsi_last_nondebug_bb (cont_bb);
4803 stmt = gsi_stmt (gsi);
4804 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4805
4806 if (POINTER_TYPE_P (type))
4807 t = fold_build_pointer_plus (fd->loop.v, step);
4808 else
4809 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4810 expand_omp_build_assign (&gsi, fd->loop.v, t);
4811
4812 if (fd->collapse > 1)
4813 {
4814 i = fd->collapse - 1;
4815 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4816 {
4817 t = fold_convert (sizetype, fd->loops[i].step);
4818 t = fold_build_pointer_plus (fd->loops[i].v, t);
4819 }
4820 else
4821 {
4822 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4823 fd->loops[i].step);
4824 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4825 fd->loops[i].v, t);
4826 }
4827 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4828
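	  /* Propagate carries outwards: whenever loop I's index has run past
	     its bound, step loop I-1's index and wrap loop I's index back to
	     its start, exactly as in the V1/V2/V3 pseudocode above.  */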
4829 for (i = fd->collapse - 1; i > 0; i--)
4830 {
4831 tree itype = TREE_TYPE (fd->loops[i].v);
4832 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4833 if (POINTER_TYPE_P (itype2))
4834 itype2 = signed_type_for (itype2);
4835 t = fold_convert (itype2, fd->loops[i - 1].step);
4836 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4837 GSI_SAME_STMT);
4838 t = build3 (COND_EXPR, itype2,
4839 build2 (fd->loops[i].cond_code, boolean_type_node,
4840 fd->loops[i].v,
4841 fold_convert (itype, fd->loops[i].n2)),
4842 build_int_cst (itype2, 0), t);
4843 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4844 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4845 else
4846 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4847 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4848
4849 t = fold_convert (itype, fd->loops[i].n1);
4850 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4851 GSI_SAME_STMT);
4852 t = build3 (COND_EXPR, itype,
4853 build2 (fd->loops[i].cond_code, boolean_type_node,
4854 fd->loops[i].v,
4855 fold_convert (itype, fd->loops[i].n2)),
4856 fd->loops[i].v, t);
4857 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4858 }
4859 }
4860
4861 /* Remove GIMPLE_OMP_CONTINUE. */
4862 gsi_remove (&gsi, true);
4863 }
4864
4865 /* Emit the condition in L1_BB. */
4866 gsi = gsi_start_bb (l1_bb);
4867
4868 t = fold_convert (type, n2);
4869 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4870 false, GSI_CONTINUE_LINKING);
4871 tree v = fd->loop.v;
4872 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4873 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4874 false, GSI_CONTINUE_LINKING);
4875 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4876 cond_stmt = gimple_build_cond_empty (t);
4877 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4878 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4879 NULL, NULL)
4880 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4881 NULL, NULL))
4882 {
4883 gsi = gsi_for_stmt (cond_stmt);
4884 gimple_regimplify_operands (cond_stmt, &gsi);
4885 }
4886
4887 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4888 if (is_simt)
4889 {
4890 gsi = gsi_start_bb (l2_bb);
4891 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4892 if (POINTER_TYPE_P (type))
4893 t = fold_build_pointer_plus (fd->loop.v, step);
4894 else
4895 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4896 expand_omp_build_assign (&gsi, fd->loop.v, t);
4897 }
4898
4899 /* Remove GIMPLE_OMP_RETURN. */
4900 gsi = gsi_last_nondebug_bb (exit_bb);
4901 gsi_remove (&gsi, true);
4902
4903 /* Connect the new blocks. */
4904 remove_edge (FALLTHRU_EDGE (entry_bb));
4905
4906 if (!broken_loop)
4907 {
4908 remove_edge (BRANCH_EDGE (entry_bb));
4909 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4910
4911 e = BRANCH_EDGE (l1_bb);
4912 ne = FALLTHRU_EDGE (l1_bb);
4913 e->flags = EDGE_TRUE_VALUE;
4914 }
4915 else
4916 {
4917 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4918
4919 ne = single_succ_edge (l1_bb);
4920 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4921
4922 }
4923 ne->flags = EDGE_FALSE_VALUE;
4924 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4925 ne->probability = e->probability.invert ();
4926
4927 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4928 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4929
4930 if (simt_maxlane)
4931 {
4932 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4933 NULL_TREE, NULL_TREE);
4934 gsi = gsi_last_bb (entry_bb);
4935 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4936 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4937 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4938 FALLTHRU_EDGE (entry_bb)->probability
4939 = profile_probability::guessed_always ().apply_scale (7, 8);
4940 BRANCH_EDGE (entry_bb)->probability
4941 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4942 l2_dom_bb = entry_bb;
4943 }
4944 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4945
4946 if (!broken_loop)
4947 {
4948 struct loop *loop = alloc_loop ();
4949 loop->header = l1_bb;
4950 loop->latch = cont_bb;
4951 add_loop (loop, l1_bb->loop_father);
4952 loop->safelen = safelen_int;
4953 if (simduid)
4954 {
4955 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4956 cfun->has_simduid_loops = true;
4957 }
4958 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4959 the loop. */
4960 if ((flag_tree_loop_vectorize
4961 || !global_options_set.x_flag_tree_loop_vectorize)
4962 && flag_tree_loop_optimize
4963 && loop->safelen > 1)
4964 {
4965 loop->force_vectorize = true;
4966 cfun->has_force_vectorize_loops = true;
4967 }
4968 }
4969 else if (simduid)
4970 cfun->has_simduid_loops = true;
4971 }
4972
4973 /* Taskloop construct is represented after gimplification with
4974 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4975 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4976 which should just compute all the needed loop temporaries
4977 for GIMPLE_OMP_TASK. */
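/* Hedged sketch of the shape only (names are illustrative): a user loop

     #pragma omp taskloop
     for (i = a; i < b; i++)
       body (i);

   reaches this point as an outer GIMPLE_OMP_FOR whose only job is to
   evaluate a and b into the _looptemp_ temporaries consumed by the
   GIMPLE_OMP_TASK that wraps the inner GIMPLE_OMP_FOR, which is expanded
   separately.  */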
4978
4979 static void
4980 expand_omp_taskloop_for_outer (struct omp_region *region,
4981 struct omp_for_data *fd,
4982 gimple *inner_stmt)
4983 {
4984 tree type, bias = NULL_TREE;
4985 basic_block entry_bb, cont_bb, exit_bb;
4986 gimple_stmt_iterator gsi;
4987 gassign *assign_stmt;
4988 tree *counts = NULL;
4989 int i;
4990
4991 gcc_assert (inner_stmt);
4992 gcc_assert (region->cont);
4993 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4994 && gimple_omp_task_taskloop_p (inner_stmt));
4995 type = TREE_TYPE (fd->loop.v);
4996
4997 /* See if we need to bias by LLONG_MIN. */
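  /* The runtime iterates in FD->ITER_TYPE (unsigned long long here); when
     the original iteration variable is signed and a bound might be negative
     at run time, bias both bounds by TYPE_MIN_VALUE of the type so the
     shifted values are representable in the unsigned iteration type.  */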
4998 if (fd->iter_type == long_long_unsigned_type_node
4999 && TREE_CODE (type) == INTEGER_TYPE
5000 && !TYPE_UNSIGNED (type))
5001 {
5002 tree n1, n2;
5003
5004 if (fd->loop.cond_code == LT_EXPR)
5005 {
5006 n1 = fd->loop.n1;
5007 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5008 }
5009 else
5010 {
5011 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5012 n2 = fd->loop.n1;
5013 }
5014 if (TREE_CODE (n1) != INTEGER_CST
5015 || TREE_CODE (n2) != INTEGER_CST
5016 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5017 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5018 }
5019
5020 entry_bb = region->entry;
5021 cont_bb = region->cont;
5022 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5023 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5024 exit_bb = region->exit;
5025
5026 gsi = gsi_last_nondebug_bb (entry_bb);
5027 gimple *for_stmt = gsi_stmt (gsi);
5028 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5029 if (fd->collapse > 1)
5030 {
5031 int first_zero_iter = -1, dummy = -1;
5032 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5033
5034 counts = XALLOCAVEC (tree, fd->collapse);
5035 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5036 zero_iter_bb, first_zero_iter,
5037 dummy_bb, dummy, l2_dom_bb);
5038
5039 if (zero_iter_bb)
5040 {
5041 /* Some counts[i] vars might be uninitialized if
5042 some loop has zero iterations. But the body shouldn't
5043 be executed in that case, so just avoid uninit warnings. */
5044 for (i = first_zero_iter; i < fd->collapse; i++)
5045 if (SSA_VAR_P (counts[i]))
5046 TREE_NO_WARNING (counts[i]) = 1;
5047 gsi_prev (&gsi);
5048 edge e = split_block (entry_bb, gsi_stmt (gsi));
5049 entry_bb = e->dest;
5050 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5051 gsi = gsi_last_bb (entry_bb);
5052 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5053 get_immediate_dominator (CDI_DOMINATORS,
5054 zero_iter_bb));
5055 }
5056 }
5057
5058 tree t0, t1;
5059 t1 = fd->loop.n2;
5060 t0 = fd->loop.n1;
5061 if (POINTER_TYPE_P (TREE_TYPE (t0))
5062 && TYPE_PRECISION (TREE_TYPE (t0))
5063 != TYPE_PRECISION (fd->iter_type))
5064 {
5065 /* Avoid casting pointers to integer of a different size. */
5066 tree itype = signed_type_for (type);
5067 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5068 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5069 }
5070 else
5071 {
5072 t1 = fold_convert (fd->iter_type, t1);
5073 t0 = fold_convert (fd->iter_type, t0);
5074 }
5075 if (bias)
5076 {
5077 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5078 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5079 }
5080
5081 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5082 OMP_CLAUSE__LOOPTEMP_);
5083 gcc_assert (innerc);
5084 tree startvar = OMP_CLAUSE_DECL (innerc);
5085 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5086 gcc_assert (innerc);
5087 tree endvar = OMP_CLAUSE_DECL (innerc);
5088 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5089 {
5090 gcc_assert (innerc);
5091 for (i = 1; i < fd->collapse; i++)
5092 {
5093 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5094 OMP_CLAUSE__LOOPTEMP_);
5095 gcc_assert (innerc);
5096 }
5097 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5098 OMP_CLAUSE__LOOPTEMP_);
5099 if (innerc)
5100 {
5101 /* If needed (inner taskloop has lastprivate clause), propagate
5102 down the total number of iterations. */
5103 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5104 NULL_TREE, false,
5105 GSI_CONTINUE_LINKING);
5106 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5108 }
5109 }
5110
5111 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5112 GSI_CONTINUE_LINKING);
5113 assign_stmt = gimple_build_assign (startvar, t0);
5114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5115
5116 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5117 GSI_CONTINUE_LINKING);
5118 assign_stmt = gimple_build_assign (endvar, t1);
5119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5120 if (fd->collapse > 1)
5121 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5122
5123 /* Remove the GIMPLE_OMP_FOR statement. */
5124 gsi = gsi_for_stmt (for_stmt);
5125 gsi_remove (&gsi, true);
5126
5127 gsi = gsi_last_nondebug_bb (cont_bb);
5128 gsi_remove (&gsi, true);
5129
5130 gsi = gsi_last_nondebug_bb (exit_bb);
5131 gsi_remove (&gsi, true);
5132
5133 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5134 remove_edge (BRANCH_EDGE (entry_bb));
5135 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5136 remove_edge (BRANCH_EDGE (cont_bb));
5137 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5138 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5139 recompute_dominator (CDI_DOMINATORS, region->entry));
5140 }
5141
5142 /* Taskloop construct is represented after gimplification with
5143 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5144 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5145 The GOMP_taskloop{,_ull} functions arrange for each task to be given just
5146 a single range of iterations. */
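/* Illustrative sketch only (variable names invented): the runtime hands
   each task its sub-range in a pair of _looptemp_ variables, and the code
   generated below conceptually amounts to

     start = _looptemp_0;	// this task's first iteration
     end = _looptemp_1;		// end of this task's range (exclusive)
     for (V = start; V < end; V += STEP)
       BODY;

   where V, STEP and BODY come from the original loop.  */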
5147
5148 static void
5149 expand_omp_taskloop_for_inner (struct omp_region *region,
5150 struct omp_for_data *fd,
5151 gimple *inner_stmt)
5152 {
5153 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5154 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5155 basic_block fin_bb;
5156 gimple_stmt_iterator gsi;
5157 edge ep;
5158 bool broken_loop = region->cont == NULL;
5159 tree *counts = NULL;
5160 tree n1, n2, step;
5161
5162 itype = type = TREE_TYPE (fd->loop.v);
5163 if (POINTER_TYPE_P (type))
5164 itype = signed_type_for (type);
5165
5166 /* See if we need to bias by LLONG_MIN. */
5167 if (fd->iter_type == long_long_unsigned_type_node
5168 && TREE_CODE (type) == INTEGER_TYPE
5169 && !TYPE_UNSIGNED (type))
5170 {
5171 tree n1, n2;
5172
5173 if (fd->loop.cond_code == LT_EXPR)
5174 {
5175 n1 = fd->loop.n1;
5176 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5177 }
5178 else
5179 {
5180 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5181 n2 = fd->loop.n1;
5182 }
5183 if (TREE_CODE (n1) != INTEGER_CST
5184 || TREE_CODE (n2) != INTEGER_CST
5185 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5186 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5187 }
5188
5189 entry_bb = region->entry;
5190 cont_bb = region->cont;
5191 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5192 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5193 gcc_assert (broken_loop
5194 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5195 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5196 if (!broken_loop)
5197 {
5198 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5199 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5200 }
5201 exit_bb = region->exit;
5202
5203 /* Iteration space partitioning goes in ENTRY_BB. */
5204 gsi = gsi_last_nondebug_bb (entry_bb);
5205 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5206
5207 if (fd->collapse > 1)
5208 {
5209 int first_zero_iter = -1, dummy = -1;
5210 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5211
5212 counts = XALLOCAVEC (tree, fd->collapse);
5213 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5214 fin_bb, first_zero_iter,
5215 dummy_bb, dummy, l2_dom_bb);
5216 t = NULL_TREE;
5217 }
5218 else
5219 t = integer_one_node;
5220
5221 step = fd->loop.step;
5222 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5223 OMP_CLAUSE__LOOPTEMP_);
5224 gcc_assert (innerc);
5225 n1 = OMP_CLAUSE_DECL (innerc);
5226 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5227 gcc_assert (innerc);
5228 n2 = OMP_CLAUSE_DECL (innerc);
5229 if (bias)
5230 {
5231 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5232 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5233 }
5234 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5235 true, NULL_TREE, true, GSI_SAME_STMT);
5236 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5237 true, NULL_TREE, true, GSI_SAME_STMT);
5238 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5239 true, NULL_TREE, true, GSI_SAME_STMT);
5240
5241 tree startvar = fd->loop.v;
5242 tree endvar = NULL_TREE;
5243
5244 if (gimple_omp_for_combined_p (fd->for_stmt))
5245 {
5246 tree clauses = gimple_omp_for_clauses (inner_stmt);
5247 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5248 gcc_assert (innerc);
5249 startvar = OMP_CLAUSE_DECL (innerc);
5250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5251 OMP_CLAUSE__LOOPTEMP_);
5252 gcc_assert (innerc);
5253 endvar = OMP_CLAUSE_DECL (innerc);
5254 }
5255 t = fold_convert (TREE_TYPE (startvar), n1);
5256 t = force_gimple_operand_gsi (&gsi, t,
5257 DECL_P (startvar)
5258 && TREE_ADDRESSABLE (startvar),
5259 NULL_TREE, false, GSI_CONTINUE_LINKING);
5260 gimple *assign_stmt = gimple_build_assign (startvar, t);
5261 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5262
5263 t = fold_convert (TREE_TYPE (startvar), n2);
5264 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5265 false, GSI_CONTINUE_LINKING);
5266 if (endvar)
5267 {
5268 assign_stmt = gimple_build_assign (endvar, e);
5269 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5270 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5271 assign_stmt = gimple_build_assign (fd->loop.v, e);
5272 else
5273 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5274 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5275 }
5276 if (fd->collapse > 1)
5277 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5278
5279 if (!broken_loop)
5280 {
5281 /* The code controlling the sequential loop replaces the
5282 GIMPLE_OMP_CONTINUE. */
5283 gsi = gsi_last_nondebug_bb (cont_bb);
5284 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5285 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5286 vmain = gimple_omp_continue_control_use (cont_stmt);
5287 vback = gimple_omp_continue_control_def (cont_stmt);
5288
5289 if (!gimple_omp_for_combined_p (fd->for_stmt))
5290 {
5291 if (POINTER_TYPE_P (type))
5292 t = fold_build_pointer_plus (vmain, step);
5293 else
5294 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5295 t = force_gimple_operand_gsi (&gsi, t,
5296 DECL_P (vback)
5297 && TREE_ADDRESSABLE (vback),
5298 NULL_TREE, true, GSI_SAME_STMT);
5299 assign_stmt = gimple_build_assign (vback, t);
5300 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5301
5302 t = build2 (fd->loop.cond_code, boolean_type_node,
5303 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5304 ? t : vback, e);
5305 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5306 }
5307
5308 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5309 gsi_remove (&gsi, true);
5310
5311 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5312 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5313 }
5314
5315 /* Remove the GIMPLE_OMP_FOR statement. */
5316 gsi = gsi_for_stmt (fd->for_stmt);
5317 gsi_remove (&gsi, true);
5318
5319 /* Remove the GIMPLE_OMP_RETURN statement. */
5320 gsi = gsi_last_nondebug_bb (exit_bb);
5321 gsi_remove (&gsi, true);
5322
5323 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5324 if (!broken_loop)
5325 remove_edge (BRANCH_EDGE (entry_bb));
5326 else
5327 {
5328 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5329 region->outer->cont = NULL;
5330 }
5331
5332 /* Connect all the blocks. */
5333 if (!broken_loop)
5334 {
5335 ep = find_edge (cont_bb, body_bb);
5336 if (gimple_omp_for_combined_p (fd->for_stmt))
5337 {
5338 remove_edge (ep);
5339 ep = NULL;
5340 }
5341 else if (fd->collapse > 1)
5342 {
5343 remove_edge (ep);
5344 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5345 }
5346 else
5347 ep->flags = EDGE_TRUE_VALUE;
5348 find_edge (cont_bb, fin_bb)->flags
5349 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5350 }
5351
5352 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5353 recompute_dominator (CDI_DOMINATORS, body_bb));
5354 if (!broken_loop)
5355 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5356 recompute_dominator (CDI_DOMINATORS, fin_bb));
5357
5358 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5359 {
5360 struct loop *loop = alloc_loop ();
5361 loop->header = body_bb;
5362 if (collapse_bb == NULL)
5363 loop->latch = cont_bb;
5364 add_loop (loop, body_bb->loop_father);
5365 }
5366 }
5367
5368 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5369 partitioned loop. The lowering here is abstracted, in that the
5370 loop parameters are passed through internal functions, which are
5371 further lowered by oacc_device_lower, once we get to the target
5372 compiler. The loop is of the form:
5373
5374 for (V = B; V LTGT E; V += S) {BODY}
5375
5376 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5377 (constant 0 for no chunking) and we will have a GWV partitioning
5378 mask, specifying dimensions over which the loop is to be
5379 partitioned (see note below). We generate code that looks like
5380 (this ignores tiling):
5381
5382 <entry_bb> [incoming FALL->body, BRANCH->exit]
5383 typedef signedintify (typeof (V)) T; // underlying signed integral type
5384 T range = E - B;
5385 T chunk_no = 0;
5386 T DIR = LTGT == '<' ? +1 : -1;
5387 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5388 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5389
5390 <head_bb> [created by splitting end of entry_bb]
5391 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5392 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5393 if (!(offset LTGT bound)) goto bottom_bb;
5394
5395 <body_bb> [incoming]
5396 V = B + offset;
5397 {BODY}
5398
5399 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5400 offset += step;
5401 if (offset LTGT bound) goto body_bb; [*]
5402
5403 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5404 chunk_no++;
5405 if (chunk_no < chunk_max) goto head_bb;
5406
5407 <exit_bb> [incoming]
5408 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5409
5410 [*] Needed if V live at end of loop. */
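/* For reference, a user-level OpenACC loop reaching this expansion could
   look like (illustrative only)

     #pragma acc parallel loop
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   The GOACC_LOOP_* internal-function calls emitted below only carry the
   chunking and partitioning parameters; they are turned into concrete
   gang/worker/vector code by oacc_device_lower once the target compiler
   is reached.  */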
5411
5412 static void
5413 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5414 {
5415 tree v = fd->loop.v;
5416 enum tree_code cond_code = fd->loop.cond_code;
5417 enum tree_code plus_code = PLUS_EXPR;
5418
5419 tree chunk_size = integer_minus_one_node;
5420 tree gwv = integer_zero_node;
5421 tree iter_type = TREE_TYPE (v);
5422 tree diff_type = iter_type;
5423 tree plus_type = iter_type;
5424 struct oacc_collapse *counts = NULL;
5425
5426 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5427 == GF_OMP_FOR_KIND_OACC_LOOP);
5428 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5429 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5430
5431 if (POINTER_TYPE_P (iter_type))
5432 {
5433 plus_code = POINTER_PLUS_EXPR;
5434 plus_type = sizetype;
5435 }
5436 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5437 diff_type = signed_type_for (diff_type);
5438 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5439 diff_type = integer_type_node;
5440
5441 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5442 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5443 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5444 basic_block bottom_bb = NULL;
5445
5446 /* entry_bb has two successors; the branch edge is to the exit
5447 block, fallthrough edge to body. */
5448 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5449 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5450
5451 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5452 body_bb, or to a block whose only successor is the body_bb. Its
5453 fallthrough successor is the final block (same as the branch
5454 successor of the entry_bb). */
5455 if (cont_bb)
5456 {
5457 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5458 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5459
5460 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5461 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5462 }
5463 else
5464 gcc_assert (!gimple_in_ssa_p (cfun));
5465
5466 /* The exit block only has entry_bb and cont_bb as predecessors. */
5467 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5468
5469 tree chunk_no;
5470 tree chunk_max = NULL_TREE;
5471 tree bound, offset;
5472 tree step = create_tmp_var (diff_type, ".step");
5473 bool up = cond_code == LT_EXPR;
5474 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5475 bool chunking = !gimple_in_ssa_p (cfun);
5476 bool negating;
5477
5478 /* Tiling vars. */
5479 tree tile_size = NULL_TREE;
5480 tree element_s = NULL_TREE;
5481 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5482 basic_block elem_body_bb = NULL;
5483 basic_block elem_cont_bb = NULL;
5484
5485 /* SSA instances. */
5486 tree offset_incr = NULL_TREE;
5487 tree offset_init = NULL_TREE;
5488
5489 gimple_stmt_iterator gsi;
5490 gassign *ass;
5491 gcall *call;
5492 gimple *stmt;
5493 tree expr;
5494 location_t loc;
5495 edge split, be, fte;
5496
5497 /* Split the end of entry_bb to create head_bb. */
5498 split = split_block (entry_bb, last_stmt (entry_bb));
5499 basic_block head_bb = split->dest;
5500 entry_bb = split->src;
5501
5502 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5503 gsi = gsi_last_nondebug_bb (entry_bb);
5504 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5505 loc = gimple_location (for_stmt);
5506
5507 if (gimple_in_ssa_p (cfun))
5508 {
5509 offset_init = gimple_omp_for_index (for_stmt, 0);
5510 gcc_assert (integer_zerop (fd->loop.n1));
5511 /* The SSA parallelizer does gang parallelism. */
5512 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5513 }
5514
5515 if (fd->collapse > 1 || fd->tiling)
5516 {
5517 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5518 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5519 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5520 TREE_TYPE (fd->loop.n2), loc);
5521
5522 if (SSA_VAR_P (fd->loop.n2))
5523 {
5524 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5525 true, GSI_SAME_STMT);
5526 ass = gimple_build_assign (fd->loop.n2, total);
5527 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5528 }
5529 }
5530
5531 tree b = fd->loop.n1;
5532 tree e = fd->loop.n2;
5533 tree s = fd->loop.step;
5534
5535 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5536 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5537
5538 /* Convert the step, avoiding possible unsigned->signed overflow. */
5539 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5540 if (negating)
5541 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5542 s = fold_convert (diff_type, s);
5543 if (negating)
5544 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5545 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5546
5547 if (!chunking)
5548 chunk_size = integer_zero_node;
5549 expr = fold_convert (diff_type, chunk_size);
5550 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5551 NULL_TREE, true, GSI_SAME_STMT);
5552
5553 if (fd->tiling)
5554 {
5555 /* Determine the tile size and element step,
5556 modify the outer loop step size. */
5557 tile_size = create_tmp_var (diff_type, ".tile_size");
5558 expr = build_int_cst (diff_type, 1);
5559 for (int ix = 0; ix < fd->collapse; ix++)
5560 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5561 expr = force_gimple_operand_gsi (&gsi, expr, true,
5562 NULL_TREE, true, GSI_SAME_STMT);
5563 ass = gimple_build_assign (tile_size, expr);
5564 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5565
5566 element_s = create_tmp_var (diff_type, ".element_s");
5567 ass = gimple_build_assign (element_s, s);
5568 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5569
5570 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5571 s = force_gimple_operand_gsi (&gsi, expr, true,
5572 NULL_TREE, true, GSI_SAME_STMT);
5573 }
5574
5575 /* Determine the range, avoiding possible unsigned->signed overflow. */
5576 negating = !up && TYPE_UNSIGNED (iter_type);
5577 expr = fold_build2 (MINUS_EXPR, plus_type,
5578 fold_convert (plus_type, negating ? b : e),
5579 fold_convert (plus_type, negating ? e : b));
5580 expr = fold_convert (diff_type, expr);
5581 if (negating)
5582 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5583 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5584 NULL_TREE, true, GSI_SAME_STMT);
5585
5586 chunk_no = build_int_cst (diff_type, 0);
5587 if (chunking)
5588 {
5589 gcc_assert (!gimple_in_ssa_p (cfun));
5590
5591 expr = chunk_no;
5592 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5593 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5594
5595 ass = gimple_build_assign (chunk_no, expr);
5596 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5597
5598 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5599 build_int_cst (integer_type_node,
5600 IFN_GOACC_LOOP_CHUNKS),
5601 dir, range, s, chunk_size, gwv);
5602 gimple_call_set_lhs (call, chunk_max);
5603 gimple_set_location (call, loc);
5604 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5605 }
5606 else
5607 chunk_size = chunk_no;
5608
5609 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5610 build_int_cst (integer_type_node,
5611 IFN_GOACC_LOOP_STEP),
5612 dir, range, s, chunk_size, gwv);
5613 gimple_call_set_lhs (call, step);
5614 gimple_set_location (call, loc);
5615 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5616
5617 /* Remove the GIMPLE_OMP_FOR. */
5618 gsi_remove (&gsi, true);
5619
5620 /* Fixup edges from head_bb. */
5621 be = BRANCH_EDGE (head_bb);
5622 fte = FALLTHRU_EDGE (head_bb);
5623 be->flags |= EDGE_FALSE_VALUE;
5624 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5625
5626 basic_block body_bb = fte->dest;
5627
5628 if (gimple_in_ssa_p (cfun))
5629 {
5630 gsi = gsi_last_nondebug_bb (cont_bb);
5631 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5632
5633 offset = gimple_omp_continue_control_use (cont_stmt);
5634 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5635 }
5636 else
5637 {
5638 offset = create_tmp_var (diff_type, ".offset");
5639 offset_init = offset_incr = offset;
5640 }
5641 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5642
5643 /* Loop offset & bound go into head_bb. */
5644 gsi = gsi_start_bb (head_bb);
5645
5646 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5647 build_int_cst (integer_type_node,
5648 IFN_GOACC_LOOP_OFFSET),
5649 dir, range, s,
5650 chunk_size, gwv, chunk_no);
5651 gimple_call_set_lhs (call, offset_init);
5652 gimple_set_location (call, loc);
5653 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5654
5655 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5656 build_int_cst (integer_type_node,
5657 IFN_GOACC_LOOP_BOUND),
5658 dir, range, s,
5659 chunk_size, gwv, offset_init);
5660 gimple_call_set_lhs (call, bound);
5661 gimple_set_location (call, loc);
5662 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5663
5664 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5665 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5666 GSI_CONTINUE_LINKING);
5667
5668 /* V assignment goes into body_bb. */
5669 if (!gimple_in_ssa_p (cfun))
5670 {
5671 gsi = gsi_start_bb (body_bb);
5672
5673 expr = build2 (plus_code, iter_type, b,
5674 fold_convert (plus_type, offset));
5675 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5676 true, GSI_SAME_STMT);
5677 ass = gimple_build_assign (v, expr);
5678 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5679
5680 if (fd->collapse > 1 || fd->tiling)
5681 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5682
5683 if (fd->tiling)
5684 {
5685 /* Determine the range of the element loop -- usually simply
5686 the tile_size, but could be smaller if the final
5687 iteration of the outer loop is a partial tile. */
5688 tree e_range = create_tmp_var (diff_type, ".e_range");
5689
5690 expr = build2 (MIN_EXPR, diff_type,
5691 build2 (MINUS_EXPR, diff_type, bound, offset),
5692 build2 (MULT_EXPR, diff_type, tile_size,
5693 element_s));
5694 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5695 true, GSI_SAME_STMT);
5696 ass = gimple_build_assign (e_range, expr);
5697 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5698
5699 /* Determine bound, offset & step of inner loop. */
5700 e_bound = create_tmp_var (diff_type, ".e_bound");
5701 e_offset = create_tmp_var (diff_type, ".e_offset");
5702 e_step = create_tmp_var (diff_type, ".e_step");
5703
5704 /* Mark these as element loops. */
5705 tree t, e_gwv = integer_minus_one_node;
5706 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5707
5708 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5709 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5710 element_s, chunk, e_gwv, chunk);
5711 gimple_call_set_lhs (call, e_offset);
5712 gimple_set_location (call, loc);
5713 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5714
5715 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5716 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5717 element_s, chunk, e_gwv, e_offset);
5718 gimple_call_set_lhs (call, e_bound);
5719 gimple_set_location (call, loc);
5720 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5721
5722 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5723 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5724 element_s, chunk, e_gwv);
5725 gimple_call_set_lhs (call, e_step);
5726 gimple_set_location (call, loc);
5727 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5728
5729 /* Add test and split block. */
5730 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5731 stmt = gimple_build_cond_empty (expr);
5732 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5733 split = split_block (body_bb, stmt);
5734 elem_body_bb = split->dest;
5735 if (cont_bb == body_bb)
5736 cont_bb = elem_body_bb;
5737 body_bb = split->src;
5738
5739 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5740
5741 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5742 if (cont_bb == NULL)
5743 {
5744 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5745 e->probability = profile_probability::even ();
5746 split->probability = profile_probability::even ();
5747 }
5748
5749 /* Initialize the user's loop vars. */
5750 gsi = gsi_start_bb (elem_body_bb);
5751 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5752 }
5753 }
5754
5755 /* Loop increment goes into cont_bb. If this is not a loop, we
5756 will have spawned threads as if it was, and each one will
5757 execute one iteration. The specification is not explicit about
5758 whether such constructs are ill-formed or not, and they can
5759 occur, especially when noreturn routines are involved. */
5760 if (cont_bb)
5761 {
5762 gsi = gsi_last_nondebug_bb (cont_bb);
5763 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5764 loc = gimple_location (cont_stmt);
5765
5766 if (fd->tiling)
5767 {
5768 /* Insert element loop increment and test. */
5769 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5770 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5771 true, GSI_SAME_STMT);
5772 ass = gimple_build_assign (e_offset, expr);
5773 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5774 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5775
5776 stmt = gimple_build_cond_empty (expr);
5777 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5778 split = split_block (cont_bb, stmt);
5779 elem_cont_bb = split->src;
5780 cont_bb = split->dest;
5781
5782 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5783 split->probability = profile_probability::unlikely ().guessed ();
5784 edge latch_edge
5785 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5786 latch_edge->probability = profile_probability::likely ().guessed ();
5787
5788 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5789 skip_edge->probability = profile_probability::unlikely ().guessed ();
5790 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5791 loop_entry_edge->probability
5792 = profile_probability::likely ().guessed ();
5793
5794 gsi = gsi_for_stmt (cont_stmt);
5795 }
5796
5797 /* Increment offset. */
5798 if (gimple_in_ssa_p (cfun))
5799 expr = build2 (plus_code, iter_type, offset,
5800 fold_convert (plus_type, step));
5801 else
5802 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5803 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5804 true, GSI_SAME_STMT);
5805 ass = gimple_build_assign (offset_incr, expr);
5806 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5807 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5808 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5809
5810 /* Remove the GIMPLE_OMP_CONTINUE. */
5811 gsi_remove (&gsi, true);
5812
5813 /* Fixup edges from cont_bb. */
5814 be = BRANCH_EDGE (cont_bb);
5815 fte = FALLTHRU_EDGE (cont_bb);
5816 be->flags |= EDGE_TRUE_VALUE;
5817 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5818
5819 if (chunking)
5820 {
5821 /* Split the beginning of exit_bb to make bottom_bb. We
5822 need to insert a nop at the start, because splitting is
5823 after a stmt, not before. */
5824 gsi = gsi_start_bb (exit_bb);
5825 stmt = gimple_build_nop ();
5826 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5827 split = split_block (exit_bb, stmt);
5828 bottom_bb = split->src;
5829 exit_bb = split->dest;
5830 gsi = gsi_last_bb (bottom_bb);
5831
5832 /* Chunk increment and test goes into bottom_bb. */
5833 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5834 build_int_cst (diff_type, 1));
5835 ass = gimple_build_assign (chunk_no, expr);
5836 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5837
5838 /* Chunk test at end of bottom_bb. */
5839 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5840 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5841 GSI_CONTINUE_LINKING);
5842
5843 /* Fixup edges from bottom_bb. */
5844 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5845 split->probability = profile_probability::unlikely ().guessed ();
5846 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5847 latch_edge->probability = profile_probability::likely ().guessed ();
5848 }
5849 }
5850
5851 gsi = gsi_last_nondebug_bb (exit_bb);
5852 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5853 loc = gimple_location (gsi_stmt (gsi));
5854
5855 if (!gimple_in_ssa_p (cfun))
5856 {
5857 /* Insert the final value of V, in case it is live. This is the
5858 value for the only thread that survives past the join. */
5859 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5860 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5861 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5862 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5863 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5864 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5865 true, GSI_SAME_STMT);
5866 ass = gimple_build_assign (v, expr);
5867 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5868 }
5869
5870 /* Remove the OMP_RETURN. */
5871 gsi_remove (&gsi, true);
5872
5873 if (cont_bb)
5874 {
5875 /* We now have one, two or three nested loops. Update the loop
5876 structures. */
5877 struct loop *parent = entry_bb->loop_father;
5878 struct loop *body = body_bb->loop_father;
5879
5880 if (chunking)
5881 {
5882 struct loop *chunk_loop = alloc_loop ();
5883 chunk_loop->header = head_bb;
5884 chunk_loop->latch = bottom_bb;
5885 add_loop (chunk_loop, parent);
5886 parent = chunk_loop;
5887 }
5888 else if (parent != body)
5889 {
5890 gcc_assert (body->header == body_bb);
5891 gcc_assert (body->latch == cont_bb
5892 || single_pred (body->latch) == cont_bb);
5893 parent = NULL;
5894 }
5895
5896 if (parent)
5897 {
5898 struct loop *body_loop = alloc_loop ();
5899 body_loop->header = body_bb;
5900 body_loop->latch = cont_bb;
5901 add_loop (body_loop, parent);
5902
5903 if (fd->tiling)
5904 {
5905 /* Insert tiling's element loop. */
5906 struct loop *inner_loop = alloc_loop ();
5907 inner_loop->header = elem_body_bb;
5908 inner_loop->latch = elem_cont_bb;
5909 add_loop (inner_loop, body_loop);
5910 }
5911 }
5912 }
5913 }
5914
5915 /* Expand the OMP loop defined by REGION. */
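/* Rough illustration (example invented for this comment): a worksharing
   loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   is not handled by the specialized expanders (SIMD, OpenACC, taskloop,
   static schedule without ordered) and therefore goes through
   expand_omp_for_generic, which brackets the body with a GOMP_loop_*_start
   call and a GOMP_loop_*_next call; the exact libgomp entry points are
   selected by the switch below over schedule kind, modifiers and the
   iteration type.  */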
5916
5917 static void
5918 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5919 {
5920 struct omp_for_data fd;
5921 struct omp_for_data_loop *loops;
5922
5923 loops
5924 = (struct omp_for_data_loop *)
5925 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5926 * sizeof (struct omp_for_data_loop));
5927 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5928 &fd, loops);
5929 region->sched_kind = fd.sched_kind;
5930 region->sched_modifiers = fd.sched_modifiers;
5931
5932 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5933 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5934 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5935 if (region->cont)
5936 {
5937 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5938 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5939 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5940 }
5941 else
5942 /* If there isn't a continue then this is a degenerate case where
5943 the introduction of abnormal edges during lowering will prevent
5944 original loops from being detected. Fix that up. */
5945 loops_state_set (LOOPS_NEED_FIXUP);
5946
5947 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5948 expand_omp_simd (region, &fd);
5949 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5950 {
5951 gcc_assert (!inner_stmt);
5952 expand_oacc_for (region, &fd);
5953 }
5954 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5955 {
5956 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5957 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5958 else
5959 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5960 }
5961 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5962 && !fd.have_ordered)
5963 {
5964 if (fd.chunk_size == NULL)
5965 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5966 else
5967 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5968 }
5969 else
5970 {
5971 int fn_index, start_ix, next_ix;
5972 unsigned HOST_WIDE_INT sched = 0;
5973 tree sched_arg = NULL_TREE;
5974
5975 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5976 == GF_OMP_FOR_KIND_FOR);
5977 if (fd.chunk_size == NULL
5978 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5979 fd.chunk_size = integer_zero_node;
5980 switch (fd.sched_kind)
5981 {
5982 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5983 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
5984 {
5985 gcc_assert (!fd.have_ordered);
5986 fn_index = 6;
5987 sched = 4;
5988 }
5989 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5990 && !fd.have_ordered)
5991 fn_index = 7;
5992 else
5993 {
5994 fn_index = 3;
5995 sched = (HOST_WIDE_INT_1U << 31);
5996 }
5997 break;
5998 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5999 case OMP_CLAUSE_SCHEDULE_GUIDED:
6000 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6001 && !fd.have_ordered)
6002 {
6003 fn_index = 3 + fd.sched_kind;
6004 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6005 break;
6006 }
6007 fn_index = fd.sched_kind;
6008 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6009 sched += (HOST_WIDE_INT_1U << 31);
6010 break;
6011 case OMP_CLAUSE_SCHEDULE_STATIC:
6012 gcc_assert (fd.have_ordered);
6013 fn_index = 0;
6014 sched = (HOST_WIDE_INT_1U << 31) + 1;
6015 break;
6016 default:
6017 gcc_unreachable ();
6018 }
6019 if (!fd.ordered)
6020 fn_index += fd.have_ordered * 8;
6021 if (fd.ordered)
6022 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6023 else
6024 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6025 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6026 if (fd.have_reductemp)
6027 {
6028 if (fd.ordered)
6029 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6030 else if (fd.have_ordered)
6031 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6032 else
6033 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6034 sched_arg = build_int_cstu (long_integer_type_node, sched);
6035 if (!fd.chunk_size)
6036 fd.chunk_size = integer_zero_node;
6037 }
6038 if (fd.iter_type == long_long_unsigned_type_node)
6039 {
6040 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6041 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6042 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6043 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6044 }
6045 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6046 (enum built_in_function) next_ix, sched_arg,
6047 inner_stmt);
6048 }
6049
6050 if (gimple_in_ssa_p (cfun))
6051 update_ssa (TODO_update_ssa_only_virtuals);
6052 }
6053
6054 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6055
6056 v = GOMP_sections_start (n);
6057 L0:
6058 switch (v)
6059 {
6060 case 0:
6061 goto L2;
6062 case 1:
6063 section 1;
6064 goto L1;
6065 case 2:
6066 ...
6067 case n:
6068 ...
6069 default:
6070 abort ();
6071 }
6072 L1:
6073 v = GOMP_sections_next ();
6074 goto L0;
6075 L2:
6076 reduction;
6077
6078 If this is a combined parallel sections, replace the call to
6079 GOMP_sections_start with call to GOMP_sections_next. */
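/* Concrete (purely illustrative) instance: for

     #pragma omp sections
     {
     #pragma omp section
       foo ();
     #pragma omp section
       bar ();
     }

   the switch dispatches on the value returned by GOMP_sections_start (2),
   or by GOMP_sections_next () in the combined parallel sections case:
   case 1 runs foo (), case 2 runs bar (), and case 0 jumps past the
   construct.  */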
6080
6081 static void
6082 expand_omp_sections (struct omp_region *region)
6083 {
6084 tree t, u, vin = NULL, vmain, vnext, l2;
6085 unsigned len;
6086 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6087 gimple_stmt_iterator si, switch_si;
6088 gomp_sections *sections_stmt;
6089 gimple *stmt;
6090 gomp_continue *cont;
6091 edge_iterator ei;
6092 edge e;
6093 struct omp_region *inner;
6094 unsigned i, casei;
6095 bool exit_reachable = region->cont != NULL;
6096
6097 gcc_assert (region->exit != NULL);
6098 entry_bb = region->entry;
6099 l0_bb = single_succ (entry_bb);
6100 l1_bb = region->cont;
6101 l2_bb = region->exit;
6102 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6103 l2 = gimple_block_label (l2_bb);
6104 else
6105 {
6106 /* This can happen if there are reductions. */
6107 len = EDGE_COUNT (l0_bb->succs);
6108 gcc_assert (len > 0);
6109 e = EDGE_SUCC (l0_bb, len - 1);
6110 si = gsi_last_nondebug_bb (e->dest);
6111 l2 = NULL_TREE;
6112 if (gsi_end_p (si)
6113 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6114 l2 = gimple_block_label (e->dest);
6115 else
6116 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6117 {
6118 si = gsi_last_nondebug_bb (e->dest);
6119 if (gsi_end_p (si)
6120 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6121 {
6122 l2 = gimple_block_label (e->dest);
6123 break;
6124 }
6125 }
6126 }
6127 if (exit_reachable)
6128 default_bb = create_empty_bb (l1_bb->prev_bb);
6129 else
6130 default_bb = create_empty_bb (l0_bb);
6131
6132 /* We will build a switch() with enough cases for all the
6133 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6134 and a default case to abort if something goes wrong. */
6135 len = EDGE_COUNT (l0_bb->succs);
6136
6137 /* Use vec::quick_push on label_vec throughout, since we know the size
6138 in advance. */
6139 auto_vec<tree> label_vec (len);
6140
6141 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6142 GIMPLE_OMP_SECTIONS statement. */
6143 si = gsi_last_nondebug_bb (entry_bb);
6144 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6145 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6146 vin = gimple_omp_sections_control (sections_stmt);
6147 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6148 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6149 if (reductmp)
6150 {
6151 tree reductions = OMP_CLAUSE_DECL (reductmp);
6152 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6153 gimple *g = SSA_NAME_DEF_STMT (reductions);
6154 reductions = gimple_assign_rhs1 (g);
6155 OMP_CLAUSE_DECL (reductmp) = reductions;
6156 gimple_stmt_iterator gsi = gsi_for_stmt (g);
6157 t = build_int_cst (unsigned_type_node, len - 1);
6158 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6159 stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
6160 gimple_call_set_lhs (stmt, vin);
6161 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6162 gsi_remove (&gsi, true);
6163 release_ssa_name (gimple_assign_lhs (g));
6164 }
6165 else if (!is_combined_parallel (region))
6166 {
6167 /* If we are not inside a combined parallel+sections region,
6168 call GOMP_sections_start. */
6169 t = build_int_cst (unsigned_type_node, len - 1);
6170 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6171 stmt = gimple_build_call (u, 1, t);
6172 }
6173 else
6174 {
6175 /* Otherwise, call GOMP_sections_next. */
6176 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6177 stmt = gimple_build_call (u, 0);
6178 }
6179 if (!reductmp)
6180 {
6181 gimple_call_set_lhs (stmt, vin);
6182 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6183 }
6184 gsi_remove (&si, true);
6185
6186 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6187 L0_BB. */
6188 switch_si = gsi_last_nondebug_bb (l0_bb);
6189 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6190 if (exit_reachable)
6191 {
6192 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6193 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6194 vmain = gimple_omp_continue_control_use (cont);
6195 vnext = gimple_omp_continue_control_def (cont);
6196 }
6197 else
6198 {
6199 vmain = vin;
6200 vnext = NULL_TREE;
6201 }
6202
6203 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6204 label_vec.quick_push (t);
6205 i = 1;
6206
6207 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6208 for (inner = region->inner, casei = 1;
6209 inner;
6210 inner = inner->next, i++, casei++)
6211 {
6212 basic_block s_entry_bb, s_exit_bb;
6213
6214 /* Skip optional reduction region. */
6215 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6216 {
6217 --i;
6218 --casei;
6219 continue;
6220 }
6221
6222 s_entry_bb = inner->entry;
6223 s_exit_bb = inner->exit;
6224
6225 t = gimple_block_label (s_entry_bb);
6226 u = build_int_cst (unsigned_type_node, casei);
6227 u = build_case_label (u, NULL, t);
6228 label_vec.quick_push (u);
6229
6230 si = gsi_last_nondebug_bb (s_entry_bb);
6231 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6232 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6233 gsi_remove (&si, true);
6234 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6235
6236 if (s_exit_bb == NULL)
6237 continue;
6238
6239 si = gsi_last_nondebug_bb (s_exit_bb);
6240 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6241 gsi_remove (&si, true);
6242
6243 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6244 }
6245
6246 /* Error handling code goes in DEFAULT_BB. */
6247 t = gimple_block_label (default_bb);
6248 u = build_case_label (NULL, NULL, t);
6249 make_edge (l0_bb, default_bb, 0);
6250 add_bb_to_loop (default_bb, current_loops->tree_root);
6251
6252 stmt = gimple_build_switch (vmain, u, label_vec);
6253 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6254 gsi_remove (&switch_si, true);
6255
6256 si = gsi_start_bb (default_bb);
6257 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6258 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6259
6260 if (exit_reachable)
6261 {
6262 tree bfn_decl;
6263
6264 /* Code to get the next section goes in L1_BB. */
6265 si = gsi_last_nondebug_bb (l1_bb);
6266 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6267
6268 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6269 stmt = gimple_build_call (bfn_decl, 0);
6270 gimple_call_set_lhs (stmt, vnext);
6271 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6272 gsi_remove (&si, true);
6273
6274 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6275 }
6276
6277 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6278 si = gsi_last_nondebug_bb (l2_bb);
6279 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6280 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6281 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6282 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6283 else
6284 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6285 stmt = gimple_build_call (t, 0);
6286 if (gimple_omp_return_lhs (gsi_stmt (si)))
6287 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6288 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6289 gsi_remove (&si, true);
6290
6291 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6292 }
6293
6294 /* Expand code for an OpenMP single directive. We've already expanded
6295 much of the code; here we simply place the GOMP_barrier call. */
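/* E.g. (illustrative only), for

     #pragma omp single
       foo ();

   the GIMPLE_OMP_SINGLE marker is simply deleted here and, unless the
   construct had a nowait clause, a barrier call replaces the matching
   GIMPLE_OMP_RETURN.  */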
6296
6297 static void
6298 expand_omp_single (struct omp_region *region)
6299 {
6300 basic_block entry_bb, exit_bb;
6301 gimple_stmt_iterator si;
6302
6303 entry_bb = region->entry;
6304 exit_bb = region->exit;
6305
6306 si = gsi_last_nondebug_bb (entry_bb);
6307 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6308 gsi_remove (&si, true);
6309 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6310
6311 si = gsi_last_nondebug_bb (exit_bb);
6312 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6313 {
6314 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6315 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6316 }
6317 gsi_remove (&si, true);
6318 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6319 }
6320
6321 /* Generic expansion for OpenMP synchronization directives: master,
6322 ordered and critical. All we need to do here is remove the entry
6323 and exit markers for REGION. */
6324
6325 static void
6326 expand_omp_synch (struct omp_region *region)
6327 {
6328 basic_block entry_bb, exit_bb;
6329 gimple_stmt_iterator si;
6330
6331 entry_bb = region->entry;
6332 exit_bb = region->exit;
6333
6334 si = gsi_last_nondebug_bb (entry_bb);
6335 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6336 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6337 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6338 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6339 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6340 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6341 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6342 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6343 {
6344 expand_omp_taskreg (region);
6345 return;
6346 }
6347 gsi_remove (&si, true);
6348 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6349
6350 if (exit_bb)
6351 {
6352 si = gsi_last_nondebug_bb (exit_bb);
6353 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6354 gsi_remove (&si, true);
6355 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6356 }
6357 }
6358
6359 /* Translate enum omp_memory_order to enum memmodel. The two enums
6360 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
6361 is 0. */
6362
6363 static enum memmodel
6364 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6365 {
6366 switch (mo)
6367 {
6368 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6369 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6370 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6371 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6372 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6373 default: gcc_unreachable ();
6374 }
6375 }
6376
6377 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6378 operation as a normal volatile load. */
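/* Minimal sketch, assuming a 4-byte type so that INDEX selects the "_4"
   builtin: the construct

     #pragma omp atomic read
       v = *p;

   is expanded to roughly

     v = __atomic_load_4 (p, mo);

   where mo is derived from the construct's memory-order clause via
   omp_memory_order_to_memmodel.  */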
6379
6380 static bool
6381 expand_omp_atomic_load (basic_block load_bb, tree addr,
6382 tree loaded_val, int index)
6383 {
6384 enum built_in_function tmpbase;
6385 gimple_stmt_iterator gsi;
6386 basic_block store_bb;
6387 location_t loc;
6388 gimple *stmt;
6389 tree decl, call, type, itype;
6390
6391 gsi = gsi_last_nondebug_bb (load_bb);
6392 stmt = gsi_stmt (gsi);
6393 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6394 loc = gimple_location (stmt);
6395
6396 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6397 is smaller than word size, then expand_atomic_load assumes that the load
6398 is atomic. We could avoid the builtin entirely in this case. */
6399
6400 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6401 decl = builtin_decl_explicit (tmpbase);
6402 if (decl == NULL_TREE)
6403 return false;
6404
6405 type = TREE_TYPE (loaded_val);
6406 itype = TREE_TYPE (TREE_TYPE (decl));
6407
6408 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6409 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6410 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6411 if (!useless_type_conversion_p (type, itype))
6412 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6413 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6414
6415 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6416 gsi_remove (&gsi, true);
6417
6418 store_bb = single_succ (load_bb);
6419 gsi = gsi_last_nondebug_bb (store_bb);
6420 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6421 gsi_remove (&gsi, true);
6422
6423 if (gimple_in_ssa_p (cfun))
6424 update_ssa (TODO_update_ssa_no_phi);
6425
6426 return true;
6427 }
6428
6429 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6430 operation as a normal volatile store. */
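/* Sketched similarly for a 4-byte type,

     #pragma omp atomic write
       *p = v;

   becomes roughly __atomic_store_4 (p, v, mo), while a capture form that
   also needs the previous value is emitted as an __atomic_exchange_4 call
   instead.  */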
6431
6432 static bool
6433 expand_omp_atomic_store (basic_block load_bb, tree addr,
6434 tree loaded_val, tree stored_val, int index)
6435 {
6436 enum built_in_function tmpbase;
6437 gimple_stmt_iterator gsi;
6438 basic_block store_bb = single_succ (load_bb);
6439 location_t loc;
6440 gimple *stmt;
6441 tree decl, call, type, itype;
6442 machine_mode imode;
6443 bool exchange;
6444
6445 gsi = gsi_last_nondebug_bb (load_bb);
6446 stmt = gsi_stmt (gsi);
6447 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6448
6449 /* If the load value is needed, then this isn't a store but an exchange. */
6450 exchange = gimple_omp_atomic_need_value_p (stmt);
6451
6452 gsi = gsi_last_nondebug_bb (store_bb);
6453 stmt = gsi_stmt (gsi);
6454 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6455 loc = gimple_location (stmt);
6456
6457 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6458 is smaller than word size, then expand_atomic_store assumes that the store
6459 is atomic. We could avoid the builtin entirely in this case. */
6460
6461 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6462 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6463 decl = builtin_decl_explicit (tmpbase);
6464 if (decl == NULL_TREE)
6465 return false;
6466
6467 type = TREE_TYPE (stored_val);
6468
6469 /* Dig out the type of the function's second argument. */
6470 itype = TREE_TYPE (decl);
6471 itype = TYPE_ARG_TYPES (itype);
6472 itype = TREE_CHAIN (itype);
6473 itype = TREE_VALUE (itype);
6474 imode = TYPE_MODE (itype);
6475
6476 if (exchange && !can_atomic_exchange_p (imode, true))
6477 return false;
6478
6479 if (!useless_type_conversion_p (itype, type))
6480 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6481 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6482 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6483 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6484 if (exchange)
6485 {
6486 if (!useless_type_conversion_p (type, itype))
6487 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6488 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6489 }
6490
6491 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6492 gsi_remove (&gsi, true);
6493
6494 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6495 gsi = gsi_last_nondebug_bb (load_bb);
6496 gsi_remove (&gsi, true);
6497
6498 if (gimple_in_ssa_p (cfun))
6499 update_ssa (TODO_update_ssa_no_phi);
6500
6501 return true;
6502 }
6503
6504 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6505 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6506 size of the data type, and thus usable to find the index of the builtin
6507 decl. Returns false if the expression is not of the proper form. */
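/* For example (illustrative only),

     #pragma omp atomic
       x += y;

   matches the pattern checked below with code PLUS_EXPR and, for a 4-byte
   x whose old or new value is not otherwise needed, ends up as roughly
   __atomic_fetch_add_4 (&x, y, mo).  */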
6508
6509 static bool
6510 expand_omp_atomic_fetch_op (basic_block load_bb,
6511 tree addr, tree loaded_val,
6512 tree stored_val, int index)
6513 {
6514 enum built_in_function oldbase, newbase, tmpbase;
6515 tree decl, itype, call;
6516 tree lhs, rhs;
6517 basic_block store_bb = single_succ (load_bb);
6518 gimple_stmt_iterator gsi;
6519 gimple *stmt;
6520 location_t loc;
6521 enum tree_code code;
6522 bool need_old, need_new;
6523 machine_mode imode;
6524
6525 /* We expect to find the following sequences:
6526
6527 load_bb:
6528 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6529
6530 store_bb:
6531 val = tmp OP something; (or: something OP tmp)
6532 GIMPLE_OMP_STORE (val)
6533
6534 ???FIXME: Allow a more flexible sequence.
6535 Perhaps use data flow to pick the statements.
6536
6537 */
6538
6539 gsi = gsi_after_labels (store_bb);
6540 stmt = gsi_stmt (gsi);
6541 if (is_gimple_debug (stmt))
6542 {
6543 gsi_next_nondebug (&gsi);
6544 if (gsi_end_p (gsi))
6545 return false;
6546 stmt = gsi_stmt (gsi);
6547 }
6548 loc = gimple_location (stmt);
6549 if (!is_gimple_assign (stmt))
6550 return false;
6551 gsi_next_nondebug (&gsi);
6552 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6553 return false;
6554 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6555 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6556 enum omp_memory_order omo
6557 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6558 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6559 gcc_checking_assert (!need_old || !need_new);
6560
6561 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6562 return false;
6563
6564 /* Check for one of the supported fetch-op operations. */
6565 code = gimple_assign_rhs_code (stmt);
6566 switch (code)
6567 {
6568 case PLUS_EXPR:
6569 case POINTER_PLUS_EXPR:
6570 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6571 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6572 break;
6573 case MINUS_EXPR:
6574 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6575 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6576 break;
6577 case BIT_AND_EXPR:
6578 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6579 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6580 break;
6581 case BIT_IOR_EXPR:
6582 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6583 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6584 break;
6585 case BIT_XOR_EXPR:
6586 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6587 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6588 break;
6589 default:
6590 return false;
6591 }
6592
6593 /* Make sure the expression is of the proper form. */
6594 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6595 rhs = gimple_assign_rhs2 (stmt);
6596 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6597 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6598 rhs = gimple_assign_rhs1 (stmt);
6599 else
6600 return false;
6601
6602 tmpbase = ((enum built_in_function)
6603 ((need_new ? newbase : oldbase) + index + 1));
6604 decl = builtin_decl_explicit (tmpbase);
6605 if (decl == NULL_TREE)
6606 return false;
6607 itype = TREE_TYPE (TREE_TYPE (decl));
6608 imode = TYPE_MODE (itype);
6609
6610 /* We could test all of the various optabs involved, but the fact of the
6611 matter is that (with the exception of i486 vs i586 and xadd) all targets
6612 that support any atomic operation optab also implement compare-and-swap.
6613 Let optabs.c take care of expanding any compare-and-swap loop. */
6614 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6615 return false;
6616
6617 gsi = gsi_last_nondebug_bb (load_bb);
6618 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6619
6620 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6621 It only requires that the operation happen atomically, so the memory
6622 order derived from the construct's memory-order clause suffices. */
6623 call = build_call_expr_loc (loc, decl, 3, addr,
6624 fold_convert_loc (loc, itype, rhs),
6625 build_int_cst (NULL, mo));
6626
6627 if (need_old || need_new)
6628 {
6629 lhs = need_old ? loaded_val : stored_val;
6630 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6631 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6632 }
6633 else
6634 call = fold_convert_loc (loc, void_type_node, call);
6635 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6636 gsi_remove (&gsi, true);
6637
6638 gsi = gsi_last_nondebug_bb (store_bb);
6639 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6640 gsi_remove (&gsi, true);
6641 gsi = gsi_last_nondebug_bb (store_bb);
6642 stmt = gsi_stmt (gsi);
6643 gsi_remove (&gsi, true);
6644
6645 if (gimple_in_ssa_p (cfun))
6646 {
6647 release_defs (stmt);
6648 update_ssa (TODO_update_ssa_no_phi);
6649 }
6650
6651 return true;
6652 }
6653
6654 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6655
6656 oldval = *addr;
6657 repeat:
6658 newval = rhs; // with oldval replacing *addr in rhs
6659 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6660 if (oldval != newval)
6661 goto repeat;
6662
6663 INDEX is log2 of the size of the data type, and thus usable to find the
6664 index of the builtin decl. */
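/* Illustrative example: an update that the fetch-op path cannot handle,
   e.g. (with f a float)

     #pragma omp atomic
       f *= 2.0f;

   has its operand view-converted to a same-sized integer and is updated
   with a __sync_val_compare_and_swap_4 retry loop of the shape shown
   above.  */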
6665
6666 static bool
6667 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6668 tree addr, tree loaded_val, tree stored_val,
6669 int index)
6670 {
6671 tree loadedi, storedi, initial, new_storedi, old_vali;
6672 tree type, itype, cmpxchg, iaddr, atype;
6673 gimple_stmt_iterator si;
6674 basic_block loop_header = single_succ (load_bb);
6675 gimple *phi, *stmt;
6676 edge e;
6677 enum built_in_function fncode;
6678
6679 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6680 order to use the RELAXED memory model effectively. */
6681 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6682 + index + 1);
6683 cmpxchg = builtin_decl_explicit (fncode);
6684 if (cmpxchg == NULL_TREE)
6685 return false;
6686 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6687 atype = type;
6688 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6689
6690 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6691 || !can_atomic_load_p (TYPE_MODE (itype)))
6692 return false;
6693
6694 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6695 si = gsi_last_nondebug_bb (load_bb);
6696 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6697
6698 /* For floating-point values, we'll need to view-convert them to integers
6699 so that we can perform the atomic compare and swap. Simplify the
6700 following code by always setting up the "i"ntegral variables. */
6701 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6702 {
6703 tree iaddr_val;
6704
6705 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6706 true));
6707 atype = itype;
6708 iaddr_val
6709 = force_gimple_operand_gsi (&si,
6710 fold_convert (TREE_TYPE (iaddr), addr),
6711 false, NULL_TREE, true, GSI_SAME_STMT);
6712 stmt = gimple_build_assign (iaddr, iaddr_val);
6713 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6714 loadedi = create_tmp_var (itype);
6715 if (gimple_in_ssa_p (cfun))
6716 loadedi = make_ssa_name (loadedi);
6717 }
6718 else
6719 {
6720 iaddr = addr;
6721 loadedi = loaded_val;
6722 }
6723
6724 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6725 tree loaddecl = builtin_decl_explicit (fncode);
6726 if (loaddecl)
6727 initial
6728 = fold_convert (atype,
6729 build_call_expr (loaddecl, 2, iaddr,
6730 build_int_cst (NULL_TREE,
6731 MEMMODEL_RELAXED)));
6732 else
6733 {
6734 tree off
6735 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6736 true), 0);
6737 initial = build2 (MEM_REF, atype, iaddr, off);
6738 }
6739
6740 initial
6741 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6742 GSI_SAME_STMT);
6743
6744 /* Move the value to the LOADEDI temporary. */
6745 if (gimple_in_ssa_p (cfun))
6746 {
6747 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6748 phi = create_phi_node (loadedi, loop_header);
6749 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6750 initial);
6751 }
6752 else
6753 gsi_insert_before (&si,
6754 gimple_build_assign (loadedi, initial),
6755 GSI_SAME_STMT);
6756 if (loadedi != loaded_val)
6757 {
6758 gimple_stmt_iterator gsi2;
6759 tree x;
6760
6761 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6762 gsi2 = gsi_start_bb (loop_header);
6763 if (gimple_in_ssa_p (cfun))
6764 {
6765 gassign *stmt;
6766 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6767 true, GSI_SAME_STMT);
6768 stmt = gimple_build_assign (loaded_val, x);
6769 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6770 }
6771 else
6772 {
6773 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6774 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6775 true, GSI_SAME_STMT);
6776 }
6777 }
6778 gsi_remove (&si, true);
6779
6780 si = gsi_last_nondebug_bb (store_bb);
6781 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6782
6783 if (iaddr == addr)
6784 storedi = stored_val;
6785 else
6786 storedi
6787 = force_gimple_operand_gsi (&si,
6788 build1 (VIEW_CONVERT_EXPR, itype,
6789 stored_val), true, NULL_TREE, true,
6790 GSI_SAME_STMT);
6791
6792 /* Build the compare&swap statement. */
6793 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6794 new_storedi = force_gimple_operand_gsi (&si,
6795 fold_convert (TREE_TYPE (loadedi),
6796 new_storedi),
6797 true, NULL_TREE,
6798 true, GSI_SAME_STMT);
6799
6800 if (gimple_in_ssa_p (cfun))
6801 old_vali = loadedi;
6802 else
6803 {
6804 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6805 stmt = gimple_build_assign (old_vali, loadedi);
6806 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6807
6808 stmt = gimple_build_assign (loadedi, new_storedi);
6809 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6810 }
6811
6812 /* Note that we always perform the comparison as an integer, even for
6813 floating point. This allows the atomic operation to properly
6814 succeed even with NaNs and -0.0. */
6815 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6816 stmt = gimple_build_cond_empty (ne);
6817 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6818
6819 /* Update cfg. */
6820 e = single_succ_edge (store_bb);
6821 e->flags &= ~EDGE_FALLTHRU;
6822 e->flags |= EDGE_FALSE_VALUE;
6823 /* Expect no looping. */
6824 e->probability = profile_probability::guessed_always ();
6825
6826 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6827 e->probability = profile_probability::guessed_never ();
6828
6829 /* Copy the new value to loadedi (we already did that before the condition
6830 if we are not in SSA). */
6831 if (gimple_in_ssa_p (cfun))
6832 {
6833 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6834 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6835 }
6836
6837 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6838 gsi_remove (&si, true);
6839
6840 struct loop *loop = alloc_loop ();
6841 loop->header = loop_header;
6842 loop->latch = store_bb;
6843 add_loop (loop, loop_header->loop_father);
6844
6845 if (gimple_in_ssa_p (cfun))
6846 update_ssa (TODO_update_ssa_no_phi);
6847
6848 return true;
6849 }
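/* Editor's sketch of the compare-and-swap loop built above for a float X
   updated with "#pragma omp atomic  x = x + y;" (the names below are
   illustrative, not the actual temporaries created):

       iold = VIEW_CONVERT_EXPR<int> (x);
     repeat:
       inew = VIEW_CONVERT_EXPR<int> (VIEW_CONVERT_EXPR<float> (iold) + y);
       iprev = __sync_val_compare_and_swap ((int *) &x, iold, inew);
       if (iprev != iold)
         { iold = iprev; goto repeat; }

   The comparison is done on the integer view, so NaNs and -0.0 cannot make
   the loop spin forever.  */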
6850
6851 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6852
6853 GOMP_atomic_start ();
6854 *addr = rhs;
6855 GOMP_atomic_end ();
6856
6857 The result is not globally atomic, but works so long as all parallel
6858 references are within #pragma omp atomic directives. According to
6859 responses received from omp@openmp.org, this appears to be within spec,
6860 which makes sense, since that's how several other compilers handle
6861 this situation as well.
6862 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6863 expanding. STORED_VAL is the operand of the matching
6864 GIMPLE_OMP_ATOMIC_STORE.
6865
6866 We replace
6867 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6868 loaded_val = *addr;
6869
6870 and replace
6871 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6872 *addr = stored_val;
6873 */
6874
6875 static bool
6876 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6877 tree addr, tree loaded_val, tree stored_val)
6878 {
6879 gimple_stmt_iterator si;
6880 gassign *stmt;
6881 tree t;
6882
6883 si = gsi_last_nondebug_bb (load_bb);
6884 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6885
6886 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6887 t = build_call_expr (t, 0);
6888 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6889
6890 tree mem = build_simple_mem_ref (addr);
6891 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6892 TREE_OPERAND (mem, 1)
6893 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6894 true),
6895 TREE_OPERAND (mem, 1));
6896 stmt = gimple_build_assign (loaded_val, mem);
6897 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6898 gsi_remove (&si, true);
6899
6900 si = gsi_last_nondebug_bb (store_bb);
6901 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6902
6903 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6904 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6905
6906 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6907 t = build_call_expr (t, 0);
6908 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6909 gsi_remove (&si, true);
6910
6911 if (gimple_in_ssa_p (cfun))
6912 update_ssa (TODO_update_ssa_no_phi);
6913 return true;
6914 }
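/* Editor's illustration of the fallback above: on a target lacking a wide
   enough compare-and-swap, e.g. for a long double X,

       #pragma omp atomic
       x = x * 2.0L;

   ends up bracketed by the libgomp mutex entry points, roughly

       GOMP_atomic_start ();
       tmp = x;
       x = tmp * 2.0L;
       GOMP_atomic_end ();  */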
6915
6916 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it
6917 using expand_omp_atomic_fetch_op. If that fails, we try
6918 expand_omp_atomic_pipeline, and if that fails too, the
6919 ultimate fallback is wrapping the operation in a mutex
6920 (expand_omp_atomic_mutex). REGION is the atomic region built
6921 by build_omp_regions_1(). */
6922
6923 static void
6924 expand_omp_atomic (struct omp_region *region)
6925 {
6926 basic_block load_bb = region->entry, store_bb = region->exit;
6927 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6928 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6929 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6930 tree addr = gimple_omp_atomic_load_rhs (load);
6931 tree stored_val = gimple_omp_atomic_store_val (store);
6932 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6933 HOST_WIDE_INT index;
6934
6935 /* Make sure the type is one of the supported sizes. */
6936 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6937 index = exact_log2 (index);
6938 if (index >= 0 && index <= 4)
6939 {
6940 unsigned int align = TYPE_ALIGN_UNIT (type);
6941
6942 /* __sync builtins require strict data alignment. */
6943 if (exact_log2 (align) >= index)
6944 {
6945 /* Atomic load. */
6946 scalar_mode smode;
6947 if (loaded_val == stored_val
6948 && (is_int_mode (TYPE_MODE (type), &smode)
6949 || is_float_mode (TYPE_MODE (type), &smode))
6950 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6951 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6952 return;
6953
6954 /* Atomic store. */
6955 if ((is_int_mode (TYPE_MODE (type), &smode)
6956 || is_float_mode (TYPE_MODE (type), &smode))
6957 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6958 && store_bb == single_succ (load_bb)
6959 && first_stmt (store_bb) == store
6960 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6961 stored_val, index))
6962 return;
6963
6964 /* When possible, use specialized atomic update functions. */
6965 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6966 && store_bb == single_succ (load_bb)
6967 && expand_omp_atomic_fetch_op (load_bb, addr,
6968 loaded_val, stored_val, index))
6969 return;
6970
6971 /* If we don't have specialized __sync builtins, try and implement
6972 as a compare and swap loop. */
6973 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6974 loaded_val, stored_val, index))
6975 return;
6976 }
6977 }
6978
6979 /* The ultimate fallback is wrapping the operation in a mutex. */
6980 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6981 }
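/* Editor's note on the selection above: for a naturally aligned 4-byte int,
   INDEX = exact_log2 (4) = 2 and exact_log2 (TYPE_ALIGN_UNIT) >= INDEX holds,
   so the candidates are tried in this order: __atomic_load_4 (pure read),
   __atomic_store_4 (pure write), the __atomic_fetch_* builtins, a
   compare-and-swap loop, and finally the GOMP_atomic_start/end mutex.  */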
6982
6983 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6984 at REGION_EXIT. */
6985
6986 static void
6987 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6988 basic_block region_exit)
6989 {
6990 struct loop *outer = region_entry->loop_father;
6991 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6992
6993 /* Don't parallelize the kernels region if it contains more than one outer
6994 loop. */
6995 unsigned int nr_outer_loops = 0;
6996 struct loop *single_outer = NULL;
6997 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6998 {
6999 gcc_assert (loop_outer (loop) == outer);
7000
7001 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7002 continue;
7003
7004 if (region_exit != NULL
7005 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7006 continue;
7007
7008 nr_outer_loops++;
7009 single_outer = loop;
7010 }
7011 if (nr_outer_loops != 1)
7012 return;
7013
7014 for (struct loop *loop = single_outer->inner;
7015 loop != NULL;
7016 loop = loop->inner)
7017 if (loop->next)
7018 return;
7019
7020 /* Mark the loops in the region. */
7021 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7022 loop->in_oacc_kernels_region = true;
7023 }
7024
7025 /* Types used to pass grid and work-group sizes to kernel invocation. */
7026
7027 struct GTY(()) grid_launch_attributes_trees
7028 {
7029 tree kernel_dim_array_type;
7030 tree kernel_lattrs_dimnum_decl;
7031 tree kernel_lattrs_grid_decl;
7032 tree kernel_lattrs_group_decl;
7033 tree kernel_launch_attributes_type;
7034 };
7035
7036 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7037
7038 /* Create types used to pass kernel launch attributes to target. */
7039
7040 static void
7041 grid_create_kernel_launch_attr_types (void)
7042 {
7043 if (grid_attr_trees)
7044 return;
7045 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7046
7047 tree dim_arr_index_type
7048 = build_index_type (build_int_cst (integer_type_node, 2));
7049 grid_attr_trees->kernel_dim_array_type
7050 = build_array_type (uint32_type_node, dim_arr_index_type);
7051
7052 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7053 grid_attr_trees->kernel_lattrs_dimnum_decl
7054 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7055 uint32_type_node);
7056 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7057
7058 grid_attr_trees->kernel_lattrs_grid_decl
7059 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7060 grid_attr_trees->kernel_dim_array_type);
7061 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7062 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7063 grid_attr_trees->kernel_lattrs_group_decl
7064 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7065 grid_attr_trees->kernel_dim_array_type);
7066 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7067 = grid_attr_trees->kernel_lattrs_grid_decl;
7068 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7069 "__gomp_kernel_launch_attributes",
7070 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7071 }
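/* The record built above corresponds to the following C declaration
   (editor's sketch; the actual type is assembled directly as trees):

       struct __gomp_kernel_launch_attributes
       {
         unsigned int ndim;
         unsigned int grid_size[3];
         unsigned int group_size[3];
       };  */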
7072
7073 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7074 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7075 of type uint32_type_node. */
7076
7077 static void
7078 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7079 tree fld_decl, int index, tree value)
7080 {
7081 tree ref = build4 (ARRAY_REF, uint32_type_node,
7082 build3 (COMPONENT_REF,
7083 grid_attr_trees->kernel_dim_array_type,
7084 range_var, fld_decl, NULL_TREE),
7085 build_int_cst (integer_type_node, index),
7086 NULL_TREE, NULL_TREE);
7087 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7088 }
7089
7090 /* Return a tree representation of a pointer to a structure with grid and
7091 work-group size information. Statements filling that information will be
7092 inserted before GSI; TGT_STMT is the target statement which has the
7093 necessary information in it. */
7094
7095 static tree
7096 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7097 gomp_target *tgt_stmt)
7098 {
7099 grid_create_kernel_launch_attr_types ();
7100 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7101 "__kernel_launch_attrs");
7102
7103 unsigned max_dim = 0;
7104 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7105 clause;
7106 clause = OMP_CLAUSE_CHAIN (clause))
7107 {
7108 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7109 continue;
7110
7111 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7112 max_dim = MAX (dim, max_dim);
7113
7114 grid_insert_store_range_dim (gsi, lattrs,
7115 grid_attr_trees->kernel_lattrs_grid_decl,
7116 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7117 grid_insert_store_range_dim (gsi, lattrs,
7118 grid_attr_trees->kernel_lattrs_group_decl,
7119 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7120 }
7121
7122 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7123 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7124 gcc_checking_assert (max_dim <= 2);
7125 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7126 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7127 GSI_SAME_STMT);
7128 TREE_ADDRESSABLE (lattrs) = 1;
7129 return build_fold_addr_expr (lattrs);
7130 }
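/* Editor's sketch of the stores emitted above for a target with two
   _griddim_ clauses (dimensions 0 and 1):

       __kernel_launch_attrs.grid_size[0] = <size 0>;
       __kernel_launch_attrs.group_size[0] = <group 0>;
       __kernel_launch_attrs.grid_size[1] = <size 1>;
       __kernel_launch_attrs.group_size[1] = <group 1>;
       __kernel_launch_attrs.ndim = 2;

   and the returned tree is &__kernel_launch_attrs.  */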
7131
7132 /* Build target argument identifier from the DEVICE identifier, value
7133 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7134
7135 static tree
7136 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
7137 {
7138 tree t = build_int_cst (integer_type_node, device);
7139 if (subsequent_param)
7140 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7141 build_int_cst (integer_type_node,
7142 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7143 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7144 build_int_cst (integer_type_node, id));
7145 return t;
7146 }
7147
7148 /* Like above, but return it in a type that can be directly stored as an
7149 element of the argument array. */
7150
7151 static tree
7152 get_target_argument_identifier (int device, bool subsequent_param, int id)
7153 {
7154 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
7155 return fold_convert (ptr_type_node, t);
7156 }
7157
7158 /* Return a target argument consisting of DEVICE identifier, value identifier
7159 ID, and the actual VALUE. */
7160
7161 static tree
7162 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7163 tree value)
7164 {
7165 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7166 fold_convert (integer_type_node, value),
7167 build_int_cst (unsigned_type_node,
7168 GOMP_TARGET_ARG_VALUE_SHIFT));
7169 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7170 get_target_argument_identifier_1 (device, false, id));
7171 t = fold_convert (ptr_type_node, t);
7172 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7173 }
7174
7175 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7176 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
7177 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7178 arguments. */
7179
7180 static void
7181 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7182 int id, tree value, vec <tree> *args)
7183 {
7184 if (tree_fits_shwi_p (value)
7185 && tree_to_shwi (value) > -(1 << 15)
7186 && tree_to_shwi (value) < (1 << 15))
7187 args->quick_push (get_target_argument_value (gsi, device, id, value));
7188 else
7189 {
7190 args->quick_push (get_target_argument_identifier (device, true, id));
7191 value = fold_convert (ptr_type_node, value);
7192 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7193 GSI_SAME_STMT);
7194 args->quick_push (value);
7195 }
7196 }
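/* Editor's sketch of the encoding used by the two helpers above (constants
   from gomp-constants.h): each element of the argument array is a
   pointer-sized word whose low bits hold the device selector and value
   identifier; GOMP_TARGET_ARG_SUBSEQUENT_PARAM marks entries whose payload
   follows in the next element, while small values are packed into the same
   word:

       arg = (value << GOMP_TARGET_ARG_VALUE_SHIFT)
             | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS;  */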
7197
7198 /* Create an array of arguments that is then passed to GOMP_target. */
7199
7200 static tree
7201 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7202 {
7203 auto_vec <tree, 6> args;
7204 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7205 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7206 if (c)
7207 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7208 else
7209 t = integer_minus_one_node;
7210 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7211 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7212
7213 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7214 if (c)
7215 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7216 else
7217 t = integer_minus_one_node;
7218 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7219 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7220 &args);
7221
7222 /* Add HSA-specific grid sizes, if available. */
7223 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7224 OMP_CLAUSE__GRIDDIM_))
7225 {
7226 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7227 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7228 args.quick_push (t);
7229 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7230 }
7231
7232 /* Produce more, perhaps device specific, arguments here. */
7233
7234 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7235 args.length () + 1),
7236 ".omp_target_args");
7237 for (unsigned i = 0; i < args.length (); i++)
7238 {
7239 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7240 build_int_cst (integer_type_node, i),
7241 NULL_TREE, NULL_TREE);
7242 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7243 GSI_SAME_STMT);
7244 }
7245 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7246 build_int_cst (integer_type_node, args.length ()),
7247 NULL_TREE, NULL_TREE);
7248 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7249 GSI_SAME_STMT);
7250 TREE_ADDRESSABLE (argarray) = 1;
7251 return build_fold_addr_expr (argarray);
7252 }
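/* Editor's illustration: for a plain "#pragma omp target" with no num_teams
   or thread_limit clause and no HSA grid sizes, both values default to -1
   and fit the packed form, so .omp_target_args ends up as roughly

       { DEVICE_ALL|NUM_TEAMS|(-1 << VALUE_SHIFT),
         DEVICE_ALL|THREAD_LIMIT|(-1 << VALUE_SHIFT),
         NULL }  */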
7253
7254 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7255
7256 static void
7257 expand_omp_target (struct omp_region *region)
7258 {
7259 basic_block entry_bb, exit_bb, new_bb;
7260 struct function *child_cfun;
7261 tree child_fn, block, t;
7262 gimple_stmt_iterator gsi;
7263 gomp_target *entry_stmt;
7264 gimple *stmt;
7265 edge e;
7266 bool offloaded, data_region;
7267
7268 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7269 new_bb = region->entry;
7270
7271 offloaded = is_gimple_omp_offloaded (entry_stmt);
7272 switch (gimple_omp_target_kind (entry_stmt))
7273 {
7274 case GF_OMP_TARGET_KIND_REGION:
7275 case GF_OMP_TARGET_KIND_UPDATE:
7276 case GF_OMP_TARGET_KIND_ENTER_DATA:
7277 case GF_OMP_TARGET_KIND_EXIT_DATA:
7278 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7279 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7280 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7281 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7282 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7283 data_region = false;
7284 break;
7285 case GF_OMP_TARGET_KIND_DATA:
7286 case GF_OMP_TARGET_KIND_OACC_DATA:
7287 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7288 data_region = true;
7289 break;
7290 default:
7291 gcc_unreachable ();
7292 }
7293
7294 child_fn = NULL_TREE;
7295 child_cfun = NULL;
7296 if (offloaded)
7297 {
7298 child_fn = gimple_omp_target_child_fn (entry_stmt);
7299 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7300 }
7301
7302 /* Supported by expand_omp_taskreg, but not here. */
7303 if (child_cfun != NULL)
7304 gcc_checking_assert (!child_cfun->cfg);
7305 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7306
7307 entry_bb = region->entry;
7308 exit_bb = region->exit;
7309
7310 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7311 {
7312 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7313
7314 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7315 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7316 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7317 DECL_ATTRIBUTES (child_fn)
7318 = tree_cons (get_identifier ("oacc kernels"),
7319 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7320 }
7321
7322 if (offloaded)
7323 {
7324 unsigned srcidx, dstidx, num;
7325
7326 /* If the offloading region needs data sent from the parent
7327 function, then the very first statement (except possible
7328 tree profile counter updates) of the offloading body
7329 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7330 &.OMP_DATA_O is passed as an argument to the child function,
7331 we need to replace it with the argument as seen by the child
7332 function.
7333
7334 In most cases, this will end up being the identity assignment
7335 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7336 a function call that has been inlined, the original PARM_DECL
7337 .OMP_DATA_I may have been converted into a different local
7338 variable. In which case, we need to keep the assignment. */
7339 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7340 if (data_arg)
7341 {
7342 basic_block entry_succ_bb = single_succ (entry_bb);
7343 gimple_stmt_iterator gsi;
7344 tree arg;
7345 gimple *tgtcopy_stmt = NULL;
7346 tree sender = TREE_VEC_ELT (data_arg, 0);
7347
7348 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7349 {
7350 gcc_assert (!gsi_end_p (gsi));
7351 stmt = gsi_stmt (gsi);
7352 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7353 continue;
7354
7355 if (gimple_num_ops (stmt) == 2)
7356 {
7357 tree arg = gimple_assign_rhs1 (stmt);
7358
7359 /* We're ignoring the subcode because we're
7360 effectively doing a STRIP_NOPS. */
7361
7362 if (TREE_CODE (arg) == ADDR_EXPR
7363 && TREE_OPERAND (arg, 0) == sender)
7364 {
7365 tgtcopy_stmt = stmt;
7366 break;
7367 }
7368 }
7369 }
7370
7371 gcc_assert (tgtcopy_stmt != NULL);
7372 arg = DECL_ARGUMENTS (child_fn);
7373
7374 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7375 gsi_remove (&gsi, true);
7376 }
7377
7378 /* Declare local variables needed in CHILD_CFUN. */
7379 block = DECL_INITIAL (child_fn);
7380 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7381 /* The gimplifier could record temporaries in the offloading block
7382 rather than in containing function's local_decls chain,
7383 which would mean cgraph missed finalizing them. Do it now. */
7384 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7385 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7386 varpool_node::finalize_decl (t);
7387 DECL_SAVED_TREE (child_fn) = NULL;
7388 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7389 gimple_set_body (child_fn, NULL);
7390 TREE_USED (block) = 1;
7391
7392 /* Reset DECL_CONTEXT on function arguments. */
7393 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7394 DECL_CONTEXT (t) = child_fn;
7395
7396 /* Split ENTRY_BB at GIMPLE_*,
7397 so that it can be moved to the child function. */
7398 gsi = gsi_last_nondebug_bb (entry_bb);
7399 stmt = gsi_stmt (gsi);
7400 gcc_assert (stmt
7401 && gimple_code (stmt) == gimple_code (entry_stmt));
7402 e = split_block (entry_bb, stmt);
7403 gsi_remove (&gsi, true);
7404 entry_bb = e->dest;
7405 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7406
7407 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7408 if (exit_bb)
7409 {
7410 gsi = gsi_last_nondebug_bb (exit_bb);
7411 gcc_assert (!gsi_end_p (gsi)
7412 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7413 stmt = gimple_build_return (NULL);
7414 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7415 gsi_remove (&gsi, true);
7416 }
7417
7418 /* Move the offloading region into CHILD_CFUN. */
7419
7420 block = gimple_block (entry_stmt);
7421
7422 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7423 if (exit_bb)
7424 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7425 /* When the OMP expansion process cannot guarantee an up-to-date
7426 loop tree, arrange for the child function to fix up its loops. */
7427 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7428 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7429
7430 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7431 num = vec_safe_length (child_cfun->local_decls);
7432 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7433 {
7434 t = (*child_cfun->local_decls)[srcidx];
7435 if (DECL_CONTEXT (t) == cfun->decl)
7436 continue;
7437 if (srcidx != dstidx)
7438 (*child_cfun->local_decls)[dstidx] = t;
7439 dstidx++;
7440 }
7441 if (dstidx != num)
7442 vec_safe_truncate (child_cfun->local_decls, dstidx);
7443
7444 /* Inform the callgraph about the new function. */
7445 child_cfun->curr_properties = cfun->curr_properties;
7446 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7447 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7448 cgraph_node *node = cgraph_node::get_create (child_fn);
7449 node->parallelized_function = 1;
7450 cgraph_node::add_new_function (child_fn, true);
7451
7452 /* Add the new function to the offload table. */
7453 if (ENABLE_OFFLOADING)
7454 {
7455 if (in_lto_p)
7456 DECL_PRESERVE_P (child_fn) = 1;
7457 vec_safe_push (offload_funcs, child_fn);
7458 }
7459
7460 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7461 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7462
7463 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7464 fixed in a following pass. */
7465 push_cfun (child_cfun);
7466 if (need_asm)
7467 assign_assembler_name_if_needed (child_fn);
7468 cgraph_edge::rebuild_edges ();
7469
7470 /* Some EH regions might become dead, see PR34608. If
7471 pass_cleanup_cfg isn't the first pass to happen with the
7472 new child, these dead EH edges might cause problems.
7473 Clean them up now. */
7474 if (flag_exceptions)
7475 {
7476 basic_block bb;
7477 bool changed = false;
7478
7479 FOR_EACH_BB_FN (bb, cfun)
7480 changed |= gimple_purge_dead_eh_edges (bb);
7481 if (changed)
7482 cleanup_tree_cfg ();
7483 }
7484 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7485 verify_loop_structure ();
7486 pop_cfun ();
7487
7488 if (dump_file && !gimple_in_ssa_p (cfun))
7489 {
7490 omp_any_child_fn_dumped = true;
7491 dump_function_header (dump_file, child_fn, dump_flags);
7492 dump_function_to_file (child_fn, dump_file, dump_flags);
7493 }
7494
7495 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7496 }
7497
7498 /* Emit a library call to launch the offloading region, or do data
7499 transfers. */
7500 tree t1, t2, t3, t4, depend, c, clauses;
7501 enum built_in_function start_ix;
7502 unsigned int flags_i = 0;
7503
7504 switch (gimple_omp_target_kind (entry_stmt))
7505 {
7506 case GF_OMP_TARGET_KIND_REGION:
7507 start_ix = BUILT_IN_GOMP_TARGET;
7508 break;
7509 case GF_OMP_TARGET_KIND_DATA:
7510 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7511 break;
7512 case GF_OMP_TARGET_KIND_UPDATE:
7513 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7514 break;
7515 case GF_OMP_TARGET_KIND_ENTER_DATA:
7516 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7517 break;
7518 case GF_OMP_TARGET_KIND_EXIT_DATA:
7519 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7520 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7521 break;
7522 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7523 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7524 start_ix = BUILT_IN_GOACC_PARALLEL;
7525 break;
7526 case GF_OMP_TARGET_KIND_OACC_DATA:
7527 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7528 start_ix = BUILT_IN_GOACC_DATA_START;
7529 break;
7530 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7531 start_ix = BUILT_IN_GOACC_UPDATE;
7532 break;
7533 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7534 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7535 break;
7536 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7537 start_ix = BUILT_IN_GOACC_DECLARE;
7538 break;
7539 default:
7540 gcc_unreachable ();
7541 }
7542
7543 clauses = gimple_omp_target_clauses (entry_stmt);
7544
7545 tree device = NULL_TREE;
7546 location_t device_loc = UNKNOWN_LOCATION;
7547 tree goacc_flags = NULL_TREE;
7548 if (is_gimple_omp_oacc (entry_stmt))
7549 {
7550 /* By default, no GOACC_FLAGs are set. */
7551 goacc_flags = integer_zero_node;
7552 }
7553 else
7554 {
7555 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7556 if (c)
7557 {
7558 device = OMP_CLAUSE_DEVICE_ID (c);
7559 device_loc = OMP_CLAUSE_LOCATION (c);
7560 }
7561 else
7562 {
7563 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7564 library choose). */
7565 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7566 device_loc = gimple_location (entry_stmt);
7567 }
7568
7569 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7570 if (c)
7571 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7572 }
7573
7574 /* By default, there is no conditional. */
7575 tree cond = NULL_TREE;
7576 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7577 if (c)
7578 cond = OMP_CLAUSE_IF_EXPR (c);
7579 /* If we found the clause 'if (cond)', build:
7580 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
7581 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
7582 if (cond)
7583 {
7584 tree *tp;
7585 if (is_gimple_omp_oacc (entry_stmt))
7586 tp = &goacc_flags;
7587 else
7588 {
7589 /* Ensure 'device' is of the correct type. */
7590 device = fold_convert_loc (device_loc, integer_type_node, device);
7591
7592 tp = &device;
7593 }
7594
7595 cond = gimple_boolify (cond);
7596
7597 basic_block cond_bb, then_bb, else_bb;
7598 edge e;
7599 tree tmp_var;
7600
7601 tmp_var = create_tmp_var (TREE_TYPE (*tp));
7602 if (offloaded)
7603 e = split_block_after_labels (new_bb);
7604 else
7605 {
7606 gsi = gsi_last_nondebug_bb (new_bb);
7607 gsi_prev (&gsi);
7608 e = split_block (new_bb, gsi_stmt (gsi));
7609 }
7610 cond_bb = e->src;
7611 new_bb = e->dest;
7612 remove_edge (e);
7613
7614 then_bb = create_empty_bb (cond_bb);
7615 else_bb = create_empty_bb (then_bb);
7616 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7617 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7618
7619 stmt = gimple_build_cond_empty (cond);
7620 gsi = gsi_last_bb (cond_bb);
7621 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7622
7623 gsi = gsi_start_bb (then_bb);
7624 stmt = gimple_build_assign (tmp_var, *tp);
7625 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7626
7627 gsi = gsi_start_bb (else_bb);
7628 if (is_gimple_omp_oacc (entry_stmt))
7629 stmt = gimple_build_assign (tmp_var,
7630 BIT_IOR_EXPR,
7631 *tp,
7632 build_int_cst (integer_type_node,
7633 GOACC_FLAG_HOST_FALLBACK));
7634 else
7635 stmt = gimple_build_assign (tmp_var,
7636 build_int_cst (integer_type_node,
7637 GOMP_DEVICE_HOST_FALLBACK));
7638 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7639
7640 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7641 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7642 add_bb_to_loop (then_bb, cond_bb->loop_father);
7643 add_bb_to_loop (else_bb, cond_bb->loop_father);
7644 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7645 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7646
7647 *tp = tmp_var;
7648
7649 gsi = gsi_last_nondebug_bb (new_bb);
7650 }
7651 else
7652 {
7653 gsi = gsi_last_nondebug_bb (new_bb);
7654
7655 if (device != NULL_TREE)
7656 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7657 true, GSI_SAME_STMT);
7658 }
7659
7660 t = gimple_omp_target_data_arg (entry_stmt);
7661 if (t == NULL)
7662 {
7663 t1 = size_zero_node;
7664 t2 = build_zero_cst (ptr_type_node);
7665 t3 = t2;
7666 t4 = t2;
7667 }
7668 else
7669 {
7670 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7671 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7672 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7673 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7674 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7675 }
7676
7677 gimple *g;
7678 bool tagging = false;
7679 /* The maximum number used by any start_ix, without varargs. */
7680 auto_vec<tree, 11> args;
7681 if (is_gimple_omp_oacc (entry_stmt))
7682 {
7683 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
7684 TREE_TYPE (goacc_flags), goacc_flags);
7685 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
7686 NULL_TREE, true,
7687 GSI_SAME_STMT);
7688 args.quick_push (goacc_flags_m);
7689 }
7690 else
7691 args.quick_push (device);
7692 if (offloaded)
7693 args.quick_push (build_fold_addr_expr (child_fn));
7694 args.quick_push (t1);
7695 args.quick_push (t2);
7696 args.quick_push (t3);
7697 args.quick_push (t4);
7698 switch (start_ix)
7699 {
7700 case BUILT_IN_GOACC_DATA_START:
7701 case BUILT_IN_GOACC_DECLARE:
7702 case BUILT_IN_GOMP_TARGET_DATA:
7703 break;
7704 case BUILT_IN_GOMP_TARGET:
7705 case BUILT_IN_GOMP_TARGET_UPDATE:
7706 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7707 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7708 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7709 if (c)
7710 depend = OMP_CLAUSE_DECL (c);
7711 else
7712 depend = build_int_cst (ptr_type_node, 0);
7713 args.quick_push (depend);
7714 if (start_ix == BUILT_IN_GOMP_TARGET)
7715 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7716 break;
7717 case BUILT_IN_GOACC_PARALLEL:
7718 oacc_set_fn_attrib (child_fn, clauses, &args);
7719 tagging = true;
7720 /* FALLTHRU */
7721 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7722 case BUILT_IN_GOACC_UPDATE:
7723 {
7724 tree t_async = NULL_TREE;
7725
7726 /* If present, use the value specified by the respective
7727 clause, making sure that it is of the correct type. */
7728 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7729 if (c)
7730 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7731 integer_type_node,
7732 OMP_CLAUSE_ASYNC_EXPR (c));
7733 else if (!tagging)
7734 /* Default values for t_async. */
7735 t_async = fold_convert_loc (gimple_location (entry_stmt),
7736 integer_type_node,
7737 build_int_cst (integer_type_node,
7738 GOMP_ASYNC_SYNC));
7739 if (tagging && t_async)
7740 {
7741 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7742
7743 if (TREE_CODE (t_async) == INTEGER_CST)
7744 {
7745 /* See if we can pack the async arg in to the tag's
7746 operand. */
7747 i_async = TREE_INT_CST_LOW (t_async);
7748 if (i_async < GOMP_LAUNCH_OP_MAX)
7749 t_async = NULL_TREE;
7750 else
7751 i_async = GOMP_LAUNCH_OP_MAX;
7752 }
7753 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7754 i_async));
7755 }
7756 if (t_async)
7757 args.safe_push (t_async);
7758
7759 /* Save the argument index, and ... */
7760 unsigned t_wait_idx = args.length ();
7761 unsigned num_waits = 0;
7762 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7763 if (!tagging || c)
7764 /* ... push a placeholder. */
7765 args.safe_push (integer_zero_node);
7766
7767 for (; c; c = OMP_CLAUSE_CHAIN (c))
7768 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7769 {
7770 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7771 integer_type_node,
7772 OMP_CLAUSE_WAIT_EXPR (c)));
7773 num_waits++;
7774 }
7775
7776 if (!tagging || num_waits)
7777 {
7778 tree len;
7779
7780 /* Now that we know the number, update the placeholder. */
7781 if (tagging)
7782 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7783 else
7784 len = build_int_cst (integer_type_node, num_waits);
7785 len = fold_convert_loc (gimple_location (entry_stmt),
7786 unsigned_type_node, len);
7787 args[t_wait_idx] = len;
7788 }
7789 }
7790 break;
7791 default:
7792 gcc_unreachable ();
7793 }
7794 if (tagging)
7795 /* Push terminal marker - zero. */
7796 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7797
7798 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7799 gimple_set_location (g, gimple_location (entry_stmt));
7800 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7801 if (!offloaded)
7802 {
7803 g = gsi_stmt (gsi);
7804 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7805 gsi_remove (&gsi, true);
7806 }
7807 if (data_region && region->exit)
7808 {
7809 gsi = gsi_last_nondebug_bb (region->exit);
7810 g = gsi_stmt (gsi);
7811 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7812 gsi_remove (&gsi, true);
7813 }
7814 }
7815
7816 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7817 the iteration variable derived from the thread number. INTRA_GROUP means this
7818 is an expansion of a loop iterating over work-items within a separate
7819 iteration over groups. */
7820
7821 static void
7822 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7823 {
7824 gimple_stmt_iterator gsi;
7825 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7826 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7827 == GF_OMP_FOR_KIND_GRID_LOOP);
7828 size_t collapse = gimple_omp_for_collapse (for_stmt);
7829 struct omp_for_data_loop *loops
7830 = XALLOCAVEC (struct omp_for_data_loop,
7831 gimple_omp_for_collapse (for_stmt));
7832 struct omp_for_data fd;
7833
7834 remove_edge (BRANCH_EDGE (kfor->entry));
7835 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7836
7837 gcc_assert (kfor->cont);
7838 omp_extract_for_data (for_stmt, &fd, loops);
7839
7840 gsi = gsi_start_bb (body_bb);
7841
7842 for (size_t dim = 0; dim < collapse; dim++)
7843 {
7844 tree type, itype;
7845 itype = type = TREE_TYPE (fd.loops[dim].v);
7846 if (POINTER_TYPE_P (type))
7847 itype = signed_type_for (type);
7848
7849 tree n1 = fd.loops[dim].n1;
7850 tree step = fd.loops[dim].step;
7851 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7852 true, NULL_TREE, true, GSI_SAME_STMT);
7853 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7854 true, NULL_TREE, true, GSI_SAME_STMT);
7855 tree threadid;
7856 if (gimple_omp_for_grid_group_iter (for_stmt))
7857 {
7858 gcc_checking_assert (!intra_group);
7859 threadid = build_call_expr (builtin_decl_explicit
7860 (BUILT_IN_HSA_WORKGROUPID), 1,
7861 build_int_cstu (unsigned_type_node, dim));
7862 }
7863 else if (intra_group)
7864 threadid = build_call_expr (builtin_decl_explicit
7865 (BUILT_IN_HSA_WORKITEMID), 1,
7866 build_int_cstu (unsigned_type_node, dim));
7867 else
7868 threadid = build_call_expr (builtin_decl_explicit
7869 (BUILT_IN_HSA_WORKITEMABSID), 1,
7870 build_int_cstu (unsigned_type_node, dim));
7871 threadid = fold_convert (itype, threadid);
7872 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7873 true, GSI_SAME_STMT);
7874
7875 tree startvar = fd.loops[dim].v;
7876 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7877 if (POINTER_TYPE_P (type))
7878 t = fold_build_pointer_plus (n1, t);
7879 else
7880 t = fold_build2 (PLUS_EXPR, type, t, n1);
7881 t = fold_convert (type, t);
7882 t = force_gimple_operand_gsi (&gsi, t,
7883 DECL_P (startvar)
7884 && TREE_ADDRESSABLE (startvar),
7885 NULL_TREE, true, GSI_SAME_STMT);
7886 gassign *assign_stmt = gimple_build_assign (startvar, t);
7887 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7888 }
7889 /* Remove the omp for statement. */
7890 gsi = gsi_last_nondebug_bb (kfor->entry);
7891 gsi_remove (&gsi, true);
7892
7893 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7894 gsi = gsi_last_nondebug_bb (kfor->cont);
7895 gcc_assert (!gsi_end_p (gsi)
7896 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7897 gsi_remove (&gsi, true);
7898
7899 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7900 gsi = gsi_last_nondebug_bb (kfor->exit);
7901 gcc_assert (!gsi_end_p (gsi)
7902 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7903 if (intra_group)
7904 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7905 gsi_remove (&gsi, true);
7906
7907 /* Fixup the much simpler CFG. */
7908 remove_edge (find_edge (kfor->cont, body_bb));
7909
7910 if (kfor->cont != body_bb)
7911 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7912 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7913 }
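/* In effect, for every collapsed dimension DIM the loop body now starts
   with (editor's sketch)

       fd.loops[DIM].v = n1 + <thread id in dimension DIM> * step;

   where the thread id comes from BUILT_IN_HSA_WORKGROUPID,
   BUILT_IN_HSA_WORKITEMID or BUILT_IN_HSA_WORKITEMABSID, depending on
   whether this is the group iteration, an intra-group iteration, or a
   flat (absolute) one.  */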
7914
7915 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7916 argument_decls. */
7917
7918 struct grid_arg_decl_map
7919 {
7920 tree old_arg;
7921 tree new_arg;
7922 };
7923
7924 /* Invoked through walk_gimple_op; remaps all PARM_DECLs to the ones
7925 pertaining to the kernel function. */
7926
7927 static tree
7928 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7929 {
7930 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7931 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7932 tree t = *tp;
7933
7934 if (t == adm->old_arg)
7935 *tp = adm->new_arg;
7936 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7937 return NULL_TREE;
7938 }
7939
7940 /* If the TARGET region contains a kernel body for-loop, remove its region
7941 from the TARGET and expand it as an HSA gridified kernel. */
7942
7943 static void
7944 grid_expand_target_grid_body (struct omp_region *target)
7945 {
7946 if (!hsa_gen_requested_p ())
7947 return;
7948
7949 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7950 struct omp_region **pp;
7951
7952 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7953 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7954 break;
7955
7956 struct omp_region *gpukernel = *pp;
7957
7958 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7959 if (!gpukernel)
7960 {
7961 /* HSA cannot handle OACC stuff. */
7962 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7963 return;
7964 gcc_checking_assert (orig_child_fndecl);
7965 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7966 OMP_CLAUSE__GRIDDIM_));
7967 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7968
7969 hsa_register_kernel (n);
7970 return;
7971 }
7972
7973 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7974 OMP_CLAUSE__GRIDDIM_));
7975 tree inside_block
7976 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7977 *pp = gpukernel->next;
7978 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7979 if ((*pp)->type == GIMPLE_OMP_FOR)
7980 break;
7981
7982 struct omp_region *kfor = *pp;
7983 gcc_assert (kfor);
7984 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7985 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7986 *pp = kfor->next;
7987 if (kfor->inner)
7988 {
7989 if (gimple_omp_for_grid_group_iter (for_stmt))
7990 {
7991 struct omp_region **next_pp;
7992 for (pp = &kfor->inner; *pp; pp = next_pp)
7993 {
7994 next_pp = &(*pp)->next;
7995 if ((*pp)->type != GIMPLE_OMP_FOR)
7996 continue;
7997 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7998 gcc_assert (gimple_omp_for_kind (inner)
7999 == GF_OMP_FOR_KIND_GRID_LOOP);
8000 grid_expand_omp_for_loop (*pp, true);
8001 *pp = (*pp)->next;
8002 next_pp = pp;
8003 }
8004 }
8005 expand_omp (kfor->inner);
8006 }
8007 if (gpukernel->inner)
8008 expand_omp (gpukernel->inner);
8009
8010 tree kern_fndecl = copy_node (orig_child_fndecl);
8011 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8012 "kernel");
8013 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8014 tree tgtblock = gimple_block (tgt_stmt);
8015 tree fniniblock = make_node (BLOCK);
8016 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8017 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8018 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8019 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8020 DECL_INITIAL (kern_fndecl) = fniniblock;
8021 push_struct_function (kern_fndecl);
8022 cfun->function_end_locus = gimple_location (tgt_stmt);
8023 init_tree_ssa (cfun);
8024 pop_cfun ();
8025
8026 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8027 gcc_assert (!DECL_CHAIN (old_parm_decl));
8028 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8029 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8030 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8031 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8032 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8033 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8034 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8035 kern_cfun->curr_properties = cfun->curr_properties;
8036
8037 grid_expand_omp_for_loop (kfor, false);
8038
8039 /* Remove the omp for statement. */
8040 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8041 gsi_remove (&gsi, true);
8042 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8043 return. */
8044 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8045 gcc_assert (!gsi_end_p (gsi)
8046 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8047 gimple *ret_stmt = gimple_build_return (NULL);
8048 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8049 gsi_remove (&gsi, true);
8050
8051 /* Statements in the first BB in the target construct have been produced by
8052 target lowering and must be copied inside the GPUKERNEL, with the two
8053 exceptions of the first OMP statement and the OMP_DATA assignment
8054 statement. */
8055 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8056 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8057 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8058 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8059 !gsi_end_p (tsi); gsi_next (&tsi))
8060 {
8061 gimple *stmt = gsi_stmt (tsi);
8062 if (is_gimple_omp (stmt))
8063 break;
8064 if (sender
8065 && is_gimple_assign (stmt)
8066 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8067 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8068 continue;
8069 gimple *copy = gimple_copy (stmt);
8070 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8071 gimple_set_block (copy, fniniblock);
8072 }
8073
8074 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8075 gpukernel->exit, inside_block);
8076
8077 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8078 kcn->mark_force_output ();
8079 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8080
8081 hsa_register_kernel (kcn, orig_child);
8082
8083 cgraph_node::add_new_function (kern_fndecl, true);
8084 push_cfun (kern_cfun);
8085 cgraph_edge::rebuild_edges ();
8086
8087 /* Re-map any mention of the PARM_DECL of the original function to the
8088 PARM_DECL of the new one.
8089
8090 TODO: It would be great if lowering produced references into the GPU
8091 kernel decl straight away and we did not have to do this. */
8092 struct grid_arg_decl_map adm;
8093 adm.old_arg = old_parm_decl;
8094 adm.new_arg = new_parm_decl;
8095 basic_block bb;
8096 FOR_EACH_BB_FN (bb, kern_cfun)
8097 {
8098 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8099 {
8100 gimple *stmt = gsi_stmt (gsi);
8101 struct walk_stmt_info wi;
8102 memset (&wi, 0, sizeof (wi));
8103 wi.info = &adm;
8104 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8105 }
8106 }
8107 pop_cfun ();
8108
8109 return;
8110 }
8111
8112 /* Expand the parallel region tree rooted at REGION. Expansion
8113 proceeds in depth-first order. Innermost regions are expanded
8114 first. This way, parallel regions that require a new function to
8115 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8116 internal dependencies in their body. */
8117
8118 static void
8119 expand_omp (struct omp_region *region)
8120 {
8121 omp_any_child_fn_dumped = false;
8122 while (region)
8123 {
8124 location_t saved_location;
8125 gimple *inner_stmt = NULL;
8126
8127 /* First, determine whether this is a combined parallel+workshare
8128 region. */
8129 if (region->type == GIMPLE_OMP_PARALLEL)
8130 determine_parallel_type (region);
8131 else if (region->type == GIMPLE_OMP_TARGET)
8132 grid_expand_target_grid_body (region);
8133
8134 if (region->type == GIMPLE_OMP_FOR
8135 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8136 inner_stmt = last_stmt (region->inner->entry);
8137
8138 if (region->inner)
8139 expand_omp (region->inner);
8140
8141 saved_location = input_location;
8142 if (gimple_has_location (last_stmt (region->entry)))
8143 input_location = gimple_location (last_stmt (region->entry));
8144
8145 switch (region->type)
8146 {
8147 case GIMPLE_OMP_PARALLEL:
8148 case GIMPLE_OMP_TASK:
8149 expand_omp_taskreg (region);
8150 break;
8151
8152 case GIMPLE_OMP_FOR:
8153 expand_omp_for (region, inner_stmt);
8154 break;
8155
8156 case GIMPLE_OMP_SECTIONS:
8157 expand_omp_sections (region);
8158 break;
8159
8160 case GIMPLE_OMP_SECTION:
8161 /* Individual omp sections are handled together with their
8162 parent GIMPLE_OMP_SECTIONS region. */
8163 break;
8164
8165 case GIMPLE_OMP_SINGLE:
8166 expand_omp_single (region);
8167 break;
8168
8169 case GIMPLE_OMP_ORDERED:
8170 {
8171 gomp_ordered *ord_stmt
8172 = as_a <gomp_ordered *> (last_stmt (region->entry));
8173 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8174 OMP_CLAUSE_DEPEND))
8175 {
8176 /* We'll expand these when expanding corresponding
8177 worksharing region with ordered(n) clause. */
8178 gcc_assert (region->outer
8179 && region->outer->type == GIMPLE_OMP_FOR);
8180 region->ord_stmt = ord_stmt;
8181 break;
8182 }
8183 }
8184 /* FALLTHRU */
8185 case GIMPLE_OMP_MASTER:
8186 case GIMPLE_OMP_TASKGROUP:
8187 case GIMPLE_OMP_CRITICAL:
8188 case GIMPLE_OMP_TEAMS:
8189 expand_omp_synch (region);
8190 break;
8191
8192 case GIMPLE_OMP_ATOMIC_LOAD:
8193 expand_omp_atomic (region);
8194 break;
8195
8196 case GIMPLE_OMP_TARGET:
8197 expand_omp_target (region);
8198 break;
8199
8200 default:
8201 gcc_unreachable ();
8202 }
8203
8204 input_location = saved_location;
8205 region = region->next;
8206 }
8207 if (omp_any_child_fn_dumped)
8208 {
8209 if (dump_file)
8210 dump_function_header (dump_file, current_function_decl, dump_flags);
8211 omp_any_child_fn_dumped = false;
8212 }
8213 }
8214
8215 /* Helper for build_omp_regions. Scan the dominator tree starting at
8216 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8217 true, the function ends once a single tree is built (otherwise, a whole
8218 forest of OMP constructs may be built). */
8219
8220 static void
8221 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8222 bool single_tree)
8223 {
8224 gimple_stmt_iterator gsi;
8225 gimple *stmt;
8226 basic_block son;
8227
8228 gsi = gsi_last_nondebug_bb (bb);
8229 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8230 {
8231 struct omp_region *region;
8232 enum gimple_code code;
8233
8234 stmt = gsi_stmt (gsi);
8235 code = gimple_code (stmt);
8236 if (code == GIMPLE_OMP_RETURN)
8237 {
8238 /* STMT is the return point out of region PARENT. Mark it
8239 as the exit point and make PARENT the immediately
8240 enclosing region. */
8241 gcc_assert (parent);
8242 region = parent;
8243 region->exit = bb;
8244 parent = parent->outer;
8245 }
8246 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8247 {
8248 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8249 GIMPLE_OMP_RETURN, but matches with
8250 GIMPLE_OMP_ATOMIC_LOAD. */
8251 gcc_assert (parent);
8252 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8253 region = parent;
8254 region->exit = bb;
8255 parent = parent->outer;
8256 }
8257 else if (code == GIMPLE_OMP_CONTINUE)
8258 {
8259 gcc_assert (parent);
8260 parent->cont = bb;
8261 }
8262 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8263 {
8264 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8265 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8266 }
8267 else
8268 {
8269 region = new_omp_region (bb, code, parent);
8270 /* Otherwise... */
8271 if (code == GIMPLE_OMP_TARGET)
8272 {
8273 switch (gimple_omp_target_kind (stmt))
8274 {
8275 case GF_OMP_TARGET_KIND_REGION:
8276 case GF_OMP_TARGET_KIND_DATA:
8277 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8278 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8279 case GF_OMP_TARGET_KIND_OACC_DATA:
8280 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8281 break;
8282 case GF_OMP_TARGET_KIND_UPDATE:
8283 case GF_OMP_TARGET_KIND_ENTER_DATA:
8284 case GF_OMP_TARGET_KIND_EXIT_DATA:
8285 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8286 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8287 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8288 /* ..., other than for those stand-alone directives... */
8289 region = NULL;
8290 break;
8291 default:
8292 gcc_unreachable ();
8293 }
8294 }
8295 else if (code == GIMPLE_OMP_ORDERED
8296 && omp_find_clause (gimple_omp_ordered_clauses
8297 (as_a <gomp_ordered *> (stmt)),
8298 OMP_CLAUSE_DEPEND))
8299 /* #pragma omp ordered depend is also just a stand-alone
8300 directive. */
8301 region = NULL;
8302 else if (code == GIMPLE_OMP_TASK
8303 && gimple_omp_task_taskwait_p (stmt))
8304 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8305 region = NULL;
8306 /* ..., this directive becomes the parent for a new region. */
8307 if (region)
8308 parent = region;
8309 }
8310 }
8311
8312 if (single_tree && !parent)
8313 return;
8314
8315 for (son = first_dom_son (CDI_DOMINATORS, bb);
8316 son;
8317 son = next_dom_son (CDI_DOMINATORS, son))
8318 build_omp_regions_1 (son, parent, single_tree);
8319 }
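/* Editor's example of the resulting region tree: for

       #pragma omp parallel
       {
         #pragma omp for
         for (i = 0; i < n; i++) ...
       }

   this builds a GIMPLE_OMP_PARALLEL region whose single inner region is the
   GIMPLE_OMP_FOR, each delimited by the block holding the directive (entry)
   and the block holding its GIMPLE_OMP_RETURN (exit).  */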
8320
8321 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8322 root_omp_region. */
8323
8324 static void
8325 build_omp_regions_root (basic_block root)
8326 {
8327 gcc_assert (root_omp_region == NULL);
8328 build_omp_regions_1 (root, NULL, true);
8329 gcc_assert (root_omp_region != NULL);
8330 }
8331
8332 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8333
8334 void
8335 omp_expand_local (basic_block head)
8336 {
8337 build_omp_regions_root (head);
8338 if (dump_file && (dump_flags & TDF_DETAILS))
8339 {
8340 fprintf (dump_file, "\nOMP region tree\n\n");
8341 dump_omp_region (dump_file, root_omp_region, 0);
8342 fprintf (dump_file, "\n");
8343 }
8344
8345 remove_exit_barriers (root_omp_region);
8346 expand_omp (root_omp_region);
8347
8348 omp_free_regions ();
8349 }
8350
8351 /* Scan the CFG and build a tree of OMP regions, storing the root in
8352 root_omp_region. */
8353
8354 static void
8355 build_omp_regions (void)
8356 {
8357 gcc_assert (root_omp_region == NULL);
8358 calculate_dominance_info (CDI_DOMINATORS);
8359 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8360 }
8361
8362 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8363
8364 static unsigned int
8365 execute_expand_omp (void)
8366 {
8367 build_omp_regions ();
8368
8369 if (!root_omp_region)
8370 return 0;
8371
8372 if (dump_file)
8373 {
8374 fprintf (dump_file, "\nOMP region tree\n\n");
8375 dump_omp_region (dump_file, root_omp_region, 0);
8376 fprintf (dump_file, "\n");
8377 }
8378
8379 remove_exit_barriers (root_omp_region);
8380
8381 expand_omp (root_omp_region);
8382
8383 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8384 verify_loop_structure ();
8385 cleanup_tree_cfg ();
8386
8387 omp_free_regions ();
8388
8389 return 0;
8390 }
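/* A very rough sketch (not actual compiler output) of what this expansion
   amounts to: a function foo containing

       #pragma omp parallel
         body ();

   leaves this pass as an outlined child function, conventionally named
   along the lines of foo._omp_fn.0, holding the body of the parallel
   region, plus a call into libgomp in foo itself, roughly
   GOMP_parallel (foo._omp_fn.0, &data, 0, 0).  The exact call and the
   data-sharing record depend on the clauses used.  */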
8391
8392 /* OMP expansion -- the default pass, run before creation of SSA form. */
8393
8394 namespace {
8395
8396 const pass_data pass_data_expand_omp =
8397 {
8398 GIMPLE_PASS, /* type */
8399 "ompexp", /* name */
8400 OPTGROUP_OMP, /* optinfo_flags */
8401 TV_NONE, /* tv_id */
8402 PROP_gimple_any, /* properties_required */
8403 PROP_gimple_eomp, /* properties_provided */
8404 0, /* properties_destroyed */
8405 0, /* todo_flags_start */
8406 0, /* todo_flags_finish */
8407 };
8408
8409 class pass_expand_omp : public gimple_opt_pass
8410 {
8411 public:
8412 pass_expand_omp (gcc::context *ctxt)
8413 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8414 {}
8415
8416 /* opt_pass methods: */
8417 virtual unsigned int execute (function *)
8418 {
8419 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8420 || flag_openmp_simd != 0)
8421 && !seen_error ());
8422
8423 /* This pass always runs, to provide PROP_gimple_eomp.
8424 But often, there is nothing to do. */
8425 if (!gate)
8426 return 0;
8427
8428 return execute_expand_omp ();
8429 }
8430
8431 }; // class pass_expand_omp
8432
8433 } // anon namespace
8434
8435 gimple_opt_pass *
8436 make_pass_expand_omp (gcc::context *ctxt)
8437 {
8438 return new pass_expand_omp (ctxt);
8439 }
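/* To inspect the result of this pass, something along the lines of
   "gcc -fopenmp -fdump-tree-ompexp test.c" should emit a dump file for the
   "ompexp" pass containing the OMP region tree printed above (exact dump
   file naming varies between releases).  */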
8440
8441 namespace {
8442
8443 const pass_data pass_data_expand_omp_ssa =
8444 {
8445 GIMPLE_PASS, /* type */
8446 "ompexpssa", /* name */
8447 OPTGROUP_OMP, /* optinfo_flags */
8448 TV_NONE, /* tv_id */
8449 PROP_cfg | PROP_ssa, /* properties_required */
8450 PROP_gimple_eomp, /* properties_provided */
8451 0, /* properties_destroyed */
8452 0, /* todo_flags_start */
8453 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8454 };
8455
8456 class pass_expand_omp_ssa : public gimple_opt_pass
8457 {
8458 public:
8459 pass_expand_omp_ssa (gcc::context *ctxt)
8460 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8461 {}
8462
8463 /* opt_pass methods: */
8464 virtual bool gate (function *fun)
8465 {
8466 return !(fun->curr_properties & PROP_gimple_eomp);
8467 }
8468 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8469 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8470
8471 }; // class pass_expand_omp_ssa
8472
8473 } // anon namespace
8474
8475 gimple_opt_pass *
8476 make_pass_expand_omp_ssa (gcc::context *ctxt)
8477 {
8478 return new pass_expand_omp_ssa (ctxt);
8479 }
8480
8481 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8482 GIMPLE_* codes. */
8483
8484 bool
8485 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8486 int *region_idx)
8487 {
8488 gimple *last = last_stmt (bb);
8489 enum gimple_code code = gimple_code (last);
8490 struct omp_region *cur_region = *region;
8491 bool fallthru = false;
8492
8493 switch (code)
8494 {
8495 case GIMPLE_OMP_PARALLEL:
8496 case GIMPLE_OMP_FOR:
8497 case GIMPLE_OMP_SINGLE:
8498 case GIMPLE_OMP_TEAMS:
8499 case GIMPLE_OMP_MASTER:
8500 case GIMPLE_OMP_TASKGROUP:
8501 case GIMPLE_OMP_CRITICAL:
8502 case GIMPLE_OMP_SECTION:
8503 case GIMPLE_OMP_GRID_BODY:
8504 cur_region = new_omp_region (bb, code, cur_region);
8505 fallthru = true;
8506 break;
8507
8508 case GIMPLE_OMP_TASK:
8509 cur_region = new_omp_region (bb, code, cur_region);
8510 fallthru = true;
8511 if (gimple_omp_task_taskwait_p (last))
8512 cur_region = cur_region->outer;
8513 break;
8514
8515 case GIMPLE_OMP_ORDERED:
8516 cur_region = new_omp_region (bb, code, cur_region);
8517 fallthru = true;
8518 if (omp_find_clause (gimple_omp_ordered_clauses
8519 (as_a <gomp_ordered *> (last)),
8520 OMP_CLAUSE_DEPEND))
8521 cur_region = cur_region->outer;
8522 break;
8523
8524 case GIMPLE_OMP_TARGET:
8525 cur_region = new_omp_region (bb, code, cur_region);
8526 fallthru = true;
8527 switch (gimple_omp_target_kind (last))
8528 {
8529 case GF_OMP_TARGET_KIND_REGION:
8530 case GF_OMP_TARGET_KIND_DATA:
8531 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8532 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8533 case GF_OMP_TARGET_KIND_OACC_DATA:
8534 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8535 break;
8536 case GF_OMP_TARGET_KIND_UPDATE:
8537 case GF_OMP_TARGET_KIND_ENTER_DATA:
8538 case GF_OMP_TARGET_KIND_EXIT_DATA:
8539 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8540 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8541 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8542 cur_region = cur_region->outer;
8543 break;
8544 default:
8545 gcc_unreachable ();
8546 }
8547 break;
8548
8549 case GIMPLE_OMP_SECTIONS:
8550 cur_region = new_omp_region (bb, code, cur_region);
8551 fallthru = true;
8552 break;
8553
8554 case GIMPLE_OMP_SECTIONS_SWITCH:
8555 fallthru = false;
8556 break;
8557
8558 case GIMPLE_OMP_ATOMIC_LOAD:
8559 case GIMPLE_OMP_ATOMIC_STORE:
8560 fallthru = true;
8561 break;
8562
8563 case GIMPLE_OMP_RETURN:
8564 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8565 somewhere other than the next block. This will be
8566 created later. */
8567 cur_region->exit = bb;
8568 if (cur_region->type == GIMPLE_OMP_TASK)
8569 /* Add an edge corresponding to not scheduling the task
8570 immediately. */
8571 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8572 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8573 cur_region = cur_region->outer;
8574 break;
8575
8576 case GIMPLE_OMP_CONTINUE:
8577 cur_region->cont = bb;
8578 switch (cur_region->type)
8579 {
8580 case GIMPLE_OMP_FOR:
8581 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE successor
8582 edges as abnormal to prevent splitting them; the resulting
8583 edges are summarized in the comment after this function. */
8584 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8585 /* Make the loopback edge. */
8586 make_edge (bb, single_succ (cur_region->entry),
8587 EDGE_ABNORMAL);
8588
8589 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8590 corresponds to the case that the body of the loop
8591 is not executed at all. */
8592 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8593 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8594 fallthru = false;
8595 break;
8596
8597 case GIMPLE_OMP_SECTIONS:
8598 /* Wire up the edges into and out of the nested sections. */
8599 {
8600 basic_block switch_bb = single_succ (cur_region->entry);
8601
8602 struct omp_region *i;
8603 for (i = cur_region->inner; i ; i = i->next)
8604 {
8605 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8606 make_edge (switch_bb, i->entry, 0);
8607 make_edge (i->exit, bb, EDGE_FALLTHRU);
8608 }
8609
8610 /* Make the loopback edge to the block with
8611 GIMPLE_OMP_SECTIONS_SWITCH. */
8612 make_edge (bb, switch_bb, 0);
8613
8614 /* Make the edge from the switch to exit. */
8615 make_edge (switch_bb, bb->next_bb, 0);
8616 fallthru = false;
8617 }
8618 break;
8619
8620 case GIMPLE_OMP_TASK:
8621 fallthru = true;
8622 break;
8623
8624 default:
8625 gcc_unreachable ();
8626 }
8627 break;
8628
8629 default:
8630 gcc_unreachable ();
8631 }
8632
8633 if (*region != cur_region)
8634 {
8635 *region = cur_region;
8636 if (cur_region)
8637 *region_idx = cur_region->entry->index;
8638 else
8639 *region_idx = 0;
8640 }
8641
8642 return fallthru;
8643 }
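/* A compact summary, derived from the GIMPLE_OMP_CONTINUE handling above
   (the block names are descriptive only), of the edges built for a
   worksharing-loop region, with ENTRY the GIMPLE_OMP_FOR block, CONT the
   GIMPLE_OMP_CONTINUE block and NEXT the block following CONT:

       ENTRY -> body   (pre-existing edge, marked EDGE_ABNORMAL)
       CONT  -> body   (loopback, EDGE_ABNORMAL)
       ENTRY -> NEXT   (zero-iteration path, EDGE_ABNORMAL)
       CONT  -> NEXT   (EDGE_FALLTHRU | EDGE_ABNORMAL)

   For a sections region, SWITCH (the single successor of ENTRY, holding
   GIMPLE_OMP_SECTIONS_SWITCH) gets an edge to each GIMPLE_OMP_SECTION
   entry, each section exit falls through to CONT, CONT loops back to
   SWITCH, and SWITCH also gets an edge to the block after CONT.  */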
8644
8645 #include "gt-omp-expand.h"