1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
33 #include "tree-pass.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
42 #include "internal-fn.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
48 #include "tree-into-ssa.h"
50 #include "splay-tree.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
68 /* The enclosing region. */
69 struct omp_region
*outer
;
71 /* First child region. */
72 struct omp_region
*inner
;
74 /* Next peer region. */
75 struct omp_region
*next
;
77 /* Block containing the omp directive as its last stmt. */
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
89 vec
<tree
, va_gc
> *ws_args
;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type
;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind
;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers
;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel
;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 gomp_ordered
*ord_stmt
;
108 static struct omp_region
*root_omp_region
;
109 static bool omp_any_child_fn_dumped
;
111 static void expand_omp_build_assign (gimple_stmt_iterator
*, tree
, tree
,
113 static gphi
*find_phi_with_arg_on_edge (tree
, edge
);
114 static void expand_omp (struct omp_region
*region
);
116 /* Return true if REGION is a combined parallel+workshare region. */
119 is_combined_parallel (struct omp_region
*region
)
121 return region
->is_combined_parallel
;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
140 # BLOCK 2 (PAR_ENTRY_BB)
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
167 workshare_safe_to_combine_p (basic_block ws_entry_bb
)
169 struct omp_for_data fd
;
170 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
172 if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
175 gcc_assert (gimple_code (ws_stmt
) == GIMPLE_OMP_FOR
);
176 if (gimple_omp_for_kind (ws_stmt
) != GF_OMP_FOR_KIND_FOR
)
179 omp_extract_for_data (as_a
<gomp_for
*> (ws_stmt
), &fd
, NULL
);
181 if (fd
.collapse
> 1 && TREE_CODE (fd
.loop
.n2
) != INTEGER_CST
)
183 if (fd
.iter_type
!= long_integer_type_node
)
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
191 if (!is_gimple_min_invariant (fd
.loop
.n1
)
192 || !is_gimple_min_invariant (fd
.loop
.n2
)
193 || !is_gimple_min_invariant (fd
.loop
.step
)
194 || (fd
.chunk_size
&& !is_gimple_min_invariant (fd
.chunk_size
)))
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
204 omp_adjust_chunk_size (tree chunk_size
, bool simd_schedule
)
206 if (!simd_schedule
|| integer_zerop (chunk_size
))
209 poly_uint64 vf
= omp_max_vf ();
210 if (known_eq (vf
, 1U))
213 tree type
= TREE_TYPE (chunk_size
);
214 chunk_size
= fold_build2 (PLUS_EXPR
, type
, chunk_size
,
215 build_int_cst (type
, vf
- 1));
216 return fold_build2 (BIT_AND_EXPR
, type
, chunk_size
,
217 build_int_cst (type
, -vf
));
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
224 static vec
<tree
, va_gc
> *
225 get_ws_args_for (gimple
*par_stmt
, gimple
*ws_stmt
)
228 location_t loc
= gimple_location (ws_stmt
);
229 vec
<tree
, va_gc
> *ws_args
;
231 if (gomp_for
*for_stmt
= dyn_cast
<gomp_for
*> (ws_stmt
))
233 struct omp_for_data fd
;
236 omp_extract_for_data (for_stmt
, &fd
, NULL
);
240 if (gimple_omp_for_combined_into_p (for_stmt
))
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt
),
244 OMP_CLAUSE__LOOPTEMP_
);
246 n1
= OMP_CLAUSE_DECL (innerc
);
247 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
248 OMP_CLAUSE__LOOPTEMP_
);
250 n2
= OMP_CLAUSE_DECL (innerc
);
253 vec_alloc (ws_args
, 3 + (fd
.chunk_size
!= 0));
255 t
= fold_convert_loc (loc
, long_integer_type_node
, n1
);
256 ws_args
->quick_push (t
);
258 t
= fold_convert_loc (loc
, long_integer_type_node
, n2
);
259 ws_args
->quick_push (t
);
261 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.loop
.step
);
262 ws_args
->quick_push (t
);
266 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.chunk_size
);
267 t
= omp_adjust_chunk_size (t
, fd
.simd_schedule
);
268 ws_args
->quick_push (t
);
273 else if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb
= single_succ (gimple_bb (ws_stmt
));
279 t
= build_int_cst (unsigned_type_node
, EDGE_COUNT (bb
->succs
) - 1);
280 vec_alloc (ws_args
, 1);
281 ws_args
->quick_push (t
);
288 /* Discover whether REGION is a combined parallel+workshare region. */
291 determine_parallel_type (struct omp_region
*region
)
293 basic_block par_entry_bb
, par_exit_bb
;
294 basic_block ws_entry_bb
, ws_exit_bb
;
296 if (region
== NULL
|| region
->inner
== NULL
297 || region
->exit
== NULL
|| region
->inner
->exit
== NULL
298 || region
->inner
->cont
== NULL
)
301 /* We only support parallel+for and parallel+sections. */
302 if (region
->type
!= GIMPLE_OMP_PARALLEL
303 || (region
->inner
->type
!= GIMPLE_OMP_FOR
304 && region
->inner
->type
!= GIMPLE_OMP_SECTIONS
))
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb
= region
->entry
;
310 par_exit_bb
= region
->exit
;
311 ws_entry_bb
= region
->inner
->entry
;
312 ws_exit_bb
= region
->inner
->exit
;
314 /* Give up for task reductions on the parallel, while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
317 tree pclauses
= gimple_omp_parallel_clauses (last_stmt (par_entry_bb
));
318 if (omp_find_clause (pclauses
, OMP_CLAUSE__REDUCTEMP_
))
321 if (single_succ (par_entry_bb
) == ws_entry_bb
322 && single_succ (ws_exit_bb
) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb
)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb
))
325 || (last_and_only_stmt (ws_entry_bb
)
326 && last_and_only_stmt (par_exit_bb
))))
328 gimple
*par_stmt
= last_stmt (par_entry_bb
);
329 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
331 if (region
->inner
->type
== GIMPLE_OMP_FOR
)
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses
= gimple_omp_for_clauses (ws_stmt
);
343 tree c
= omp_find_clause (clauses
, OMP_CLAUSE_SCHEDULE
);
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c
) & OMP_CLAUSE_SCHEDULE_MASK
)
346 == OMP_CLAUSE_SCHEDULE_STATIC
)
347 || omp_find_clause (clauses
, OMP_CLAUSE_ORDERED
)
348 || omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
))
351 else if (region
->inner
->type
== GIMPLE_OMP_SECTIONS
352 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt
),
353 OMP_CLAUSE__REDUCTEMP_
))
356 region
->is_combined_parallel
= true;
357 region
->inner
->is_combined_parallel
= true;
358 region
->ws_args
= get_ws_args_for (par_stmt
, ws_stmt
);
362 /* Debugging dumps for parallel regions. */
363 void dump_omp_region (FILE *, struct omp_region
*, int);
364 void debug_omp_region (struct omp_region
*);
365 void debug_all_omp_regions (void);
367 /* Dump the parallel region tree rooted at REGION. */
370 dump_omp_region (FILE *file
, struct omp_region
*region
, int indent
)
372 fprintf (file
, "%*sbb %d: %s\n", indent
, "", region
->entry
->index
,
373 gimple_code_name
[region
->type
]);
376 dump_omp_region (file
, region
->inner
, indent
+ 4);
380 fprintf (file
, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent
, "",
381 region
->cont
->index
);
385 fprintf (file
, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent
, "",
386 region
->exit
->index
);
388 fprintf (file
, "%*s[no exit marker]\n", indent
, "");
391 dump_omp_region (file
, region
->next
, indent
);
395 debug_omp_region (struct omp_region
*region
)
397 dump_omp_region (stderr
, region
, 0);
401 debug_all_omp_regions (void)
403 dump_omp_region (stderr
, root_omp_region
, 0);
406 /* Create a new parallel region starting at STMT inside region PARENT. */
408 static struct omp_region
*
409 new_omp_region (basic_block bb
, enum gimple_code type
,
410 struct omp_region
*parent
)
412 struct omp_region
*region
= XCNEW (struct omp_region
);
414 region
->outer
= parent
;
420 /* This is a nested region. Add it to the list of inner
421 regions in PARENT. */
422 region
->next
= parent
->inner
;
423 parent
->inner
= region
;
427 /* This is a toplevel region. Add it to the list of toplevel
428 regions in ROOT_OMP_REGION. */
429 region
->next
= root_omp_region
;
430 root_omp_region
= region
;
436 /* Release the memory associated with the region tree rooted at REGION. */
439 free_omp_region_1 (struct omp_region
*region
)
441 struct omp_region
*i
, *n
;
443 for (i
= region
->inner
; i
; i
= n
)
446 free_omp_region_1 (i
);
452 /* Release the memory for the entire omp region tree. */
455 omp_free_regions (void)
457 struct omp_region
*r
, *n
;
458 for (r
= root_omp_region
; r
; r
= n
)
461 free_omp_region_1 (r
);
463 root_omp_region
= NULL
;
466 /* A convenience function to build an empty GIMPLE_COND with just the
470 gimple_build_cond_empty (tree cond
)
472 enum tree_code pred_code
;
475 gimple_cond_get_ops_from_tree (cond
, &pred_code
, &lhs
, &rhs
);
476 return gimple_build_cond (pred_code
, lhs
, rhs
, NULL_TREE
, NULL_TREE
);
479 /* Return true if a parallel REGION is within a declare target function or
480 within a target region and is not a part of a gridified target. */
483 parallel_needs_hsa_kernel_p (struct omp_region
*region
)
485 bool indirect
= false;
486 for (region
= region
->outer
; region
; region
= region
->outer
)
488 if (region
->type
== GIMPLE_OMP_PARALLEL
)
490 else if (region
->type
== GIMPLE_OMP_TARGET
)
492 gomp_target
*tgt_stmt
493 = as_a
<gomp_target
*> (last_stmt (region
->entry
));
495 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
496 OMP_CLAUSE__GRIDDIM_
))
503 if (lookup_attribute ("omp declare target",
504 DECL_ATTRIBUTES (current_function_decl
)))
510 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
511 Add CHILD_FNDECL to decl chain of the supercontext of the block
512 ENTRY_BLOCK - this is the block which originally contained the
513 code from which CHILD_FNDECL was created.
515 Together, these actions ensure that the debug info for the outlined
516 function will be emitted with the correct lexical scope. */
519 adjust_context_and_scope (struct omp_region
*region
, tree entry_block
,
522 tree parent_fndecl
= NULL_TREE
;
524 /* OMP expansion expands inner regions before outer ones, so if
525 we e.g. have explicit task region nested in parallel region, when
526 expanding the task region current_function_decl will be the original
527 source function, but we actually want to use as context the child
528 function of the parallel. */
529 for (region
= region
->outer
;
530 region
&& parent_fndecl
== NULL_TREE
; region
= region
->outer
)
531 switch (region
->type
)
533 case GIMPLE_OMP_PARALLEL
:
534 case GIMPLE_OMP_TASK
:
535 case GIMPLE_OMP_TEAMS
:
536 entry_stmt
= last_stmt (region
->entry
);
537 parent_fndecl
= gimple_omp_taskreg_child_fn (entry_stmt
);
539 case GIMPLE_OMP_TARGET
:
540 entry_stmt
= last_stmt (region
->entry
);
542 = gimple_omp_target_child_fn (as_a
<gomp_target
*> (entry_stmt
));
548 if (parent_fndecl
== NULL_TREE
)
549 parent_fndecl
= current_function_decl
;
550 DECL_CONTEXT (child_fndecl
) = parent_fndecl
;
552 if (entry_block
!= NULL_TREE
&& TREE_CODE (entry_block
) == BLOCK
)
554 tree b
= BLOCK_SUPERCONTEXT (entry_block
);
555 if (TREE_CODE (b
) == BLOCK
)
557 DECL_CHAIN (child_fndecl
) = BLOCK_VARS (b
);
558 BLOCK_VARS (b
) = child_fndecl
;
563 /* Build the function calls to GOMP_parallel etc to actually
564 generate the parallel operation. REGION is the parallel region
565 being expanded. BB is the block where to insert the code. WS_ARGS
566 will be set if this is a call to a combined parallel+workshare
567 construct, it contains the list of additional arguments needed by
568 the workshare construct. */
571 expand_parallel_call (struct omp_region
*region
, basic_block bb
,
572 gomp_parallel
*entry_stmt
,
573 vec
<tree
, va_gc
> *ws_args
)
575 tree t
, t1
, t2
, val
, cond
, c
, clauses
, flags
;
576 gimple_stmt_iterator gsi
;
578 enum built_in_function start_ix
;
580 location_t clause_loc
;
581 vec
<tree
, va_gc
> *args
;
583 clauses
= gimple_omp_parallel_clauses (entry_stmt
);
585 /* Determine what flavor of GOMP_parallel we will be
587 start_ix
= BUILT_IN_GOMP_PARALLEL
;
588 tree rtmp
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
590 start_ix
= BUILT_IN_GOMP_PARALLEL_REDUCTIONS
;
591 else if (is_combined_parallel (region
))
593 switch (region
->inner
->type
)
596 gcc_assert (region
->inner
->sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
597 switch (region
->inner
->sched_kind
)
599 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
600 if ((region
->inner
->sched_modifiers
601 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC
) != 0)
603 else if ((region
->inner
->sched_modifiers
604 & OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0)
609 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
610 case OMP_CLAUSE_SCHEDULE_GUIDED
:
611 if ((region
->inner
->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0)
614 start_ix2
= 3 + region
->inner
->sched_kind
;
619 start_ix2
= region
->inner
->sched_kind
;
622 start_ix2
+= (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
;
623 start_ix
= (enum built_in_function
) start_ix2
;
625 case GIMPLE_OMP_SECTIONS
:
626 start_ix
= BUILT_IN_GOMP_PARALLEL_SECTIONS
;
633 /* By default, the value of NUM_THREADS is zero (selected at run time)
634 and there is no conditional. */
636 val
= build_int_cst (unsigned_type_node
, 0);
637 flags
= build_int_cst (unsigned_type_node
, 0);
639 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
641 cond
= OMP_CLAUSE_IF_EXPR (c
);
643 c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_THREADS
);
646 val
= OMP_CLAUSE_NUM_THREADS_EXPR (c
);
647 clause_loc
= OMP_CLAUSE_LOCATION (c
);
650 clause_loc
= gimple_location (entry_stmt
);
652 c
= omp_find_clause (clauses
, OMP_CLAUSE_PROC_BIND
);
654 flags
= build_int_cst (unsigned_type_node
, OMP_CLAUSE_PROC_BIND_KIND (c
));
656 /* Ensure 'val' is of the correct type. */
657 val
= fold_convert_loc (clause_loc
, unsigned_type_node
, val
);
659 /* If we found the clause 'if (cond)', build either
660 (cond != 0) or (cond ? val : 1u). */
663 cond
= gimple_boolify (cond
);
665 if (integer_zerop (val
))
666 val
= fold_build2_loc (clause_loc
,
667 EQ_EXPR
, unsigned_type_node
, cond
,
668 build_int_cst (TREE_TYPE (cond
), 0));
671 basic_block cond_bb
, then_bb
, else_bb
;
672 edge e
, e_then
, e_else
;
673 tree tmp_then
, tmp_else
, tmp_join
, tmp_var
;
675 tmp_var
= create_tmp_var (TREE_TYPE (val
));
676 if (gimple_in_ssa_p (cfun
))
678 tmp_then
= make_ssa_name (tmp_var
);
679 tmp_else
= make_ssa_name (tmp_var
);
680 tmp_join
= make_ssa_name (tmp_var
);
689 e
= split_block_after_labels (bb
);
694 then_bb
= create_empty_bb (cond_bb
);
695 else_bb
= create_empty_bb (then_bb
);
696 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
697 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
699 stmt
= gimple_build_cond_empty (cond
);
700 gsi
= gsi_start_bb (cond_bb
);
701 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
703 gsi
= gsi_start_bb (then_bb
);
704 expand_omp_build_assign (&gsi
, tmp_then
, val
, true);
706 gsi
= gsi_start_bb (else_bb
);
707 expand_omp_build_assign (&gsi
, tmp_else
,
708 build_int_cst (unsigned_type_node
, 1),
711 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
712 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
713 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
714 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
715 e_then
= make_edge (then_bb
, bb
, EDGE_FALLTHRU
);
716 e_else
= make_edge (else_bb
, bb
, EDGE_FALLTHRU
);
718 if (gimple_in_ssa_p (cfun
))
720 gphi
*phi
= create_phi_node (tmp_join
, bb
);
721 add_phi_arg (phi
, tmp_then
, e_then
, UNKNOWN_LOCATION
);
722 add_phi_arg (phi
, tmp_else
, e_else
, UNKNOWN_LOCATION
);
728 gsi
= gsi_start_bb (bb
);
729 val
= force_gimple_operand_gsi (&gsi
, val
, true, NULL_TREE
,
730 false, GSI_CONTINUE_LINKING
);
733 gsi
= gsi_last_nondebug_bb (bb
);
734 t
= gimple_omp_parallel_data_arg (entry_stmt
);
736 t1
= null_pointer_node
;
738 t1
= build_fold_addr_expr (t
);
739 tree child_fndecl
= gimple_omp_parallel_child_fn (entry_stmt
);
740 t2
= build_fold_addr_expr (child_fndecl
);
742 vec_alloc (args
, 4 + vec_safe_length (ws_args
));
743 args
->quick_push (t2
);
744 args
->quick_push (t1
);
745 args
->quick_push (val
);
747 args
->splice (*ws_args
);
748 args
->quick_push (flags
);
750 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
751 builtin_decl_explicit (start_ix
), args
);
755 tree type
= TREE_TYPE (OMP_CLAUSE_DECL (rtmp
));
756 t
= build2 (MODIFY_EXPR
, type
, OMP_CLAUSE_DECL (rtmp
),
758 fold_convert (pointer_sized_int_node
, t
)));
760 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
761 false, GSI_CONTINUE_LINKING
);
763 if (hsa_gen_requested_p ()
764 && parallel_needs_hsa_kernel_p (region
))
766 cgraph_node
*child_cnode
= cgraph_node::get (child_fndecl
);
767 hsa_register_kernel (child_cnode
);
771 /* Build the function call to GOMP_task to actually
772 generate the task operation. BB is the block where to insert the code. */
775 expand_task_call (struct omp_region
*region
, basic_block bb
,
776 gomp_task
*entry_stmt
)
779 gimple_stmt_iterator gsi
;
780 location_t loc
= gimple_location (entry_stmt
);
782 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
784 tree ifc
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
785 tree untied
= omp_find_clause (clauses
, OMP_CLAUSE_UNTIED
);
786 tree mergeable
= omp_find_clause (clauses
, OMP_CLAUSE_MERGEABLE
);
787 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
788 tree finalc
= omp_find_clause (clauses
, OMP_CLAUSE_FINAL
);
789 tree priority
= omp_find_clause (clauses
, OMP_CLAUSE_PRIORITY
);
792 = (untied
? GOMP_TASK_FLAG_UNTIED
: 0)
793 | (mergeable
? GOMP_TASK_FLAG_MERGEABLE
: 0)
794 | (depend
? GOMP_TASK_FLAG_DEPEND
: 0);
796 bool taskloop_p
= gimple_omp_task_taskloop_p (entry_stmt
);
797 tree startvar
= NULL_TREE
, endvar
= NULL_TREE
, step
= NULL_TREE
;
798 tree num_tasks
= NULL_TREE
;
802 gimple
*g
= last_stmt (region
->outer
->entry
);
803 gcc_assert (gimple_code (g
) == GIMPLE_OMP_FOR
804 && gimple_omp_for_kind (g
) == GF_OMP_FOR_KIND_TASKLOOP
);
805 struct omp_for_data fd
;
806 omp_extract_for_data (as_a
<gomp_for
*> (g
), &fd
, NULL
);
807 startvar
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
808 endvar
= omp_find_clause (OMP_CLAUSE_CHAIN (startvar
),
809 OMP_CLAUSE__LOOPTEMP_
);
810 startvar
= OMP_CLAUSE_DECL (startvar
);
811 endvar
= OMP_CLAUSE_DECL (endvar
);
812 step
= fold_convert_loc (loc
, fd
.iter_type
, fd
.loop
.step
);
813 if (fd
.loop
.cond_code
== LT_EXPR
)
814 iflags
|= GOMP_TASK_FLAG_UP
;
815 tree tclauses
= gimple_omp_for_clauses (g
);
816 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_NUM_TASKS
);
818 num_tasks
= OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks
);
821 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_GRAINSIZE
);
824 iflags
|= GOMP_TASK_FLAG_GRAINSIZE
;
825 num_tasks
= OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks
);
828 num_tasks
= integer_zero_node
;
830 num_tasks
= fold_convert_loc (loc
, long_integer_type_node
, num_tasks
);
831 if (ifc
== NULL_TREE
)
832 iflags
|= GOMP_TASK_FLAG_IF
;
833 if (omp_find_clause (tclauses
, OMP_CLAUSE_NOGROUP
))
834 iflags
|= GOMP_TASK_FLAG_NOGROUP
;
835 ull
= fd
.iter_type
== long_long_unsigned_type_node
;
836 if (omp_find_clause (clauses
, OMP_CLAUSE_REDUCTION
))
837 iflags
|= GOMP_TASK_FLAG_REDUCTION
;
840 iflags
|= GOMP_TASK_FLAG_PRIORITY
;
842 tree flags
= build_int_cst (unsigned_type_node
, iflags
);
844 tree cond
= boolean_true_node
;
849 tree t
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
850 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
851 build_int_cst (unsigned_type_node
,
853 build_int_cst (unsigned_type_node
, 0));
854 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
,
858 cond
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
863 tree t
= gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc
));
864 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
865 build_int_cst (unsigned_type_node
,
866 GOMP_TASK_FLAG_FINAL
),
867 build_int_cst (unsigned_type_node
, 0));
868 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
, flags
, t
);
871 depend
= OMP_CLAUSE_DECL (depend
);
873 depend
= build_int_cst (ptr_type_node
, 0);
875 priority
= fold_convert (integer_type_node
,
876 OMP_CLAUSE_PRIORITY_EXPR (priority
));
878 priority
= integer_zero_node
;
880 gsi
= gsi_last_nondebug_bb (bb
);
881 tree t
= gimple_omp_task_data_arg (entry_stmt
);
883 t2
= null_pointer_node
;
885 t2
= build_fold_addr_expr_loc (loc
, t
);
886 t1
= build_fold_addr_expr_loc (loc
, gimple_omp_task_child_fn (entry_stmt
));
887 t
= gimple_omp_task_copy_fn (entry_stmt
);
889 t3
= null_pointer_node
;
891 t3
= build_fold_addr_expr_loc (loc
, t
);
894 t
= build_call_expr (ull
895 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL
)
896 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP
),
898 gimple_omp_task_arg_size (entry_stmt
),
899 gimple_omp_task_arg_align (entry_stmt
), flags
,
900 num_tasks
, priority
, startvar
, endvar
, step
);
902 t
= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK
),
904 gimple_omp_task_arg_size (entry_stmt
),
905 gimple_omp_task_arg_align (entry_stmt
), cond
, flags
,
908 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
909 false, GSI_CONTINUE_LINKING
);
912 /* Build the function call to GOMP_taskwait_depend to actually
913 generate the taskwait operation. BB is the block where to insert the
917 expand_taskwait_call (basic_block bb
, gomp_task
*entry_stmt
)
919 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
920 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
921 if (depend
== NULL_TREE
)
924 depend
= OMP_CLAUSE_DECL (depend
);
926 gimple_stmt_iterator gsi
= gsi_last_nondebug_bb (bb
);
928 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND
),
931 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
932 false, GSI_CONTINUE_LINKING
);
935 /* Build the function call to GOMP_teams_reg to actually
936 generate the host teams operation. REGION is the teams region
937 being expanded. BB is the block where to insert the code. */
940 expand_teams_call (basic_block bb
, gomp_teams
*entry_stmt
)
942 tree clauses
= gimple_omp_teams_clauses (entry_stmt
);
943 tree num_teams
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_TEAMS
);
944 if (num_teams
== NULL_TREE
)
945 num_teams
= build_int_cst (unsigned_type_node
, 0);
948 num_teams
= OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams
);
949 num_teams
= fold_convert (unsigned_type_node
, num_teams
);
951 tree thread_limit
= omp_find_clause (clauses
, OMP_CLAUSE_THREAD_LIMIT
);
952 if (thread_limit
== NULL_TREE
)
953 thread_limit
= build_int_cst (unsigned_type_node
, 0);
956 thread_limit
= OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit
);
957 thread_limit
= fold_convert (unsigned_type_node
, thread_limit
);
960 gimple_stmt_iterator gsi
= gsi_last_nondebug_bb (bb
);
961 tree t
= gimple_omp_teams_data_arg (entry_stmt
), t1
;
963 t1
= null_pointer_node
;
965 t1
= build_fold_addr_expr (t
);
966 tree child_fndecl
= gimple_omp_teams_child_fn (entry_stmt
);
967 tree t2
= build_fold_addr_expr (child_fndecl
);
969 vec
<tree
, va_gc
> *args
;
971 args
->quick_push (t2
);
972 args
->quick_push (t1
);
973 args
->quick_push (num_teams
);
974 args
->quick_push (thread_limit
);
975 /* For future extensibility. */
976 args
->quick_push (build_zero_cst (unsigned_type_node
));
978 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
979 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG
),
982 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
983 false, GSI_CONTINUE_LINKING
);
986 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
989 vec2chain (vec
<tree
, va_gc
> *v
)
991 tree chain
= NULL_TREE
, t
;
994 FOR_EACH_VEC_SAFE_ELT_REVERSE (v
, ix
, t
)
996 DECL_CHAIN (t
) = chain
;
1003 /* Remove barriers in REGION->EXIT's block. Note that this is only
1004 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1005 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1006 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1010 remove_exit_barrier (struct omp_region
*region
)
1012 gimple_stmt_iterator gsi
;
1013 basic_block exit_bb
;
1017 int any_addressable_vars
= -1;
1019 exit_bb
= region
->exit
;
1021 /* If the parallel region doesn't return, we don't have REGION->EXIT
1026 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1027 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1028 statements that can appear in between are extremely limited -- no
1029 memory operations at all. Here, we allow nothing at all, so the
1030 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1031 gsi
= gsi_last_nondebug_bb (exit_bb
);
1032 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1033 gsi_prev_nondebug (&gsi
);
1034 if (!gsi_end_p (gsi
) && gimple_code (gsi_stmt (gsi
)) != GIMPLE_LABEL
)
1037 FOR_EACH_EDGE (e
, ei
, exit_bb
->preds
)
1039 gsi
= gsi_last_nondebug_bb (e
->src
);
1040 if (gsi_end_p (gsi
))
1042 stmt
= gsi_stmt (gsi
);
1043 if (gimple_code (stmt
) == GIMPLE_OMP_RETURN
1044 && !gimple_omp_return_nowait_p (stmt
))
1046 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1047 in many cases. If there could be tasks queued, the barrier
1048 might be needed to let the tasks run before some local
1049 variable of the parallel that the task uses as shared
1050 runs out of scope. The task can be spawned either
1051 from within current function (this would be easy to check)
1052 or from some function it calls and gets passed an address
1053 of such a variable. */
1054 if (any_addressable_vars
< 0)
1056 gomp_parallel
*parallel_stmt
1057 = as_a
<gomp_parallel
*> (last_stmt (region
->entry
));
1058 tree child_fun
= gimple_omp_parallel_child_fn (parallel_stmt
);
1059 tree local_decls
, block
, decl
;
1062 any_addressable_vars
= 0;
1063 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun
), ix
, decl
)
1064 if (TREE_ADDRESSABLE (decl
))
1066 any_addressable_vars
= 1;
1069 for (block
= gimple_block (stmt
);
1070 !any_addressable_vars
1072 && TREE_CODE (block
) == BLOCK
;
1073 block
= BLOCK_SUPERCONTEXT (block
))
1075 for (local_decls
= BLOCK_VARS (block
);
1077 local_decls
= DECL_CHAIN (local_decls
))
1078 if (TREE_ADDRESSABLE (local_decls
))
1080 any_addressable_vars
= 1;
1083 if (block
== gimple_block (parallel_stmt
))
1087 if (!any_addressable_vars
)
1088 gimple_omp_return_set_nowait (stmt
);
1094 remove_exit_barriers (struct omp_region
*region
)
1096 if (region
->type
== GIMPLE_OMP_PARALLEL
)
1097 remove_exit_barrier (region
);
1101 region
= region
->inner
;
1102 remove_exit_barriers (region
);
1103 while (region
->next
)
1105 region
= region
->next
;
1106 remove_exit_barriers (region
);
1111 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1112 calls. These can't be declared as const functions, but
1113 within one parallel body they are constant, so they can be
1114 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1115 which are declared const. Similarly for task body, except
1116 that in untied task omp_get_thread_num () can change at any task
1117 scheduling point. */
1120 optimize_omp_library_calls (gimple
*entry_stmt
)
1123 gimple_stmt_iterator gsi
;
1124 tree thr_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1125 tree thr_num_id
= DECL_ASSEMBLER_NAME (thr_num_tree
);
1126 tree num_thr_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1127 tree num_thr_id
= DECL_ASSEMBLER_NAME (num_thr_tree
);
1128 bool untied_task
= (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1129 && omp_find_clause (gimple_omp_task_clauses (entry_stmt
),
1130 OMP_CLAUSE_UNTIED
) != NULL
);
1132 FOR_EACH_BB_FN (bb
, cfun
)
1133 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1135 gimple
*call
= gsi_stmt (gsi
);
1138 if (is_gimple_call (call
)
1139 && (decl
= gimple_call_fndecl (call
))
1140 && DECL_EXTERNAL (decl
)
1141 && TREE_PUBLIC (decl
)
1142 && DECL_INITIAL (decl
) == NULL
)
1146 if (DECL_NAME (decl
) == thr_num_id
)
1148 /* In #pragma omp task untied omp_get_thread_num () can change
1149 during the execution of the task region. */
1152 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1154 else if (DECL_NAME (decl
) == num_thr_id
)
1155 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1159 if (DECL_ASSEMBLER_NAME (decl
) != DECL_ASSEMBLER_NAME (built_in
)
1160 || gimple_call_num_args (call
) != 0)
1163 if (flag_exceptions
&& !TREE_NOTHROW (decl
))
1166 if (TREE_CODE (TREE_TYPE (decl
)) != FUNCTION_TYPE
1167 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl
)),
1168 TREE_TYPE (TREE_TYPE (built_in
))))
1171 gimple_call_set_fndecl (call
, built_in
);
1176 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1180 expand_omp_regimplify_p (tree
*tp
, int *walk_subtrees
, void *)
1184 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1185 if (VAR_P (t
) && DECL_HAS_VALUE_EXPR_P (t
))
1188 if (TREE_CODE (t
) == ADDR_EXPR
)
1189 recompute_tree_invariant_for_addr_expr (t
);
1191 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
1195 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1198 expand_omp_build_assign (gimple_stmt_iterator
*gsi_p
, tree to
, tree from
,
1201 bool simple_p
= DECL_P (to
) && TREE_ADDRESSABLE (to
);
1202 from
= force_gimple_operand_gsi (gsi_p
, from
, simple_p
, NULL_TREE
,
1203 !after
, after
? GSI_CONTINUE_LINKING
1205 gimple
*stmt
= gimple_build_assign (to
, from
);
1207 gsi_insert_after (gsi_p
, stmt
, GSI_CONTINUE_LINKING
);
1209 gsi_insert_before (gsi_p
, stmt
, GSI_SAME_STMT
);
1210 if (walk_tree (&from
, expand_omp_regimplify_p
, NULL
, NULL
)
1211 || walk_tree (&to
, expand_omp_regimplify_p
, NULL
, NULL
))
1213 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1214 gimple_regimplify_operands (stmt
, &gsi
);
1218 /* Expand the OpenMP parallel or task directive starting at REGION. */
1221 expand_omp_taskreg (struct omp_region
*region
)
1223 basic_block entry_bb
, exit_bb
, new_bb
;
1224 struct function
*child_cfun
;
1225 tree child_fn
, block
, t
;
1226 gimple_stmt_iterator gsi
;
1227 gimple
*entry_stmt
, *stmt
;
1229 vec
<tree
, va_gc
> *ws_args
;
1231 entry_stmt
= last_stmt (region
->entry
);
1232 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1233 && gimple_omp_task_taskwait_p (entry_stmt
))
1235 new_bb
= region
->entry
;
1236 gsi
= gsi_last_nondebug_bb (region
->entry
);
1237 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
);
1238 gsi_remove (&gsi
, true);
1239 expand_taskwait_call (new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1243 child_fn
= gimple_omp_taskreg_child_fn (entry_stmt
);
1244 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
1246 entry_bb
= region
->entry
;
1247 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
)
1248 exit_bb
= region
->cont
;
1250 exit_bb
= region
->exit
;
1252 if (is_combined_parallel (region
))
1253 ws_args
= region
->ws_args
;
1257 if (child_cfun
->cfg
)
1259 /* Due to inlining, it may happen that we have already outlined
1260 the region, in which case all we need to do is make the
1261 sub-graph unreachable and emit the parallel call. */
1262 edge entry_succ_e
, exit_succ_e
;
1264 entry_succ_e
= single_succ_edge (entry_bb
);
1266 gsi
= gsi_last_nondebug_bb (entry_bb
);
1267 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_PARALLEL
1268 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
1269 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TEAMS
);
1270 gsi_remove (&gsi
, true);
1275 exit_succ_e
= single_succ_edge (exit_bb
);
1276 make_edge (new_bb
, exit_succ_e
->dest
, EDGE_FALLTHRU
);
1278 remove_edge_and_dominated_blocks (entry_succ_e
);
1282 unsigned srcidx
, dstidx
, num
;
1284 /* If the parallel region needs data sent from the parent
1285 function, then the very first statement (except possible
1286 tree profile counter updates) of the parallel body
1287 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1288 &.OMP_DATA_O is passed as an argument to the child function,
1289 we need to replace it with the argument as seen by the child
1292 In most cases, this will end up being the identity assignment
1293 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1294 a function call that has been inlined, the original PARM_DECL
1295 .OMP_DATA_I may have been converted into a different local
1296 variable. In which case, we need to keep the assignment. */
1297 if (gimple_omp_taskreg_data_arg (entry_stmt
))
1299 basic_block entry_succ_bb
1300 = single_succ_p (entry_bb
) ? single_succ (entry_bb
)
1301 : FALLTHRU_EDGE (entry_bb
)->dest
;
1303 gimple
*parcopy_stmt
= NULL
;
1305 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
1309 gcc_assert (!gsi_end_p (gsi
));
1310 stmt
= gsi_stmt (gsi
);
1311 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1314 if (gimple_num_ops (stmt
) == 2)
1316 tree arg
= gimple_assign_rhs1 (stmt
);
1318 /* We're ignore the subcode because we're
1319 effectively doing a STRIP_NOPS. */
1321 if (TREE_CODE (arg
) == ADDR_EXPR
1322 && (TREE_OPERAND (arg
, 0)
1323 == gimple_omp_taskreg_data_arg (entry_stmt
)))
1325 parcopy_stmt
= stmt
;
1331 gcc_assert (parcopy_stmt
!= NULL
);
1332 arg
= DECL_ARGUMENTS (child_fn
);
1334 if (!gimple_in_ssa_p (cfun
))
1336 if (gimple_assign_lhs (parcopy_stmt
) == arg
)
1337 gsi_remove (&gsi
, true);
1340 /* ?? Is setting the subcode really necessary ?? */
1341 gimple_omp_set_subcode (parcopy_stmt
, TREE_CODE (arg
));
1342 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1347 tree lhs
= gimple_assign_lhs (parcopy_stmt
);
1348 gcc_assert (SSA_NAME_VAR (lhs
) == arg
);
1349 /* We'd like to set the rhs to the default def in the child_fn,
1350 but it's too early to create ssa names in the child_fn.
1351 Instead, we set the rhs to the parm. In
1352 move_sese_region_to_fn, we introduce a default def for the
1353 parm, map the parm to it's default def, and once we encounter
1354 this stmt, replace the parm with the default def. */
1355 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1356 update_stmt (parcopy_stmt
);
1360 /* Declare local variables needed in CHILD_CFUN. */
1361 block
= DECL_INITIAL (child_fn
);
1362 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
1363 /* The gimplifier could record temporaries in parallel/task block
1364 rather than in containing function's local_decls chain,
1365 which would mean cgraph missed finalizing them. Do it now. */
1366 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
1367 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
1368 varpool_node::finalize_decl (t
);
1369 DECL_SAVED_TREE (child_fn
) = NULL
;
1370 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1371 gimple_set_body (child_fn
, NULL
);
1372 TREE_USED (block
) = 1;
1374 /* Reset DECL_CONTEXT on function arguments. */
1375 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
1376 DECL_CONTEXT (t
) = child_fn
;
1378 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1379 so that it can be moved to the child function. */
1380 gsi
= gsi_last_nondebug_bb (entry_bb
);
1381 stmt
= gsi_stmt (gsi
);
1382 gcc_assert (stmt
&& (gimple_code (stmt
) == GIMPLE_OMP_PARALLEL
1383 || gimple_code (stmt
) == GIMPLE_OMP_TASK
1384 || gimple_code (stmt
) == GIMPLE_OMP_TEAMS
));
1385 e
= split_block (entry_bb
, stmt
);
1386 gsi_remove (&gsi
, true);
1389 if (gimple_code (entry_stmt
) != GIMPLE_OMP_TASK
)
1390 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
1393 e2
= make_edge (e
->src
, BRANCH_EDGE (entry_bb
)->dest
, EDGE_ABNORMAL
);
1394 gcc_assert (e2
->dest
== region
->exit
);
1395 remove_edge (BRANCH_EDGE (entry_bb
));
1396 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e
->src
);
1397 gsi
= gsi_last_nondebug_bb (region
->exit
);
1398 gcc_assert (!gsi_end_p (gsi
)
1399 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1400 gsi_remove (&gsi
, true);
1403 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1406 gsi
= gsi_last_nondebug_bb (exit_bb
);
1407 gcc_assert (!gsi_end_p (gsi
)
1408 && (gimple_code (gsi_stmt (gsi
))
1409 == (e2
? GIMPLE_OMP_CONTINUE
: GIMPLE_OMP_RETURN
)));
1410 stmt
= gimple_build_return (NULL
);
1411 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1412 gsi_remove (&gsi
, true);
1415 /* Move the parallel region into CHILD_CFUN. */
1417 if (gimple_in_ssa_p (cfun
))
1419 init_tree_ssa (child_cfun
);
1420 init_ssa_operands (child_cfun
);
1421 child_cfun
->gimple_df
->in_ssa_p
= true;
1425 block
= gimple_block (entry_stmt
);
1427 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
1429 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
1432 basic_block dest_bb
= e2
->dest
;
1434 make_edge (new_bb
, dest_bb
, EDGE_FALLTHRU
);
1436 set_immediate_dominator (CDI_DOMINATORS
, dest_bb
, new_bb
);
1438 /* When the OMP expansion process cannot guarantee an up-to-date
1439 loop tree arrange for the child function to fixup loops. */
1440 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1441 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
1443 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1444 num
= vec_safe_length (child_cfun
->local_decls
);
1445 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
1447 t
= (*child_cfun
->local_decls
)[srcidx
];
1448 if (DECL_CONTEXT (t
) == cfun
->decl
)
1450 if (srcidx
!= dstidx
)
1451 (*child_cfun
->local_decls
)[dstidx
] = t
;
1455 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
1457 /* Inform the callgraph about the new function. */
1458 child_cfun
->curr_properties
= cfun
->curr_properties
;
1459 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
1460 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
1461 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
1462 node
->parallelized_function
= 1;
1463 cgraph_node::add_new_function (child_fn
, true);
1465 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
1466 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
1468 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1469 fixed in a following pass. */
1470 push_cfun (child_cfun
);
1472 assign_assembler_name_if_needed (child_fn
);
1475 optimize_omp_library_calls (entry_stmt
);
1476 update_max_bb_count ();
1477 cgraph_edge::rebuild_edges ();
1479 /* Some EH regions might become dead, see PR34608. If
1480 pass_cleanup_cfg isn't the first pass to happen with the
1481 new child, these dead EH edges might cause problems.
1482 Clean them up now. */
1483 if (flag_exceptions
)
1486 bool changed
= false;
1488 FOR_EACH_BB_FN (bb
, cfun
)
1489 changed
|= gimple_purge_dead_eh_edges (bb
);
1491 cleanup_tree_cfg ();
1493 if (gimple_in_ssa_p (cfun
))
1494 update_ssa (TODO_update_ssa
);
1495 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1496 verify_loop_structure ();
1499 if (dump_file
&& !gimple_in_ssa_p (cfun
))
1501 omp_any_child_fn_dumped
= true;
1502 dump_function_header (dump_file
, child_fn
, dump_flags
);
1503 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
1507 adjust_context_and_scope (region
, gimple_block (entry_stmt
), child_fn
);
1509 if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1510 expand_parallel_call (region
, new_bb
,
1511 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1512 else if (gimple_code (entry_stmt
) == GIMPLE_OMP_TEAMS
)
1513 expand_teams_call (new_bb
, as_a
<gomp_teams
*> (entry_stmt
));
1515 expand_task_call (region
, new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1516 if (gimple_in_ssa_p (cfun
))
1517 update_ssa (TODO_update_ssa_only_virtuals
);
1520 /* Information about members of an OpenACC collapsed loop nest. */
1522 struct oacc_collapse
1524 tree base
; /* Base value. */
1525 tree iters
; /* Number of steps. */
1526 tree step
; /* Step size. */
1527 tree tile
; /* Tile increment (if tiled). */
1528 tree outer
; /* Tile iterator var. */
1531 /* Helper for expand_oacc_for. Determine collapsed loop information.
1532 Fill in COUNTS array. Emit any initialization code before GSI.
1533 Return the calculated outer loop bound of BOUND_TYPE. */
1536 expand_oacc_collapse_init (const struct omp_for_data
*fd
,
1537 gimple_stmt_iterator
*gsi
,
1538 oacc_collapse
*counts
, tree bound_type
,
1541 tree tiling
= fd
->tiling
;
1542 tree total
= build_int_cst (bound_type
, 1);
1545 gcc_assert (integer_onep (fd
->loop
.step
));
1546 gcc_assert (integer_zerop (fd
->loop
.n1
));
1548 /* When tiling, the first operand of the tile clause applies to the
1549 innermost loop, and we work outwards from there. Seems
1550 backwards, but whatever. */
1551 for (ix
= fd
->collapse
; ix
--;)
1553 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1555 tree iter_type
= TREE_TYPE (loop
->v
);
1556 tree diff_type
= iter_type
;
1557 tree plus_type
= iter_type
;
1559 gcc_assert (loop
->cond_code
== fd
->loop
.cond_code
);
1561 if (POINTER_TYPE_P (iter_type
))
1562 plus_type
= sizetype
;
1563 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
1564 diff_type
= signed_type_for (diff_type
);
1565 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
1566 diff_type
= integer_type_node
;
1570 tree num
= build_int_cst (integer_type_node
, fd
->collapse
);
1571 tree loop_no
= build_int_cst (integer_type_node
, ix
);
1572 tree tile
= TREE_VALUE (tiling
);
1574 = gimple_build_call_internal (IFN_GOACC_TILE
, 5, num
, loop_no
, tile
,
1575 /* gwv-outer=*/integer_zero_node
,
1576 /* gwv-inner=*/integer_zero_node
);
1578 counts
[ix
].outer
= create_tmp_var (iter_type
, ".outer");
1579 counts
[ix
].tile
= create_tmp_var (diff_type
, ".tile");
1580 gimple_call_set_lhs (call
, counts
[ix
].tile
);
1581 gimple_set_location (call
, loc
);
1582 gsi_insert_before (gsi
, call
, GSI_SAME_STMT
);
1584 tiling
= TREE_CHAIN (tiling
);
1588 counts
[ix
].tile
= NULL
;
1589 counts
[ix
].outer
= loop
->v
;
1594 tree s
= loop
->step
;
1595 bool up
= loop
->cond_code
== LT_EXPR
;
1596 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
1600 b
= force_gimple_operand_gsi (gsi
, b
, true, NULL_TREE
,
1601 true, GSI_SAME_STMT
);
1602 e
= force_gimple_operand_gsi (gsi
, e
, true, NULL_TREE
,
1603 true, GSI_SAME_STMT
);
1605 /* Convert the step, avoiding possible unsigned->signed overflow. */
1606 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
1608 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
1609 s
= fold_convert (diff_type
, s
);
1611 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
1612 s
= force_gimple_operand_gsi (gsi
, s
, true, NULL_TREE
,
1613 true, GSI_SAME_STMT
);
1615 /* Determine the range, avoiding possible unsigned->signed overflow. */
1616 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
1617 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
1618 fold_convert (plus_type
, negating
? b
: e
),
1619 fold_convert (plus_type
, negating
? e
: b
));
1620 expr
= fold_convert (diff_type
, expr
);
1622 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
1623 tree range
= force_gimple_operand_gsi
1624 (gsi
, expr
, true, NULL_TREE
, true, GSI_SAME_STMT
);
1626 /* Determine number of iterations. */
1627 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
1628 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
1629 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
1631 tree iters
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
,
1632 true, GSI_SAME_STMT
);
1634 counts
[ix
].base
= b
;
1635 counts
[ix
].iters
= iters
;
1636 counts
[ix
].step
= s
;
1638 total
= fold_build2 (MULT_EXPR
, bound_type
, total
,
1639 fold_convert (bound_type
, iters
));
1645 /* Emit initializers for collapsed loop members. INNER is true if
1646 this is for the element loop of a TILE. IVAR is the outer
1647 loop iteration variable, from which collapsed loop iteration values
1648 are calculated. COUNTS array has been initialized by
1649 expand_oacc_collapse_inits. */
1652 expand_oacc_collapse_vars (const struct omp_for_data
*fd
, bool inner
,
1653 gimple_stmt_iterator
*gsi
,
1654 const oacc_collapse
*counts
, tree ivar
)
1656 tree ivar_type
= TREE_TYPE (ivar
);
1658 /* The most rapidly changing iteration variable is the innermost
1660 for (int ix
= fd
->collapse
; ix
--;)
1662 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1663 const oacc_collapse
*collapse
= &counts
[ix
];
1664 tree v
= inner
? loop
->v
: collapse
->outer
;
1665 tree iter_type
= TREE_TYPE (v
);
1666 tree diff_type
= TREE_TYPE (collapse
->step
);
1667 tree plus_type
= iter_type
;
1668 enum tree_code plus_code
= PLUS_EXPR
;
1671 if (POINTER_TYPE_P (iter_type
))
1673 plus_code
= POINTER_PLUS_EXPR
;
1674 plus_type
= sizetype
;
1680 tree mod
= fold_convert (ivar_type
, collapse
->iters
);
1681 ivar
= fold_build2 (TRUNC_DIV_EXPR
, ivar_type
, expr
, mod
);
1682 expr
= fold_build2 (TRUNC_MOD_EXPR
, ivar_type
, expr
, mod
);
1683 ivar
= force_gimple_operand_gsi (gsi
, ivar
, true, NULL_TREE
,
1684 true, GSI_SAME_STMT
);
1687 expr
= fold_build2 (MULT_EXPR
, diff_type
, fold_convert (diff_type
, expr
),
1689 expr
= fold_build2 (plus_code
, iter_type
,
1690 inner
? collapse
->outer
: collapse
->base
,
1691 fold_convert (plus_type
, expr
));
1692 expr
= force_gimple_operand_gsi (gsi
, expr
, false, NULL_TREE
,
1693 true, GSI_SAME_STMT
);
1694 gassign
*ass
= gimple_build_assign (v
, expr
);
1695 gsi_insert_before (gsi
, ass
, GSI_SAME_STMT
);
1699 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1700 of the combined collapse > 1 loop constructs, generate code like:
1701 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1706 count3 = (adj + N32 - N31) / STEP3;
1707 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1712 count2 = (adj + N22 - N21) / STEP2;
1713 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1718 count1 = (adj + N12 - N11) / STEP1;
1719 count = count1 * count2 * count3;
1720 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1722 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1723 of the combined loop constructs, just initialize COUNTS array
1724 from the _looptemp_ clauses. */
1726 /* NOTE: It *could* be better to moosh all of the BBs together,
1727 creating one larger BB with all the computation and the unexpected
1728 jump at the end. I.e.
1730 bool zero3, zero2, zero1, zero;
1733 count3 = (N32 - N31) /[cl] STEP3;
1735 count2 = (N22 - N21) /[cl] STEP2;
1737 count1 = (N12 - N11) /[cl] STEP1;
1738 zero = zero3 || zero2 || zero1;
1739 count = count1 * count2 * count3;
1740 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1742 After all, we expect the zero=false, and thus we expect to have to
1743 evaluate all of the comparison expressions, so short-circuiting
1744 oughtn't be a win. Since the condition isn't protecting a
1745 denominator, we're not concerned about divide-by-zero, so we can
1746 fully evaluate count even if a numerator turned out to be wrong.
1748 It seems like putting this all together would create much better
1749 scheduling opportunities, and less pressure on the chip's branch
1753 expand_omp_for_init_counts (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1754 basic_block
&entry_bb
, tree
*counts
,
1755 basic_block
&zero_iter1_bb
, int &first_zero_iter1
,
1756 basic_block
&zero_iter2_bb
, int &first_zero_iter2
,
1757 basic_block
&l2_dom_bb
)
1759 tree t
, type
= TREE_TYPE (fd
->loop
.v
);
1763 /* Collapsed loops need work for expansion into SSA form. */
1764 gcc_assert (!gimple_in_ssa_p (cfun
));
1766 if (gimple_omp_for_combined_into_p (fd
->for_stmt
)
1767 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
1769 gcc_assert (fd
->ordered
== 0);
1770 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1771 isn't supposed to be handled, as the inner loop doesn't
1773 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
1774 OMP_CLAUSE__LOOPTEMP_
);
1775 gcc_assert (innerc
);
1776 for (i
= 0; i
< fd
->collapse
; i
++)
1778 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1779 OMP_CLAUSE__LOOPTEMP_
);
1780 gcc_assert (innerc
);
1782 counts
[i
] = OMP_CLAUSE_DECL (innerc
);
1784 counts
[0] = NULL_TREE
;
1789 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1791 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1792 counts
[i
] = NULL_TREE
;
1793 t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1794 fold_convert (itype
, fd
->loops
[i
].n1
),
1795 fold_convert (itype
, fd
->loops
[i
].n2
));
1796 if (t
&& integer_zerop (t
))
1798 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1799 counts
[i
] = build_int_cst (type
, 0);
1803 for (i
= 0; i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
1805 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1807 if (i
>= fd
->collapse
&& counts
[i
])
1809 if ((SSA_VAR_P (fd
->loop
.n2
) || i
>= fd
->collapse
)
1810 && ((t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1811 fold_convert (itype
, fd
->loops
[i
].n1
),
1812 fold_convert (itype
, fd
->loops
[i
].n2
)))
1813 == NULL_TREE
|| !integer_onep (t
)))
1817 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
1818 n1
= force_gimple_operand_gsi (gsi
, n1
, true, NULL_TREE
,
1819 true, GSI_SAME_STMT
);
1820 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
1821 n2
= force_gimple_operand_gsi (gsi
, n2
, true, NULL_TREE
,
1822 true, GSI_SAME_STMT
);
1823 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
1824 NULL_TREE
, NULL_TREE
);
1825 gsi_insert_before (gsi
, cond_stmt
, GSI_SAME_STMT
);
1826 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
1827 expand_omp_regimplify_p
, NULL
, NULL
)
1828 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
1829 expand_omp_regimplify_p
, NULL
, NULL
))
1831 *gsi
= gsi_for_stmt (cond_stmt
);
1832 gimple_regimplify_operands (cond_stmt
, gsi
);
1834 e
= split_block (entry_bb
, cond_stmt
);
1835 basic_block
&zero_iter_bb
1836 = i
< fd
->collapse
? zero_iter1_bb
: zero_iter2_bb
;
1837 int &first_zero_iter
1838 = i
< fd
->collapse
? first_zero_iter1
: first_zero_iter2
;
1839 if (zero_iter_bb
== NULL
)
1841 gassign
*assign_stmt
;
1842 first_zero_iter
= i
;
1843 zero_iter_bb
= create_empty_bb (entry_bb
);
1844 add_bb_to_loop (zero_iter_bb
, entry_bb
->loop_father
);
1845 *gsi
= gsi_after_labels (zero_iter_bb
);
1846 if (i
< fd
->collapse
)
1847 assign_stmt
= gimple_build_assign (fd
->loop
.n2
,
1848 build_zero_cst (type
));
1851 counts
[i
] = create_tmp_reg (type
, ".count");
1853 = gimple_build_assign (counts
[i
], build_zero_cst (type
));
1855 gsi_insert_before (gsi
, assign_stmt
, GSI_SAME_STMT
);
1856 set_immediate_dominator (CDI_DOMINATORS
, zero_iter_bb
,
1859 ne
= make_edge (entry_bb
, zero_iter_bb
, EDGE_FALSE_VALUE
);
1860 ne
->probability
= profile_probability::very_unlikely ();
1861 e
->flags
= EDGE_TRUE_VALUE
;
1862 e
->probability
= ne
->probability
.invert ();
1863 if (l2_dom_bb
== NULL
)
1864 l2_dom_bb
= entry_bb
;
1866 *gsi
= gsi_last_nondebug_bb (entry_bb
);
1869 if (POINTER_TYPE_P (itype
))
1870 itype
= signed_type_for (itype
);
1871 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
1873 t
= fold_build2 (PLUS_EXPR
, itype
,
1874 fold_convert (itype
, fd
->loops
[i
].step
), t
);
1875 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
1876 fold_convert (itype
, fd
->loops
[i
].n2
));
1877 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
1878 fold_convert (itype
, fd
->loops
[i
].n1
));
1879 /* ?? We could probably use CEIL_DIV_EXPR instead of
1880 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1881 generate the same code in the end because generically we
1882 don't know that the values involved must be negative for
1884 if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
1885 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1886 fold_build1 (NEGATE_EXPR
, itype
, t
),
1887 fold_build1 (NEGATE_EXPR
, itype
,
1888 fold_convert (itype
,
1889 fd
->loops
[i
].step
)));
1891 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
1892 fold_convert (itype
, fd
->loops
[i
].step
));
1893 t
= fold_convert (type
, t
);
1894 if (TREE_CODE (t
) == INTEGER_CST
)
1898 if (i
< fd
->collapse
|| i
!= first_zero_iter2
)
1899 counts
[i
] = create_tmp_reg (type
, ".count");
1900 expand_omp_build_assign (gsi
, counts
[i
], t
);
1902 if (SSA_VAR_P (fd
->loop
.n2
) && i
< fd
->collapse
)
1907 t
= fold_build2 (MULT_EXPR
, type
, fd
->loop
.n2
, counts
[i
]);
1908 expand_omp_build_assign (gsi
, fd
->loop
.n2
, t
);
1913 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1915 V3 = N31 + (T % count3) * STEP3;
1917 V2 = N21 + (T % count2) * STEP2;
1919 V1 = N11 + T * STEP1;
1920 if this loop doesn't have an inner loop construct combined with it.
1921 If it does have an inner loop construct combined with it and the
1922 iteration count isn't known constant, store values from counts array
1923 into its _looptemp_ temporaries instead. */
1926 expand_omp_for_init_vars (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1927 tree
*counts
, gimple
*inner_stmt
, tree startvar
)
1930 if (gimple_omp_for_combined_p (fd
->for_stmt
))
1932 /* If fd->loop.n2 is constant, then no propagation of the counts
1933 is needed, they are constant. */
1934 if (TREE_CODE (fd
->loop
.n2
) == INTEGER_CST
)
1937 tree clauses
= gimple_code (inner_stmt
) != GIMPLE_OMP_FOR
1938 ? gimple_omp_taskreg_clauses (inner_stmt
)
1939 : gimple_omp_for_clauses (inner_stmt
);
1940 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1941 isn't supposed to be handled, as the inner loop doesn't
1943 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
1944 gcc_assert (innerc
);
1945 for (i
= 0; i
< fd
->collapse
; i
++)
1947 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1948 OMP_CLAUSE__LOOPTEMP_
);
1949 gcc_assert (innerc
);
1952 tree tem
= OMP_CLAUSE_DECL (innerc
);
1953 tree t
= fold_convert (TREE_TYPE (tem
), counts
[i
]);
1954 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1955 false, GSI_CONTINUE_LINKING
);
1956 gassign
*stmt
= gimple_build_assign (tem
, t
);
1957 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1963 tree type
= TREE_TYPE (fd
->loop
.v
);
1964 tree tem
= create_tmp_reg (type
, ".tem");
1965 gassign
*stmt
= gimple_build_assign (tem
, startvar
);
1966 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1968 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
1970 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
), itype
, t
;
1972 if (POINTER_TYPE_P (vtype
))
1973 itype
= signed_type_for (vtype
);
1975 t
= fold_build2 (TRUNC_MOD_EXPR
, type
, tem
, counts
[i
]);
1978 t
= fold_convert (itype
, t
);
1979 t
= fold_build2 (MULT_EXPR
, itype
, t
,
1980 fold_convert (itype
, fd
->loops
[i
].step
));
1981 if (POINTER_TYPE_P (vtype
))
1982 t
= fold_build_pointer_plus (fd
->loops
[i
].n1
, t
);
1984 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
].n1
, t
);
1985 t
= force_gimple_operand_gsi (gsi
, t
,
1986 DECL_P (fd
->loops
[i
].v
)
1987 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
1989 GSI_CONTINUE_LINKING
);
1990 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
1991 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
1994 t
= fold_build2 (TRUNC_DIV_EXPR
, type
, tem
, counts
[i
]);
1995 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
1996 false, GSI_CONTINUE_LINKING
);
1997 stmt
= gimple_build_assign (tem
, t
);
1998 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
2003 /* Helper function for expand_omp_for_*. Generate code like:
2006 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2010 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2017 extract_omp_for_update_vars (struct omp_for_data
*fd
, basic_block cont_bb
,
2018 basic_block body_bb
)
2020 basic_block last_bb
, bb
, collapse_bb
= NULL
;
2022 gimple_stmt_iterator gsi
;
2028 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
2030 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
);
2032 bb
= create_empty_bb (last_bb
);
2033 add_bb_to_loop (bb
, last_bb
->loop_father
);
2034 gsi
= gsi_start_bb (bb
);
2036 if (i
< fd
->collapse
- 1)
2038 e
= make_edge (last_bb
, bb
, EDGE_FALSE_VALUE
);
2039 e
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2041 t
= fd
->loops
[i
+ 1].n1
;
2042 t
= force_gimple_operand_gsi (&gsi
, t
,
2043 DECL_P (fd
->loops
[i
+ 1].v
)
2044 && TREE_ADDRESSABLE (fd
->loops
[i
2047 GSI_CONTINUE_LINKING
);
2048 stmt
= gimple_build_assign (fd
->loops
[i
+ 1].v
, t
);
2049 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
2054 set_immediate_dominator (CDI_DOMINATORS
, bb
, last_bb
);
2056 if (POINTER_TYPE_P (vtype
))
2057 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, fd
->loops
[i
].step
);
2059 t
= fold_build2 (PLUS_EXPR
, vtype
, fd
->loops
[i
].v
, fd
->loops
[i
].step
);
2060 t
= force_gimple_operand_gsi (&gsi
, t
,
2061 DECL_P (fd
->loops
[i
].v
)
2062 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
2063 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
2064 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
2065 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
2069 t
= fd
->loops
[i
].n2
;
2070 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2071 false, GSI_CONTINUE_LINKING
);
2072 tree v
= fd
->loops
[i
].v
;
2073 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
2074 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
2075 false, GSI_CONTINUE_LINKING
);
2076 t
= fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
2077 stmt
= gimple_build_cond_empty (t
);
2078 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
2079 e
= make_edge (bb
, body_bb
, EDGE_TRUE_VALUE
);
2080 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
2083 make_edge (bb
, body_bb
, EDGE_FALLTHRU
);
2090 /* Expand #pragma omp ordered depend(source). */
2093 expand_omp_ordered_source (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
2094 tree
*counts
, location_t loc
)
2096 enum built_in_function source_ix
2097 = fd
->iter_type
== long_integer_type_node
2098 ? BUILT_IN_GOMP_DOACROSS_POST
: BUILT_IN_GOMP_DOACROSS_ULL_POST
;
2100 = gimple_build_call (builtin_decl_explicit (source_ix
), 1,
2101 build_fold_addr_expr (counts
[fd
->ordered
]));
2102 gimple_set_location (g
, loc
);
2103 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2106 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2109 expand_omp_ordered_sink (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
2110 tree
*counts
, tree c
, location_t loc
)
2112 auto_vec
<tree
, 10> args
;
2113 enum built_in_function sink_ix
2114 = fd
->iter_type
== long_integer_type_node
2115 ? BUILT_IN_GOMP_DOACROSS_WAIT
: BUILT_IN_GOMP_DOACROSS_ULL_WAIT
;
2116 tree t
, off
, coff
= NULL_TREE
, deps
= OMP_CLAUSE_DECL (c
), cond
= NULL_TREE
;
2118 gimple_stmt_iterator gsi2
= *gsi
;
2119 bool warned_step
= false;
2121 for (i
= 0; i
< fd
->ordered
; i
++)
2123 tree step
= NULL_TREE
;
2124 off
= TREE_PURPOSE (deps
);
2125 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
2127 step
= TREE_OPERAND (off
, 1);
2128 off
= TREE_OPERAND (off
, 0);
2130 if (!integer_zerop (off
))
2132 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
2133 || fd
->loops
[i
].cond_code
== GT_EXPR
);
2134 bool forward
= fd
->loops
[i
].cond_code
== LT_EXPR
;
2137 /* Non-simple Fortran DO loops. If step is variable,
2138 we don't know at compile even the direction, so can't
2140 if (TREE_CODE (step
) != INTEGER_CST
)
2142 forward
= tree_int_cst_sgn (step
) != -1;
2144 if (forward
^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2145 warning_at (loc
, 0, "%<depend(sink)%> clause waiting for "
2146 "lexically later iteration");
2149 deps
= TREE_CHAIN (deps
);
2151 /* If all offsets corresponding to the collapsed loops are zero,
2152 this depend clause can be ignored. FIXME: but there is still a
2153 flush needed. We need to emit one __sync_synchronize () for it
2154 though (perhaps conditionally)? Solve this together with the
2155 conservative dependence folding optimization.
2156 if (i >= fd->collapse)
2159 deps
= OMP_CLAUSE_DECL (c
);
2161 edge e1
= split_block (gsi_bb (gsi2
), gsi_stmt (gsi2
));
2162 edge e2
= split_block_after_labels (e1
->dest
);
2164 gsi2
= gsi_after_labels (e1
->dest
);
2165 *gsi
= gsi_last_bb (e1
->src
);
2166 for (i
= 0; i
< fd
->ordered
; i
++)
2168 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
2169 tree step
= NULL_TREE
;
2170 tree orig_off
= NULL_TREE
;
2171 if (POINTER_TYPE_P (itype
))
2174 deps
= TREE_CHAIN (deps
);
2175 off
= TREE_PURPOSE (deps
);
2176 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
2178 step
= TREE_OPERAND (off
, 1);
2179 off
= TREE_OPERAND (off
, 0);
2180 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
2181 && integer_onep (fd
->loops
[i
].step
)
2182 && !POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)));
2184 tree s
= fold_convert_loc (loc
, itype
, step
? step
: fd
->loops
[i
].step
);
2187 off
= fold_convert_loc (loc
, itype
, off
);
2189 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
2192 if (integer_zerop (off
))
2193 t
= boolean_true_node
;
2197 tree co
= fold_convert_loc (loc
, itype
, off
);
2198 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
2200 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2201 co
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, co
);
2202 a
= fold_build2_loc (loc
, POINTER_PLUS_EXPR
,
2203 TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].v
,
2206 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2207 a
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2208 fd
->loops
[i
].v
, co
);
2210 a
= fold_build2_loc (loc
, PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2211 fd
->loops
[i
].v
, co
);
2215 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2216 t1
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2219 t1
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2221 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2222 t2
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2225 t2
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2227 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
,
2228 step
, build_int_cst (TREE_TYPE (step
), 0));
2229 if (TREE_CODE (step
) != INTEGER_CST
)
2231 t1
= unshare_expr (t1
);
2232 t1
= force_gimple_operand_gsi (gsi
, t1
, true, NULL_TREE
,
2233 false, GSI_CONTINUE_LINKING
);
2234 t2
= unshare_expr (t2
);
2235 t2
= force_gimple_operand_gsi (gsi
, t2
, true, NULL_TREE
,
2236 false, GSI_CONTINUE_LINKING
);
2238 t
= fold_build3_loc (loc
, COND_EXPR
, boolean_type_node
,
2241 else if (fd
->loops
[i
].cond_code
== LT_EXPR
)
2243 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2244 t
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
2247 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
2250 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2251 t
= fold_build2_loc (loc
, GT_EXPR
, boolean_type_node
, a
,
2254 t
= fold_build2_loc (loc
, LE_EXPR
, boolean_type_node
, a
,
2258 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
, cond
, t
);
2262 off
= fold_convert_loc (loc
, itype
, off
);
2265 || (fd
->loops
[i
].cond_code
== LT_EXPR
2266 ? !integer_onep (fd
->loops
[i
].step
)
2267 : !integer_minus_onep (fd
->loops
[i
].step
)))
2269 if (step
== NULL_TREE
2270 && TYPE_UNSIGNED (itype
)
2271 && fd
->loops
[i
].cond_code
== GT_EXPR
)
2272 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
, off
,
2273 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
2276 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
,
2277 orig_off
? orig_off
: off
, s
);
2278 t
= fold_build2_loc (loc
, EQ_EXPR
, boolean_type_node
, t
,
2279 build_int_cst (itype
, 0));
2280 if (integer_zerop (t
) && !warned_step
)
2282 warning_at (loc
, 0, "%<depend(sink)%> refers to iteration never "
2283 "in the iteration space");
2286 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
,
2290 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
2296 t
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2297 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
2298 t
= fold_convert_loc (loc
, fd
->iter_type
, t
);
2301 /* We have divided off by step already earlier. */;
2302 else if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
2303 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
,
2304 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
2307 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
2308 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
2309 off
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, off
);
2310 off
= fold_convert_loc (loc
, fd
->iter_type
, off
);
2311 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
2314 off
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, coff
,
2316 if (i
< fd
->collapse
- 1)
2318 coff
= fold_build2_loc (loc
, MULT_EXPR
, fd
->iter_type
, off
,
2323 off
= unshare_expr (off
);
2324 t
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, t
, off
);
2325 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2326 true, GSI_SAME_STMT
);
2329 gimple
*g
= gimple_build_call_vec (builtin_decl_explicit (sink_ix
), args
);
2330 gimple_set_location (g
, loc
);
2331 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
2333 cond
= unshare_expr (cond
);
2334 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
, false,
2335 GSI_CONTINUE_LINKING
);
2336 gsi_insert_after (gsi
, gimple_build_cond_empty (cond
), GSI_NEW_STMT
);
2337 edge e3
= make_edge (e1
->src
, e2
->dest
, EDGE_FALSE_VALUE
);
2338 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2339 e1
->probability
= e3
->probability
.invert ();
2340 e1
->flags
= EDGE_TRUE_VALUE
;
2341 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e1
->src
);
2343 *gsi
= gsi_after_labels (e2
->dest
);
2346 /* Expand all #pragma omp ordered depend(source) and
2347 #pragma omp ordered depend(sink:...) constructs in the current
2348 #pragma omp for ordered(n) region. */
/* REGION is the omp-for region whose nested GIMPLE_OMP_ORDERED regions
   are expanded; FD describes the loop nest; COUNTS is an array with
   fd->ordered + 1 slots filled in here (counts[fd->ordered] is an
   addressable array variable holding the current iteration vector);
   CONT_BB is the loop's continue block and may be NULL.
   NOTE(review): this text is a lossy extraction -- some original lines
   (e.g. braces and the declaration feeding the build_array_type_nelts
   assignment) are missing; verify against upstream gcc/omp-expand.c.  */
2351 expand_omp_ordered_source_sink (struct omp_region
*region
,
2352 struct omp_for_data
*fd
, tree
*counts
,
2353 basic_block cont_bb
)
2355 struct omp_region
*inner
;
/* Decide per dimension whether a separate ".orditer" counter variable
   is needed, or whether NULL_TREE / a constant zero suffices.  */
2357 for (i
= fd
->collapse
- 1; i
< fd
->ordered
; i
++)
2358 if (i
== fd
->collapse
- 1 && fd
->collapse
> 1)
2359 counts
[i
] = NULL_TREE
;
2360 else if (i
>= fd
->collapse
&& !cont_bb
)
2361 counts
[i
] = build_zero_cst (fd
->iter_type
);
2362 else if (!POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
))
2363 && integer_onep (fd
->loops
[i
].step
))
2364 counts
[i
] = NULL_TREE
;
2366 counts
[i
] = create_tmp_var (fd
->iter_type
, ".orditer");
/* ".orditera": the addressable array of current iteration counts that
   is passed by address to the doacross runtime entry points.  */
2368 = build_array_type_nelts (fd
->iter_type
, fd
->ordered
- fd
->collapse
+ 1);
2369 counts
[fd
->ordered
] = create_tmp_var (atype
, ".orditera");
2370 TREE_ADDRESSABLE (counts
[fd
->ordered
]) = 1;
/* Walk the directly nested regions; for each GIMPLE_OMP_ORDERED expand
   its depend(source) clauses first, then its depend(sink:...) clauses,
   and finally delete the ordered statement itself.  */
2372 for (inner
= region
->inner
; inner
; inner
= inner
->next
)
2373 if (inner
->type
== GIMPLE_OMP_ORDERED
)
2375 gomp_ordered
*ord_stmt
= inner
->ord_stmt
;
2376 gimple_stmt_iterator gsi
= gsi_for_stmt (ord_stmt
);
2377 location_t loc
= gimple_location (ord_stmt
);
2379 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
2380 c
; c
= OMP_CLAUSE_CHAIN (c
))
2381 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SOURCE
)
2384 expand_omp_ordered_source (&gsi
, fd
, counts
, loc
);
2385 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
2386 c
; c
= OMP_CLAUSE_CHAIN (c
))
2387 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SINK
)
2388 expand_omp_ordered_sink (&gsi
, fd
, counts
, c
, loc
);
2389 gsi_remove (&gsi
, true);
2393 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2397 expand_omp_for_ordered_loops (struct omp_for_data
*fd
, tree
*counts
,
2398 basic_block cont_bb
, basic_block body_bb
,
2399 bool ordered_lastprivate
)
/* FD and COUNTS as in expand_omp_ordered_source_sink; CONT_BB is the
   continue block (may be NULL) and BODY_BB the loop body block;
   ORDERED_LASTPRIVATE requests that all iteration variables be
   initialized even when some loop may have zero iterations.  Returns
   the (possibly new) continue block -- the caller reassigns cont_bb
   from the return value.
   NOTE(review): lossy extraction -- several original lines (braces,
   else-arms, early return) are missing; verify against upstream
   gcc/omp-expand.c.  */
2401 if (fd
->ordered
== fd
->collapse
)
/* No continue block: just initialize each non-collapsed iterator and
   clear its slot in the counts[fd->ordered] iteration-vector array.  */
2406 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2407 for (int i
= fd
->collapse
; i
< fd
->ordered
; i
++)
2409 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2410 tree n1
= fold_convert (type
, fd
->loops
[i
].n1
);
2411 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, n1
);
2412 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2413 size_int (i
- fd
->collapse
+ 1),
2414 NULL_TREE
, NULL_TREE
);
2415 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
/* Build one explicit loop per non-collapsed ordered dimension: init
   before the body, increment + exit test in a new header block, with
   edges, probabilities, dominators and loop structure updated.  */
2420 for (int i
= fd
->ordered
- 1; i
>= fd
->collapse
; i
--)
2422 tree t
, type
= TREE_TYPE (fd
->loops
[i
].v
);
2423 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2424 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
2425 fold_convert (type
, fd
->loops
[i
].n1
));
2427 expand_omp_build_assign (&gsi
, counts
[i
],
2428 build_zero_cst (fd
->iter_type
));
2429 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2430 size_int (i
- fd
->collapse
+ 1),
2431 NULL_TREE
, NULL_TREE
);
2432 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
2433 if (!gsi_end_p (gsi
))
2436 gsi
= gsi_last_bb (body_bb
);
2437 edge e1
= split_block (body_bb
, gsi_stmt (gsi
));
2438 basic_block new_body
= e1
->dest
;
2439 if (body_bb
== cont_bb
)
2442 basic_block new_header
;
2443 if (EDGE_COUNT (cont_bb
->preds
) > 0)
2445 gsi
= gsi_last_bb (cont_bb
);
2446 if (POINTER_TYPE_P (type
))
2447 t
= fold_build_pointer_plus (fd
->loops
[i
].v
,
2448 fold_convert (sizetype
,
2449 fd
->loops
[i
].step
));
2451 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loops
[i
].v
,
2452 fold_convert (type
, fd
->loops
[i
].step
));
2453 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
/* Bump the per-dimension counter and mirror it into the
   iteration-vector array slot read by the doacross runtime.  */
2456 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[i
],
2457 build_int_cst (fd
->iter_type
, 1));
2458 expand_omp_build_assign (&gsi
, counts
[i
], t
);
2463 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
2464 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
2465 t
= fold_convert (fd
->iter_type
, t
);
2466 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2467 true, GSI_SAME_STMT
);
2469 aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
2470 size_int (i
- fd
->collapse
+ 1),
2471 NULL_TREE
, NULL_TREE
);
2472 expand_omp_build_assign (&gsi
, aref
, t
);
2474 e2
= split_block (cont_bb
, gsi_stmt (gsi
));
2475 new_header
= e2
->dest
;
2478 new_header
= cont_bb
;
2479 gsi
= gsi_after_labels (new_header
);
2480 tree v
= force_gimple_operand_gsi (&gsi
, fd
->loops
[i
].v
, true, NULL_TREE
,
2481 true, GSI_SAME_STMT
);
2483 = force_gimple_operand_gsi (&gsi
, fold_convert (type
, fd
->loops
[i
].n2
),
2484 true, NULL_TREE
, true, GSI_SAME_STMT
);
2485 t
= build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, n2
);
2486 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_NEW_STMT
);
2487 edge e3
= split_block (new_header
, gsi_stmt (gsi
));
2490 make_edge (body_bb
, new_header
, EDGE_FALLTHRU
);
2491 e3
->flags
= EDGE_FALSE_VALUE
;
2492 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
2493 e1
= make_edge (new_header
, new_body
, EDGE_TRUE_VALUE
);
2494 e1
->probability
= e3
->probability
.invert ();
2496 set_immediate_dominator (CDI_DOMINATORS
, new_header
, body_bb
);
2497 set_immediate_dominator (CDI_DOMINATORS
, new_body
, new_header
);
/* Register the newly built natural loop with the loop tree.  */
2501 struct loop
*loop
= alloc_loop ();
2502 loop
->header
= new_header
;
2503 loop
->latch
= e2
->src
;
2504 add_loop (loop
, body_bb
->loop_father
);
2508 /* If there are any lastprivate clauses and it is possible some loops
2509 might have zero iterations, ensure all the decls are initialized,
2510 otherwise we could crash evaluating C++ class iterators with lastprivate
2512 bool need_inits
= false;
2513 for (int i
= fd
->collapse
; ordered_lastprivate
&& i
< fd
->ordered
; i
++)
2516 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2517 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
2518 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
2519 fold_convert (type
, fd
->loops
[i
].n1
));
/* Fold n1 cond n2 at compile time: if the loop provably runs at least
   once, the explicit initialization above is not needed -- presumably
   this feeds need_inits; missing lines here, confirm upstream.  */
2523 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
2524 tree this_cond
= fold_build2 (fd
->loops
[i
].cond_code
,
2526 fold_convert (type
, fd
->loops
[i
].n1
),
2527 fold_convert (type
, fd
->loops
[i
].n2
));
2528 if (!integer_onep (this_cond
))
2535 /* A subroutine of expand_omp_for. Generate code for a parallel
2536 loop with any schedule. Given parameters:
2538 for (V = N1; V cond N2; V += STEP) BODY;
2540 where COND is "<" or ">", we generate pseudocode
2542 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2543 if (more) goto L0; else goto L3;
2550 if (V cond iend) goto L1; else goto L2;
2552 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2555 If this is a combined omp parallel loop, instead of the call to
2556 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2557 If this is gimple_omp_for_combined_p loop, then instead of assigning
2558 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2559 inner GIMPLE_OMP_FOR and V += STEP; and
2560 if (V cond iend) goto L1; else goto L2; are removed.
2562 For collapsed loops, given parameters:
2564 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2565 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2566 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2569 we generate pseudocode
2571 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2576 count3 = (adj + N32 - N31) / STEP3;
2577 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2582 count2 = (adj + N22 - N21) / STEP2;
2583 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2588 count1 = (adj + N12 - N11) / STEP1;
2589 count = count1 * count2 * count3;
2594 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2595 if (more) goto L0; else goto L3;
2599 V3 = N31 + (T % count3) * STEP3;
2601 V2 = N21 + (T % count2) * STEP2;
2603 V1 = N11 + T * STEP1;
2608 if (V < iend) goto L10; else goto L2;
2611 if (V3 cond3 N32) goto L1; else goto L11;
2615 if (V2 cond2 N22) goto L1; else goto L12;
2621 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2627 expand_omp_for_generic (struct omp_region
*region
,
2628 struct omp_for_data
*fd
,
2629 enum built_in_function start_fn
,
2630 enum built_in_function next_fn
,
2634 tree type
, istart0
, iend0
, iend
;
2635 tree t
, vmain
, vback
, bias
= NULL_TREE
;
2636 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, collapse_bb
;
2637 basic_block l2_bb
= NULL
, l3_bb
= NULL
;
2638 gimple_stmt_iterator gsi
;
2639 gassign
*assign_stmt
;
2640 bool in_combined_parallel
= is_combined_parallel (region
);
2641 bool broken_loop
= region
->cont
== NULL
;
2643 tree
*counts
= NULL
;
2645 bool ordered_lastprivate
= false;
2647 gcc_assert (!broken_loop
|| !in_combined_parallel
);
2648 gcc_assert (fd
->iter_type
== long_integer_type_node
2649 || !in_combined_parallel
);
2651 entry_bb
= region
->entry
;
2652 cont_bb
= region
->cont
;
2654 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
2655 gcc_assert (broken_loop
2656 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
2657 l0_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
2658 l1_bb
= single_succ (l0_bb
);
2661 l2_bb
= create_empty_bb (cont_bb
);
2662 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l1_bb
2663 || (single_succ_edge (BRANCH_EDGE (cont_bb
)->dest
)->dest
2665 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
2669 l3_bb
= BRANCH_EDGE (entry_bb
)->dest
;
2670 exit_bb
= region
->exit
;
2672 gsi
= gsi_last_nondebug_bb (entry_bb
);
2674 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
2676 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi
)),
2677 OMP_CLAUSE_LASTPRIVATE
))
2678 ordered_lastprivate
= false;
2679 tree reductions
= NULL_TREE
;
2680 tree mem
= NULL_TREE
;
2683 if (fd
->have_reductemp
)
2685 tree c
= omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi
)),
2686 OMP_CLAUSE__REDUCTEMP_
);
2687 reductions
= OMP_CLAUSE_DECL (c
);
2688 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
2689 gimple
*g
= SSA_NAME_DEF_STMT (reductions
);
2690 reductions
= gimple_assign_rhs1 (g
);
2691 OMP_CLAUSE_DECL (c
) = reductions
;
2692 entry_bb
= gimple_bb (g
);
2693 edge e
= split_block (entry_bb
, g
);
2694 if (region
->entry
== entry_bb
)
2695 region
->entry
= e
->dest
;
2696 gsi
= gsi_last_bb (entry_bb
);
2699 reductions
= null_pointer_node
;
2701 mem
= null_pointer_node
;
2703 if (fd
->collapse
> 1 || fd
->ordered
)
2705 int first_zero_iter1
= -1, first_zero_iter2
= -1;
2706 basic_block zero_iter1_bb
= NULL
, zero_iter2_bb
= NULL
, l2_dom_bb
= NULL
;
2708 counts
= XALLOCAVEC (tree
, fd
->ordered
? fd
->ordered
+ 1 : fd
->collapse
);
2709 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
2710 zero_iter1_bb
, first_zero_iter1
,
2711 zero_iter2_bb
, first_zero_iter2
, l2_dom_bb
);
2715 /* Some counts[i] vars might be uninitialized if
2716 some loop has zero iterations. But the body shouldn't
2717 be executed in that case, so just avoid uninit warnings. */
2718 for (i
= first_zero_iter1
;
2719 i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
2720 if (SSA_VAR_P (counts
[i
]))
2721 TREE_NO_WARNING (counts
[i
]) = 1;
2723 e
= split_block (entry_bb
, gsi_stmt (gsi
));
2725 make_edge (zero_iter1_bb
, entry_bb
, EDGE_FALLTHRU
);
2726 gsi
= gsi_last_nondebug_bb (entry_bb
);
2727 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
2728 get_immediate_dominator (CDI_DOMINATORS
,
2733 /* Some counts[i] vars might be uninitialized if
2734 some loop has zero iterations. But the body shouldn't
2735 be executed in that case, so just avoid uninit warnings. */
2736 for (i
= first_zero_iter2
; i
< fd
->ordered
; i
++)
2737 if (SSA_VAR_P (counts
[i
]))
2738 TREE_NO_WARNING (counts
[i
]) = 1;
2740 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
2744 e
= split_block (entry_bb
, gsi_stmt (gsi
));
2746 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
2747 gsi
= gsi_last_nondebug_bb (entry_bb
);
2748 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
2749 get_immediate_dominator
2750 (CDI_DOMINATORS
, zero_iter2_bb
));
2753 if (fd
->collapse
== 1)
2755 counts
[0] = fd
->loop
.n2
;
2756 fd
->loop
= fd
->loops
[0];
2760 type
= TREE_TYPE (fd
->loop
.v
);
2761 istart0
= create_tmp_var (fd
->iter_type
, ".istart0");
2762 iend0
= create_tmp_var (fd
->iter_type
, ".iend0");
2763 TREE_ADDRESSABLE (istart0
) = 1;
2764 TREE_ADDRESSABLE (iend0
) = 1;
2766 /* See if we need to bias by LLONG_MIN. */
2767 if (fd
->iter_type
== long_long_unsigned_type_node
2768 && TREE_CODE (type
) == INTEGER_TYPE
2769 && !TYPE_UNSIGNED (type
)
2770 && fd
->ordered
== 0)
2774 if (fd
->loop
.cond_code
== LT_EXPR
)
2777 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
2781 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
2784 if (TREE_CODE (n1
) != INTEGER_CST
2785 || TREE_CODE (n2
) != INTEGER_CST
2786 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
2787 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
2790 gimple_stmt_iterator gsif
= gsi
;
2793 tree arr
= NULL_TREE
;
2794 if (in_combined_parallel
)
2796 gcc_assert (fd
->ordered
== 0);
2797 /* In a combined parallel loop, emit a call to
2798 GOMP_loop_foo_next. */
2799 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
2800 build_fold_addr_expr (istart0
),
2801 build_fold_addr_expr (iend0
));
2805 tree t0
, t1
, t2
, t3
, t4
;
2806 /* If this is not a combined parallel loop, emit a call to
2807 GOMP_loop_foo_start in ENTRY_BB. */
2808 t4
= build_fold_addr_expr (iend0
);
2809 t3
= build_fold_addr_expr (istart0
);
2812 t0
= build_int_cst (unsigned_type_node
,
2813 fd
->ordered
- fd
->collapse
+ 1);
2814 arr
= create_tmp_var (build_array_type_nelts (fd
->iter_type
,
2816 - fd
->collapse
+ 1),
2818 DECL_NAMELESS (arr
) = 1;
2819 TREE_ADDRESSABLE (arr
) = 1;
2820 TREE_STATIC (arr
) = 1;
2821 vec
<constructor_elt
, va_gc
> *v
;
2822 vec_alloc (v
, fd
->ordered
- fd
->collapse
+ 1);
2825 for (idx
= 0; idx
< fd
->ordered
- fd
->collapse
+ 1; idx
++)
2828 if (idx
== 0 && fd
->collapse
> 1)
2831 c
= counts
[idx
+ fd
->collapse
- 1];
2832 tree purpose
= size_int (idx
);
2833 CONSTRUCTOR_APPEND_ELT (v
, purpose
, c
);
2834 if (TREE_CODE (c
) != INTEGER_CST
)
2835 TREE_STATIC (arr
) = 0;
2838 DECL_INITIAL (arr
) = build_constructor (TREE_TYPE (arr
), v
);
2839 if (!TREE_STATIC (arr
))
2840 force_gimple_operand_gsi (&gsi
, build1 (DECL_EXPR
,
2841 void_type_node
, arr
),
2842 true, NULL_TREE
, true, GSI_SAME_STMT
);
2843 t1
= build_fold_addr_expr (arr
);
2848 t2
= fold_convert (fd
->iter_type
, fd
->loop
.step
);
2851 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
2854 = omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
2855 OMP_CLAUSE__LOOPTEMP_
);
2856 gcc_assert (innerc
);
2857 t0
= OMP_CLAUSE_DECL (innerc
);
2858 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2859 OMP_CLAUSE__LOOPTEMP_
);
2860 gcc_assert (innerc
);
2861 t1
= OMP_CLAUSE_DECL (innerc
);
2863 if (POINTER_TYPE_P (TREE_TYPE (t0
))
2864 && TYPE_PRECISION (TREE_TYPE (t0
))
2865 != TYPE_PRECISION (fd
->iter_type
))
2867 /* Avoid casting pointers to integer of a different size. */
2868 tree itype
= signed_type_for (type
);
2869 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
2870 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
2874 t1
= fold_convert (fd
->iter_type
, t1
);
2875 t0
= fold_convert (fd
->iter_type
, t0
);
2879 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
2880 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
2883 if (fd
->iter_type
== long_integer_type_node
|| fd
->ordered
)
2887 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
2888 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
2892 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2893 8, t0
, t1
, sched_arg
, t
, t3
, t4
,
2896 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2897 9, t0
, t1
, t2
, sched_arg
, t
, t3
, t4
,
2900 else if (fd
->ordered
)
2901 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2902 5, t0
, t1
, t
, t3
, t4
);
2904 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2905 6, t0
, t1
, t2
, t
, t3
, t4
);
2907 else if (fd
->ordered
)
2908 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2911 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2912 5, t0
, t1
, t2
, t3
, t4
);
2920 /* The GOMP_loop_ull_*start functions have additional boolean
2921 argument, true for < loops and false for > loops.
2922 In Fortran, the C bool type can be different from
2923 boolean_type_node. */
2924 bfn_decl
= builtin_decl_explicit (start_fn
);
2925 c_bool_type
= TREE_TYPE (TREE_TYPE (bfn_decl
));
2926 t5
= build_int_cst (c_bool_type
,
2927 fd
->loop
.cond_code
== LT_EXPR
? 1 : 0);
2930 tree bfn_decl
= builtin_decl_explicit (start_fn
);
2931 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
2932 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
2934 t
= build_call_expr (bfn_decl
, 10, t5
, t0
, t1
, t2
, sched_arg
,
2935 t
, t3
, t4
, reductions
, mem
);
2937 t
= build_call_expr (bfn_decl
, 7, t5
, t0
, t1
, t2
, t
, t3
, t4
);
2940 t
= build_call_expr (builtin_decl_explicit (start_fn
),
2941 6, t5
, t0
, t1
, t2
, t3
, t4
);
2944 if (TREE_TYPE (t
) != boolean_type_node
)
2945 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
2946 t
, build_int_cst (TREE_TYPE (t
), 0));
2947 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
2948 true, GSI_SAME_STMT
);
2949 if (arr
&& !TREE_STATIC (arr
))
2951 tree clobber
= build_constructor (TREE_TYPE (arr
), NULL
);
2952 TREE_THIS_VOLATILE (clobber
) = 1;
2953 gsi_insert_before (&gsi
, gimple_build_assign (arr
, clobber
),
2956 if (fd
->have_reductemp
)
2958 gimple
*g
= gsi_stmt (gsi
);
2959 gsi_remove (&gsi
, true);
2960 release_ssa_name (gimple_assign_lhs (g
));
2962 entry_bb
= region
->entry
;
2963 gsi
= gsi_last_nondebug_bb (entry_bb
);
2965 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
2967 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
2969 /* Remove the GIMPLE_OMP_FOR statement. */
2970 gsi_remove (&gsi
, true);
2972 if (gsi_end_p (gsif
))
2973 gsif
= gsi_after_labels (gsi_bb (gsif
));
2976 /* Iteration setup for sequential loop goes in L0_BB. */
2977 tree startvar
= fd
->loop
.v
;
2978 tree endvar
= NULL_TREE
;
2980 if (gimple_omp_for_combined_p (fd
->for_stmt
))
2982 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_FOR
2983 && gimple_omp_for_kind (inner_stmt
)
2984 == GF_OMP_FOR_KIND_SIMD
);
2985 tree innerc
= omp_find_clause (gimple_omp_for_clauses (inner_stmt
),
2986 OMP_CLAUSE__LOOPTEMP_
);
2987 gcc_assert (innerc
);
2988 startvar
= OMP_CLAUSE_DECL (innerc
);
2989 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2990 OMP_CLAUSE__LOOPTEMP_
);
2991 gcc_assert (innerc
);
2992 endvar
= OMP_CLAUSE_DECL (innerc
);
2995 gsi
= gsi_start_bb (l0_bb
);
2997 if (fd
->ordered
&& fd
->collapse
== 1)
2998 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
2999 fold_convert (fd
->iter_type
, fd
->loop
.step
));
3001 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
3002 if (fd
->ordered
&& fd
->collapse
== 1)
3004 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
3005 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
3006 fd
->loop
.n1
, fold_convert (sizetype
, t
));
3009 t
= fold_convert (TREE_TYPE (startvar
), t
);
3010 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
3016 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
3017 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
3018 t
= fold_convert (TREE_TYPE (startvar
), t
);
3020 t
= force_gimple_operand_gsi (&gsi
, t
,
3022 && TREE_ADDRESSABLE (startvar
),
3023 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
3024 assign_stmt
= gimple_build_assign (startvar
, t
);
3025 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3028 if (fd
->ordered
&& fd
->collapse
== 1)
3029 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
3030 fold_convert (fd
->iter_type
, fd
->loop
.step
));
3032 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
3033 if (fd
->ordered
&& fd
->collapse
== 1)
3035 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
3036 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
3037 fd
->loop
.n1
, fold_convert (sizetype
, t
));
3040 t
= fold_convert (TREE_TYPE (startvar
), t
);
3041 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
3047 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
3048 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
3049 t
= fold_convert (TREE_TYPE (startvar
), t
);
3051 iend
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3052 false, GSI_CONTINUE_LINKING
);
3055 assign_stmt
= gimple_build_assign (endvar
, iend
);
3056 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3057 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (iend
)))
3058 assign_stmt
= gimple_build_assign (fd
->loop
.v
, iend
);
3060 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, iend
);
3061 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3063 /* Handle linear clause adjustments. */
3064 tree itercnt
= NULL_TREE
;
3065 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
3066 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
3067 c
; c
= OMP_CLAUSE_CHAIN (c
))
3068 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
3069 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
3071 tree d
= OMP_CLAUSE_DECL (c
);
3072 bool is_ref
= omp_is_reference (d
);
3073 tree t
= d
, a
, dest
;
3075 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
3076 tree type
= TREE_TYPE (t
);
3077 if (POINTER_TYPE_P (type
))
3079 dest
= unshare_expr (t
);
3080 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
3081 expand_omp_build_assign (&gsif
, v
, t
);
3082 if (itercnt
== NULL_TREE
)
3085 tree n1
= fd
->loop
.n1
;
3086 if (POINTER_TYPE_P (TREE_TYPE (itercnt
)))
3089 = fold_convert (signed_type_for (TREE_TYPE (itercnt
)),
3091 n1
= fold_convert (TREE_TYPE (itercnt
), n1
);
3093 itercnt
= fold_build2 (MINUS_EXPR
, TREE_TYPE (itercnt
),
3095 itercnt
= fold_build2 (EXACT_DIV_EXPR
, TREE_TYPE (itercnt
),
3096 itercnt
, fd
->loop
.step
);
3097 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
3099 GSI_CONTINUE_LINKING
);
3101 a
= fold_build2 (MULT_EXPR
, type
,
3102 fold_convert (type
, itercnt
),
3103 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
3104 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
3105 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
3106 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3107 false, GSI_CONTINUE_LINKING
);
3108 assign_stmt
= gimple_build_assign (dest
, t
);
3109 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3111 if (fd
->collapse
> 1)
3112 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
3116 /* Until now, counts array contained number of iterations or
3117 variable containing it for ith loop. From now on, we need
3118 those counts only for collapsed loops, and only for the 2nd
3119 till the last collapsed one. Move those one element earlier,
3120 we'll use counts[fd->collapse - 1] for the first source/sink
3121 iteration counter and so on and counts[fd->ordered]
3122 as the array holding the current counter values for
3124 if (fd
->collapse
> 1)
3125 memmove (counts
, counts
+ 1, (fd
->collapse
- 1) * sizeof (counts
[0]));
3129 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
3131 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
3133 = fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
3134 fold_convert (type
, fd
->loops
[i
].n1
),
3135 fold_convert (type
, fd
->loops
[i
].n2
));
3136 if (!integer_onep (this_cond
))
3139 if (i
< fd
->ordered
)
3142 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
);
3143 add_bb_to_loop (cont_bb
, l1_bb
->loop_father
);
3144 gimple_stmt_iterator gsi
= gsi_after_labels (cont_bb
);
3145 gimple
*g
= gimple_build_omp_continue (fd
->loop
.v
, fd
->loop
.v
);
3146 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3147 make_edge (cont_bb
, l3_bb
, EDGE_FALLTHRU
);
3148 make_edge (cont_bb
, l1_bb
, 0);
3149 l2_bb
= create_empty_bb (cont_bb
);
3150 broken_loop
= false;
3153 expand_omp_ordered_source_sink (region
, fd
, counts
, cont_bb
);
3154 cont_bb
= expand_omp_for_ordered_loops (fd
, counts
, cont_bb
, l1_bb
,
3155 ordered_lastprivate
);
3156 if (counts
[fd
->collapse
- 1])
3158 gcc_assert (fd
->collapse
== 1);
3159 gsi
= gsi_last_bb (l0_bb
);
3160 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1],
3162 gsi
= gsi_last_bb (cont_bb
);
3163 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[fd
->collapse
- 1],
3164 build_int_cst (fd
->iter_type
, 1));
3165 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1], t
);
3166 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3167 size_zero_node
, NULL_TREE
, NULL_TREE
);
3168 expand_omp_build_assign (&gsi
, aref
, counts
[fd
->collapse
- 1]);
3169 t
= counts
[fd
->collapse
- 1];
3171 else if (fd
->collapse
> 1)
3175 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
3176 fd
->loops
[0].v
, fd
->loops
[0].n1
);
3177 t
= fold_convert (fd
->iter_type
, t
);
3179 gsi
= gsi_last_bb (l0_bb
);
3180 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3181 size_zero_node
, NULL_TREE
, NULL_TREE
);
3182 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3183 false, GSI_CONTINUE_LINKING
);
3184 expand_omp_build_assign (&gsi
, aref
, t
, true);
3189 /* Code to control the increment and predicate for the sequential
3190 loop goes in the CONT_BB. */
3191 gsi
= gsi_last_nondebug_bb (cont_bb
);
3192 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
3193 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
3194 vmain
= gimple_omp_continue_control_use (cont_stmt
);
3195 vback
= gimple_omp_continue_control_def (cont_stmt
);
3197 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
3199 if (POINTER_TYPE_P (type
))
3200 t
= fold_build_pointer_plus (vmain
, fd
->loop
.step
);
3202 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, fd
->loop
.step
);
3203 t
= force_gimple_operand_gsi (&gsi
, t
,
3205 && TREE_ADDRESSABLE (vback
),
3206 NULL_TREE
, true, GSI_SAME_STMT
);
3207 assign_stmt
= gimple_build_assign (vback
, t
);
3208 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3210 if (fd
->ordered
&& counts
[fd
->collapse
- 1] == NULL_TREE
)
3212 if (fd
->collapse
> 1)
3216 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
3217 fd
->loops
[0].v
, fd
->loops
[0].n1
);
3218 t
= fold_convert (fd
->iter_type
, t
);
3220 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
,
3221 counts
[fd
->ordered
], size_zero_node
,
3222 NULL_TREE
, NULL_TREE
);
3223 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3224 true, GSI_SAME_STMT
);
3225 expand_omp_build_assign (&gsi
, aref
, t
);
3228 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
3229 DECL_P (vback
) && TREE_ADDRESSABLE (vback
) ? t
: vback
,
3231 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3232 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3235 /* Remove GIMPLE_OMP_CONTINUE. */
3236 gsi_remove (&gsi
, true);
3238 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3239 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, l1_bb
);
3241 /* Emit code to get the next parallel iteration in L2_BB. */
3242 gsi
= gsi_start_bb (l2_bb
);
3244 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
3245 build_fold_addr_expr (istart0
),
3246 build_fold_addr_expr (iend0
));
3247 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3248 false, GSI_CONTINUE_LINKING
);
3249 if (TREE_TYPE (t
) != boolean_type_node
)
3250 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
3251 t
, build_int_cst (TREE_TYPE (t
), 0));
3252 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3253 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
3256 /* Add the loop cleanup function. */
3257 gsi
= gsi_last_nondebug_bb (exit_bb
);
3258 if (gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
3259 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT
);
3260 else if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
3261 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
3263 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
3264 gcall
*call_stmt
= gimple_build_call (t
, 0);
3267 tree arr
= counts
[fd
->ordered
];
3268 tree clobber
= build_constructor (TREE_TYPE (arr
), NULL
);
3269 TREE_THIS_VOLATILE (clobber
) = 1;
3270 gsi_insert_after (&gsi
, gimple_build_assign (arr
, clobber
),
3273 if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
3275 gimple_call_set_lhs (call_stmt
, gimple_omp_return_lhs (gsi_stmt (gsi
)));
3276 if (fd
->have_reductemp
)
3278 gimple
*g
= gimple_build_assign (reductions
, NOP_EXPR
,
3279 gimple_call_lhs (call_stmt
));
3280 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
3283 gsi_insert_after (&gsi
, call_stmt
, GSI_SAME_STMT
);
3284 gsi_remove (&gsi
, true);
3286 /* Connect the new blocks. */
3287 find_edge (entry_bb
, l0_bb
)->flags
= EDGE_TRUE_VALUE
;
3288 find_edge (entry_bb
, l3_bb
)->flags
= EDGE_FALSE_VALUE
;
3294 e
= find_edge (cont_bb
, l3_bb
);
3295 ne
= make_edge (l2_bb
, l3_bb
, EDGE_FALSE_VALUE
);
3297 phis
= phi_nodes (l3_bb
);
3298 for (gsi
= gsi_start (phis
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3300 gimple
*phi
= gsi_stmt (gsi
);
3301 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, ne
),
3302 PHI_ARG_DEF_FROM_EDGE (phi
, e
));
3306 make_edge (cont_bb
, l2_bb
, EDGE_FALSE_VALUE
);
3307 e
= find_edge (cont_bb
, l1_bb
);
3310 e
= BRANCH_EDGE (cont_bb
);
3311 gcc_assert (single_succ (e
->dest
) == l1_bb
);
3313 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3318 else if (fd
->collapse
> 1)
3321 e
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
3324 e
->flags
= EDGE_TRUE_VALUE
;
3327 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
3328 find_edge (cont_bb
, l2_bb
)->probability
= e
->probability
.invert ();
3332 e
= find_edge (cont_bb
, l2_bb
);
3333 e
->flags
= EDGE_FALLTHRU
;
3335 make_edge (l2_bb
, l0_bb
, EDGE_TRUE_VALUE
);
3337 if (gimple_in_ssa_p (cfun
))
3339 /* Add phis to the outer loop that connect to the phis in the inner,
3340 original loop, and move the loop entry value of the inner phi to
3341 the loop entry value of the outer phi. */
3343 for (psi
= gsi_start_phis (l3_bb
); !gsi_end_p (psi
); gsi_next (&psi
))
3347 gphi
*exit_phi
= psi
.phi ();
3349 if (virtual_operand_p (gimple_phi_result (exit_phi
)))
3352 edge l2_to_l3
= find_edge (l2_bb
, l3_bb
);
3353 tree exit_res
= PHI_ARG_DEF_FROM_EDGE (exit_phi
, l2_to_l3
);
3355 basic_block latch
= BRANCH_EDGE (cont_bb
)->dest
;
3356 edge latch_to_l1
= find_edge (latch
, l1_bb
);
3358 = find_phi_with_arg_on_edge (exit_res
, latch_to_l1
);
3360 tree t
= gimple_phi_result (exit_phi
);
3361 tree new_res
= copy_ssa_name (t
, NULL
);
3362 nphi
= create_phi_node (new_res
, l0_bb
);
3364 edge l0_to_l1
= find_edge (l0_bb
, l1_bb
);
3365 t
= PHI_ARG_DEF_FROM_EDGE (inner_phi
, l0_to_l1
);
3366 locus
= gimple_phi_arg_location_from_edge (inner_phi
, l0_to_l1
);
3367 edge entry_to_l0
= find_edge (entry_bb
, l0_bb
);
3368 add_phi_arg (nphi
, t
, entry_to_l0
, locus
);
3370 edge l2_to_l0
= find_edge (l2_bb
, l0_bb
);
3371 add_phi_arg (nphi
, exit_res
, l2_to_l0
, UNKNOWN_LOCATION
);
3373 add_phi_arg (inner_phi
, new_res
, l0_to_l1
, UNKNOWN_LOCATION
);
3377 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
,
3378 recompute_dominator (CDI_DOMINATORS
, l2_bb
));
3379 set_immediate_dominator (CDI_DOMINATORS
, l3_bb
,
3380 recompute_dominator (CDI_DOMINATORS
, l3_bb
));
3381 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
,
3382 recompute_dominator (CDI_DOMINATORS
, l0_bb
));
3383 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
,
3384 recompute_dominator (CDI_DOMINATORS
, l1_bb
));
3386 /* We enter expand_omp_for_generic with a loop. This original loop may
3387 have its own loop struct, or it may be part of an outer loop struct
3388 (which may be the fake loop). */
3389 struct loop
*outer_loop
= entry_bb
->loop_father
;
3390 bool orig_loop_has_loop_struct
= l1_bb
->loop_father
!= outer_loop
;
3392 add_bb_to_loop (l2_bb
, outer_loop
);
3394 /* We've added a new loop around the original loop. Allocate the
3395 corresponding loop struct. */
3396 struct loop
*new_loop
= alloc_loop ();
3397 new_loop
->header
= l0_bb
;
3398 new_loop
->latch
= l2_bb
;
3399 add_loop (new_loop
, outer_loop
);
3401 /* Allocate a loop structure for the original loop unless we already
3403 if (!orig_loop_has_loop_struct
3404 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3406 struct loop
*orig_loop
= alloc_loop ();
3407 orig_loop
->header
= l1_bb
;
3408 /* The loop may have multiple latches. */
3409 add_loop (orig_loop
, new_loop
);
3414 /* A subroutine of expand_omp_for. Generate code for a parallel
3415 loop with static schedule and no specified chunk size. Given
3418 for (V = N1; V cond N2; V += STEP) BODY;
3420 where COND is "<" or ">", we generate pseudocode
3422 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3427 if ((__typeof (V)) -1 > 0 && cond is >)
3428 n = -(adj + N2 - N1) / -STEP;
3430 n = (adj + N2 - N1) / STEP;
3433 if (threadid < tt) goto L3; else goto L4;
3438 s0 = q * threadid + tt;
3441 if (s0 >= e0) goto L2; else goto L0;
3447 if (V cond e) goto L1;
3452 expand_omp_for_static_nochunk (struct omp_region
*region
,
3453 struct omp_for_data
*fd
,
3456 tree n
, q
, s0
, e0
, e
, t
, tt
, nthreads
, threadid
;
3457 tree type
, itype
, vmain
, vback
;
3458 basic_block entry_bb
, second_bb
, third_bb
, exit_bb
, seq_start_bb
;
3459 basic_block body_bb
, cont_bb
, collapse_bb
= NULL
;
3461 gimple_stmt_iterator gsi
;
3463 bool broken_loop
= region
->cont
== NULL
;
3464 tree
*counts
= NULL
;
3466 tree reductions
= NULL_TREE
;
3468 itype
= type
= TREE_TYPE (fd
->loop
.v
);
3469 if (POINTER_TYPE_P (type
))
3470 itype
= signed_type_for (type
);
3472 entry_bb
= region
->entry
;
3473 cont_bb
= region
->cont
;
3474 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
3475 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
3476 gcc_assert (broken_loop
3477 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
3478 seq_start_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
3479 body_bb
= single_succ (seq_start_bb
);
3482 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
3483 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
3484 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
3486 exit_bb
= region
->exit
;
3488 /* Iteration space partitioning goes in ENTRY_BB. */
3489 gsi
= gsi_last_nondebug_bb (entry_bb
);
3490 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3492 if (fd
->collapse
> 1)
3494 int first_zero_iter
= -1, dummy
= -1;
3495 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
3497 counts
= XALLOCAVEC (tree
, fd
->collapse
);
3498 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3499 fin_bb
, first_zero_iter
,
3500 dummy_bb
, dummy
, l2_dom_bb
);
3503 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3504 t
= integer_one_node
;
3506 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
3507 fold_convert (type
, fd
->loop
.n1
),
3508 fold_convert (type
, fd
->loop
.n2
));
3509 if (fd
->collapse
== 1
3510 && TYPE_UNSIGNED (type
)
3511 && (t
== NULL_TREE
|| !integer_onep (t
)))
3513 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
3514 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
3515 true, GSI_SAME_STMT
);
3516 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
3517 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
3518 true, GSI_SAME_STMT
);
3519 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
3520 NULL_TREE
, NULL_TREE
);
3521 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3522 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
3523 expand_omp_regimplify_p
, NULL
, NULL
)
3524 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
3525 expand_omp_regimplify_p
, NULL
, NULL
))
3527 gsi
= gsi_for_stmt (cond_stmt
);
3528 gimple_regimplify_operands (cond_stmt
, &gsi
);
3530 ep
= split_block (entry_bb
, cond_stmt
);
3531 ep
->flags
= EDGE_TRUE_VALUE
;
3532 entry_bb
= ep
->dest
;
3533 ep
->probability
= profile_probability::very_likely ();
3534 ep
= make_edge (ep
->src
, fin_bb
, EDGE_FALSE_VALUE
);
3535 ep
->probability
= profile_probability::very_unlikely ();
3536 if (gimple_in_ssa_p (cfun
))
3538 int dest_idx
= find_edge (entry_bb
, fin_bb
)->dest_idx
;
3539 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
3540 !gsi_end_p (gpi
); gsi_next (&gpi
))
3542 gphi
*phi
= gpi
.phi ();
3543 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
3544 ep
, UNKNOWN_LOCATION
);
3547 gsi
= gsi_last_bb (entry_bb
);
3550 if (fd
->have_reductemp
)
3552 tree t1
= build_int_cst (long_integer_type_node
, 0);
3553 tree t2
= build_int_cst (long_integer_type_node
, 1);
3554 tree t3
= build_int_cstu (long_integer_type_node
,
3555 (HOST_WIDE_INT_1U
<< 31) + 1);
3556 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
3557 clauses
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
3558 reductions
= OMP_CLAUSE_DECL (clauses
);
3559 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
3560 gimple
*g
= SSA_NAME_DEF_STMT (reductions
);
3561 reductions
= gimple_assign_rhs1 (g
);
3562 OMP_CLAUSE_DECL (clauses
) = reductions
;
3563 gimple_stmt_iterator gsi2
= gsi_for_stmt (g
);
3565 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START
),
3566 9, t1
, t2
, t2
, t3
, t1
, null_pointer_node
,
3567 null_pointer_node
, reductions
, null_pointer_node
);
3568 force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
3569 true, GSI_SAME_STMT
);
3570 gsi_remove (&gsi2
, true);
3571 release_ssa_name (gimple_assign_lhs (g
));
3573 switch (gimple_omp_for_kind (fd
->for_stmt
))
3575 case GF_OMP_FOR_KIND_FOR
:
3576 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
3577 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
3579 case GF_OMP_FOR_KIND_DISTRIBUTE
:
3580 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
3581 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
3586 nthreads
= build_call_expr (nthreads
, 0);
3587 nthreads
= fold_convert (itype
, nthreads
);
3588 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
3589 true, GSI_SAME_STMT
);
3590 threadid
= build_call_expr (threadid
, 0);
3591 threadid
= fold_convert (itype
, threadid
);
3592 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
3593 true, GSI_SAME_STMT
);
3597 step
= fd
->loop
.step
;
3598 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3600 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3601 OMP_CLAUSE__LOOPTEMP_
);
3602 gcc_assert (innerc
);
3603 n1
= OMP_CLAUSE_DECL (innerc
);
3604 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3605 OMP_CLAUSE__LOOPTEMP_
);
3606 gcc_assert (innerc
);
3607 n2
= OMP_CLAUSE_DECL (innerc
);
3609 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
3610 true, NULL_TREE
, true, GSI_SAME_STMT
);
3611 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
3612 true, NULL_TREE
, true, GSI_SAME_STMT
);
3613 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
3614 true, NULL_TREE
, true, GSI_SAME_STMT
);
3616 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
3617 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
3618 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
3619 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
3620 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
3621 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
3622 fold_build1 (NEGATE_EXPR
, itype
, t
),
3623 fold_build1 (NEGATE_EXPR
, itype
, step
));
3625 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
3626 t
= fold_convert (itype
, t
);
3627 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3629 q
= create_tmp_reg (itype
, "q");
3630 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, n
, nthreads
);
3631 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
3632 gsi_insert_before (&gsi
, gimple_build_assign (q
, t
), GSI_SAME_STMT
);
3634 tt
= create_tmp_reg (itype
, "tt");
3635 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
, n
, nthreads
);
3636 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
3637 gsi_insert_before (&gsi
, gimple_build_assign (tt
, t
), GSI_SAME_STMT
);
3639 t
= build2 (LT_EXPR
, boolean_type_node
, threadid
, tt
);
3640 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
3641 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
3643 second_bb
= split_block (entry_bb
, cond_stmt
)->dest
;
3644 gsi
= gsi_last_nondebug_bb (second_bb
);
3645 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3647 gsi_insert_before (&gsi
, gimple_build_assign (tt
, build_int_cst (itype
, 0)),
3649 gassign
*assign_stmt
3650 = gimple_build_assign (q
, PLUS_EXPR
, q
, build_int_cst (itype
, 1));
3651 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3653 third_bb
= split_block (second_bb
, assign_stmt
)->dest
;
3654 gsi
= gsi_last_nondebug_bb (third_bb
);
3655 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3657 t
= build2 (MULT_EXPR
, itype
, q
, threadid
);
3658 t
= build2 (PLUS_EXPR
, itype
, t
, tt
);
3659 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3661 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, q
);
3662 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
3664 t
= build2 (GE_EXPR
, boolean_type_node
, s0
, e0
);
3665 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
3667 /* Remove the GIMPLE_OMP_FOR statement. */
3668 gsi_remove (&gsi
, true);
3670 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3671 gsi
= gsi_start_bb (seq_start_bb
);
3673 tree startvar
= fd
->loop
.v
;
3674 tree endvar
= NULL_TREE
;
3676 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3678 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
3679 ? gimple_omp_parallel_clauses (inner_stmt
)
3680 : gimple_omp_for_clauses (inner_stmt
);
3681 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
3682 gcc_assert (innerc
);
3683 startvar
= OMP_CLAUSE_DECL (innerc
);
3684 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3685 OMP_CLAUSE__LOOPTEMP_
);
3686 gcc_assert (innerc
);
3687 endvar
= OMP_CLAUSE_DECL (innerc
);
3688 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
3689 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
3692 for (i
= 1; i
< fd
->collapse
; i
++)
3694 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3695 OMP_CLAUSE__LOOPTEMP_
);
3696 gcc_assert (innerc
);
3698 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3699 OMP_CLAUSE__LOOPTEMP_
);
3702 /* If needed (distribute parallel for with lastprivate),
3703 propagate down the total number of iterations. */
3704 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
3706 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
3707 GSI_CONTINUE_LINKING
);
3708 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
3709 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3713 t
= fold_convert (itype
, s0
);
3714 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3715 if (POINTER_TYPE_P (type
))
3717 t
= fold_build_pointer_plus (n1
, t
);
3718 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
3719 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
3720 t
= fold_convert (signed_type_for (type
), t
);
3723 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3724 t
= fold_convert (TREE_TYPE (startvar
), t
);
3725 t
= force_gimple_operand_gsi (&gsi
, t
,
3727 && TREE_ADDRESSABLE (startvar
),
3728 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
3729 assign_stmt
= gimple_build_assign (startvar
, t
);
3730 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3732 t
= fold_convert (itype
, e0
);
3733 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
3734 if (POINTER_TYPE_P (type
))
3736 t
= fold_build_pointer_plus (n1
, t
);
3737 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
3738 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
3739 t
= fold_convert (signed_type_for (type
), t
);
3742 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
3743 t
= fold_convert (TREE_TYPE (startvar
), t
);
3744 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3745 false, GSI_CONTINUE_LINKING
);
3748 assign_stmt
= gimple_build_assign (endvar
, e
);
3749 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3750 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
3751 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
3753 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
3754 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3756 /* Handle linear clause adjustments. */
3757 tree itercnt
= NULL_TREE
;
3758 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
3759 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
3760 c
; c
= OMP_CLAUSE_CHAIN (c
))
3761 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
3762 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
3764 tree d
= OMP_CLAUSE_DECL (c
);
3765 bool is_ref
= omp_is_reference (d
);
3766 tree t
= d
, a
, dest
;
3768 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
3769 if (itercnt
== NULL_TREE
)
3771 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3773 itercnt
= fold_build2 (MINUS_EXPR
, itype
,
3774 fold_convert (itype
, n1
),
3775 fold_convert (itype
, fd
->loop
.n1
));
3776 itercnt
= fold_build2 (EXACT_DIV_EXPR
, itype
, itercnt
, step
);
3777 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercnt
, s0
);
3778 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
3780 GSI_CONTINUE_LINKING
);
3785 tree type
= TREE_TYPE (t
);
3786 if (POINTER_TYPE_P (type
))
3788 a
= fold_build2 (MULT_EXPR
, type
,
3789 fold_convert (type
, itercnt
),
3790 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
3791 dest
= unshare_expr (t
);
3792 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
3793 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
, a
);
3794 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3795 false, GSI_CONTINUE_LINKING
);
3796 assign_stmt
= gimple_build_assign (dest
, t
);
3797 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
3799 if (fd
->collapse
> 1)
3800 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
3804 /* The code controlling the sequential loop replaces the
3805 GIMPLE_OMP_CONTINUE. */
3806 gsi
= gsi_last_nondebug_bb (cont_bb
);
3807 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
3808 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
3809 vmain
= gimple_omp_continue_control_use (cont_stmt
);
3810 vback
= gimple_omp_continue_control_def (cont_stmt
);
3812 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
3814 if (POINTER_TYPE_P (type
))
3815 t
= fold_build_pointer_plus (vmain
, step
);
3817 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
3818 t
= force_gimple_operand_gsi (&gsi
, t
,
3820 && TREE_ADDRESSABLE (vback
),
3821 NULL_TREE
, true, GSI_SAME_STMT
);
3822 assign_stmt
= gimple_build_assign (vback
, t
);
3823 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
3825 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
3826 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
3828 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
3831 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3832 gsi_remove (&gsi
, true);
3834 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
3835 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
3838 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3839 gsi
= gsi_last_nondebug_bb (exit_bb
);
3840 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
3842 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
3843 if (fd
->have_reductemp
)
3847 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
3849 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
3850 gcall
*g
= gimple_build_call (fn
, 0);
3853 gimple_call_set_lhs (g
, t
);
3854 gsi_insert_after (&gsi
, gimple_build_assign (reductions
,
3858 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
3861 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
3863 gsi_remove (&gsi
, true);
3865 /* Connect all the blocks. */
3866 ep
= make_edge (entry_bb
, third_bb
, EDGE_FALSE_VALUE
);
3867 ep
->probability
= profile_probability::guessed_always ().apply_scale (3, 4);
3868 ep
= find_edge (entry_bb
, second_bb
);
3869 ep
->flags
= EDGE_TRUE_VALUE
;
3870 ep
->probability
= profile_probability::guessed_always ().apply_scale (1, 4);
3871 find_edge (third_bb
, seq_start_bb
)->flags
= EDGE_FALSE_VALUE
;
3872 find_edge (third_bb
, fin_bb
)->flags
= EDGE_TRUE_VALUE
;
3876 ep
= find_edge (cont_bb
, body_bb
);
3879 ep
= BRANCH_EDGE (cont_bb
);
3880 gcc_assert (single_succ (ep
->dest
) == body_bb
);
3882 if (gimple_omp_for_combined_p (fd
->for_stmt
))
3887 else if (fd
->collapse
> 1)
3890 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
3893 ep
->flags
= EDGE_TRUE_VALUE
;
3894 find_edge (cont_bb
, fin_bb
)->flags
3895 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
3898 set_immediate_dominator (CDI_DOMINATORS
, second_bb
, entry_bb
);
3899 set_immediate_dominator (CDI_DOMINATORS
, third_bb
, entry_bb
);
3900 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
, third_bb
);
3902 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
3903 recompute_dominator (CDI_DOMINATORS
, body_bb
));
3904 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
3905 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
3907 struct loop
*loop
= body_bb
->loop_father
;
3908 if (loop
!= entry_bb
->loop_father
)
3910 gcc_assert (broken_loop
|| loop
->header
== body_bb
);
3911 gcc_assert (broken_loop
3912 || loop
->latch
== region
->cont
3913 || single_pred (loop
->latch
) == region
->cont
);
3917 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
3919 loop
= alloc_loop ();
3920 loop
->header
= body_bb
;
3921 if (collapse_bb
== NULL
)
3922 loop
->latch
= cont_bb
;
3923 add_loop (loop
, body_bb
->loop_father
);
3927 /* Return phi in E->DEST with ARG on edge E. */
3930 find_phi_with_arg_on_edge (tree arg
, edge e
)
3932 basic_block bb
= e
->dest
;
3934 for (gphi_iterator gpi
= gsi_start_phis (bb
);
3938 gphi
*phi
= gpi
.phi ();
3939 if (PHI_ARG_DEF_FROM_EDGE (phi
, e
) == arg
)
3946 /* A subroutine of expand_omp_for. Generate code for a parallel
3947 loop with static schedule and a specified chunk size. Given
3950 for (V = N1; V cond N2; V += STEP) BODY;
3952 where COND is "<" or ">", we generate pseudocode
3954 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3959 if ((__typeof (V)) -1 > 0 && cond is >)
3960 n = -(adj + N2 - N1) / -STEP;
3962 n = (adj + N2 - N1) / STEP;
3964 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3965 here so that V is defined
3966 if the loop is not entered
3968 s0 = (trip * nthreads + threadid) * CHUNK;
3969 e0 = min (s0 + CHUNK, n);
3970 if (s0 < n) goto L1; else goto L4;
3977 if (V cond e) goto L2; else goto L3;
3985 expand_omp_for_static_chunk (struct omp_region
*region
,
3986 struct omp_for_data
*fd
, gimple
*inner_stmt
)
3988 tree n
, s0
, e0
, e
, t
;
3989 tree trip_var
, trip_init
, trip_main
, trip_back
, nthreads
, threadid
;
3990 tree type
, itype
, vmain
, vback
, vextra
;
3991 basic_block entry_bb
, exit_bb
, body_bb
, seq_start_bb
, iter_part_bb
;
3992 basic_block trip_update_bb
= NULL
, cont_bb
, collapse_bb
= NULL
, fin_bb
;
3993 gimple_stmt_iterator gsi
;
3995 bool broken_loop
= region
->cont
== NULL
;
3996 tree
*counts
= NULL
;
3998 tree reductions
= NULL_TREE
;
4000 itype
= type
= TREE_TYPE (fd
->loop
.v
);
4001 if (POINTER_TYPE_P (type
))
4002 itype
= signed_type_for (type
);
4004 entry_bb
= region
->entry
;
4005 se
= split_block (entry_bb
, last_stmt (entry_bb
));
4007 iter_part_bb
= se
->dest
;
4008 cont_bb
= region
->cont
;
4009 gcc_assert (EDGE_COUNT (iter_part_bb
->succs
) == 2);
4010 fin_bb
= BRANCH_EDGE (iter_part_bb
)->dest
;
4011 gcc_assert (broken_loop
4012 || fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
);
4013 seq_start_bb
= split_edge (FALLTHRU_EDGE (iter_part_bb
));
4014 body_bb
= single_succ (seq_start_bb
);
4017 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
4018 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
4019 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4020 trip_update_bb
= split_edge (FALLTHRU_EDGE (cont_bb
));
4022 exit_bb
= region
->exit
;
4024 /* Trip and adjustment setup goes in ENTRY_BB. */
4025 gsi
= gsi_last_nondebug_bb (entry_bb
);
4026 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4028 if (fd
->collapse
> 1)
4030 int first_zero_iter
= -1, dummy
= -1;
4031 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
4033 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4034 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4035 fin_bb
, first_zero_iter
,
4036 dummy_bb
, dummy
, l2_dom_bb
);
4039 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4040 t
= integer_one_node
;
4042 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
4043 fold_convert (type
, fd
->loop
.n1
),
4044 fold_convert (type
, fd
->loop
.n2
));
4045 if (fd
->collapse
== 1
4046 && TYPE_UNSIGNED (type
)
4047 && (t
== NULL_TREE
|| !integer_onep (t
)))
4049 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
4050 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
4051 true, GSI_SAME_STMT
);
4052 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
4053 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
4054 true, GSI_SAME_STMT
);
4055 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
4056 NULL_TREE
, NULL_TREE
);
4057 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
4058 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
4059 expand_omp_regimplify_p
, NULL
, NULL
)
4060 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
4061 expand_omp_regimplify_p
, NULL
, NULL
))
4063 gsi
= gsi_for_stmt (cond_stmt
);
4064 gimple_regimplify_operands (cond_stmt
, &gsi
);
4066 se
= split_block (entry_bb
, cond_stmt
);
4067 se
->flags
= EDGE_TRUE_VALUE
;
4068 entry_bb
= se
->dest
;
4069 se
->probability
= profile_probability::very_likely ();
4070 se
= make_edge (se
->src
, fin_bb
, EDGE_FALSE_VALUE
);
4071 se
->probability
= profile_probability::very_unlikely ();
4072 if (gimple_in_ssa_p (cfun
))
4074 int dest_idx
= find_edge (iter_part_bb
, fin_bb
)->dest_idx
;
4075 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
4076 !gsi_end_p (gpi
); gsi_next (&gpi
))
4078 gphi
*phi
= gpi
.phi ();
4079 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
4080 se
, UNKNOWN_LOCATION
);
4083 gsi
= gsi_last_bb (entry_bb
);
4086 if (fd
->have_reductemp
)
4088 tree t1
= build_int_cst (long_integer_type_node
, 0);
4089 tree t2
= build_int_cst (long_integer_type_node
, 1);
4090 tree t3
= build_int_cstu (long_integer_type_node
,
4091 (HOST_WIDE_INT_1U
<< 31) + 1);
4092 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
4093 clauses
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
4094 reductions
= OMP_CLAUSE_DECL (clauses
);
4095 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
4096 gimple
*g
= SSA_NAME_DEF_STMT (reductions
);
4097 reductions
= gimple_assign_rhs1 (g
);
4098 OMP_CLAUSE_DECL (clauses
) = reductions
;
4099 gimple_stmt_iterator gsi2
= gsi_for_stmt (g
);
4101 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START
),
4102 9, t1
, t2
, t2
, t3
, t1
, null_pointer_node
,
4103 null_pointer_node
, reductions
, null_pointer_node
);
4104 force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
4105 true, GSI_SAME_STMT
);
4106 gsi_remove (&gsi2
, true);
4107 release_ssa_name (gimple_assign_lhs (g
));
4109 switch (gimple_omp_for_kind (fd
->for_stmt
))
4111 case GF_OMP_FOR_KIND_FOR
:
4112 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
4113 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
4115 case GF_OMP_FOR_KIND_DISTRIBUTE
:
4116 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
4117 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
4122 nthreads
= build_call_expr (nthreads
, 0);
4123 nthreads
= fold_convert (itype
, nthreads
);
4124 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
4125 true, GSI_SAME_STMT
);
4126 threadid
= build_call_expr (threadid
, 0);
4127 threadid
= fold_convert (itype
, threadid
);
4128 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
4129 true, GSI_SAME_STMT
);
4133 step
= fd
->loop
.step
;
4134 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4136 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4137 OMP_CLAUSE__LOOPTEMP_
);
4138 gcc_assert (innerc
);
4139 n1
= OMP_CLAUSE_DECL (innerc
);
4140 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4141 OMP_CLAUSE__LOOPTEMP_
);
4142 gcc_assert (innerc
);
4143 n2
= OMP_CLAUSE_DECL (innerc
);
4145 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
4146 true, NULL_TREE
, true, GSI_SAME_STMT
);
4147 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
4148 true, NULL_TREE
, true, GSI_SAME_STMT
);
4149 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
4150 true, NULL_TREE
, true, GSI_SAME_STMT
);
4151 tree chunk_size
= fold_convert (itype
, fd
->chunk_size
);
4152 chunk_size
= omp_adjust_chunk_size (chunk_size
, fd
->simd_schedule
);
4154 = force_gimple_operand_gsi (&gsi
, chunk_size
, true, NULL_TREE
, true,
4157 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
4158 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
4159 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
4160 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
4161 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
4162 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
4163 fold_build1 (NEGATE_EXPR
, itype
, t
),
4164 fold_build1 (NEGATE_EXPR
, itype
, step
));
4166 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
4167 t
= fold_convert (itype
, t
);
4168 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4169 true, GSI_SAME_STMT
);
4171 trip_var
= create_tmp_reg (itype
, ".trip");
4172 if (gimple_in_ssa_p (cfun
))
4174 trip_init
= make_ssa_name (trip_var
);
4175 trip_main
= make_ssa_name (trip_var
);
4176 trip_back
= make_ssa_name (trip_var
);
4180 trip_init
= trip_var
;
4181 trip_main
= trip_var
;
4182 trip_back
= trip_var
;
4185 gassign
*assign_stmt
4186 = gimple_build_assign (trip_init
, build_int_cst (itype
, 0));
4187 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4189 t
= fold_build2 (MULT_EXPR
, itype
, threadid
, chunk_size
);
4190 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4191 if (POINTER_TYPE_P (type
))
4192 t
= fold_build_pointer_plus (n1
, t
);
4194 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4195 vextra
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4196 true, GSI_SAME_STMT
);
4198 /* Remove the GIMPLE_OMP_FOR. */
4199 gsi_remove (&gsi
, true);
4201 gimple_stmt_iterator gsif
= gsi
;
4203 /* Iteration space partitioning goes in ITER_PART_BB. */
4204 gsi
= gsi_last_bb (iter_part_bb
);
4206 t
= fold_build2 (MULT_EXPR
, itype
, trip_main
, nthreads
);
4207 t
= fold_build2 (PLUS_EXPR
, itype
, t
, threadid
);
4208 t
= fold_build2 (MULT_EXPR
, itype
, t
, chunk_size
);
4209 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4210 false, GSI_CONTINUE_LINKING
);
4212 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, chunk_size
);
4213 t
= fold_build2 (MIN_EXPR
, itype
, t
, n
);
4214 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4215 false, GSI_CONTINUE_LINKING
);
4217 t
= build2 (LT_EXPR
, boolean_type_node
, s0
, n
);
4218 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_CONTINUE_LINKING
);
4220 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4221 gsi
= gsi_start_bb (seq_start_bb
);
4223 tree startvar
= fd
->loop
.v
;
4224 tree endvar
= NULL_TREE
;
4226 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4228 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
4229 ? gimple_omp_parallel_clauses (inner_stmt
)
4230 : gimple_omp_for_clauses (inner_stmt
);
4231 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
4232 gcc_assert (innerc
);
4233 startvar
= OMP_CLAUSE_DECL (innerc
);
4234 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4235 OMP_CLAUSE__LOOPTEMP_
);
4236 gcc_assert (innerc
);
4237 endvar
= OMP_CLAUSE_DECL (innerc
);
4238 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
4239 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
4242 for (i
= 1; i
< fd
->collapse
; i
++)
4244 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4245 OMP_CLAUSE__LOOPTEMP_
);
4246 gcc_assert (innerc
);
4248 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4249 OMP_CLAUSE__LOOPTEMP_
);
4252 /* If needed (distribute parallel for with lastprivate),
4253 propagate down the total number of iterations. */
4254 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
4256 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
4257 GSI_CONTINUE_LINKING
);
4258 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
4259 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4264 t
= fold_convert (itype
, s0
);
4265 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4266 if (POINTER_TYPE_P (type
))
4268 t
= fold_build_pointer_plus (n1
, t
);
4269 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
4270 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
4271 t
= fold_convert (signed_type_for (type
), t
);
4274 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4275 t
= fold_convert (TREE_TYPE (startvar
), t
);
4276 t
= force_gimple_operand_gsi (&gsi
, t
,
4278 && TREE_ADDRESSABLE (startvar
),
4279 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
4280 assign_stmt
= gimple_build_assign (startvar
, t
);
4281 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4283 t
= fold_convert (itype
, e0
);
4284 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
4285 if (POINTER_TYPE_P (type
))
4287 t
= fold_build_pointer_plus (n1
, t
);
4288 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
4289 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
4290 t
= fold_convert (signed_type_for (type
), t
);
4293 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
4294 t
= fold_convert (TREE_TYPE (startvar
), t
);
4295 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4296 false, GSI_CONTINUE_LINKING
);
4299 assign_stmt
= gimple_build_assign (endvar
, e
);
4300 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4301 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
4302 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
4304 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
4305 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4307 /* Handle linear clause adjustments. */
4308 tree itercnt
= NULL_TREE
, itercntbias
= NULL_TREE
;
4309 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
4310 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
4311 c
; c
= OMP_CLAUSE_CHAIN (c
))
4312 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
4313 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
4315 tree d
= OMP_CLAUSE_DECL (c
);
4316 bool is_ref
= omp_is_reference (d
);
4317 tree t
= d
, a
, dest
;
4319 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
4320 tree type
= TREE_TYPE (t
);
4321 if (POINTER_TYPE_P (type
))
4323 dest
= unshare_expr (t
);
4324 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
4325 expand_omp_build_assign (&gsif
, v
, t
);
4326 if (itercnt
== NULL_TREE
)
4328 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4331 = fold_build2 (MINUS_EXPR
, itype
, fold_convert (itype
, n1
),
4332 fold_convert (itype
, fd
->loop
.n1
));
4333 itercntbias
= fold_build2 (EXACT_DIV_EXPR
, itype
,
4336 = force_gimple_operand_gsi (&gsif
, itercntbias
, true,
4339 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercntbias
, s0
);
4340 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
4342 GSI_CONTINUE_LINKING
);
4347 a
= fold_build2 (MULT_EXPR
, type
,
4348 fold_convert (type
, itercnt
),
4349 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
4350 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
4351 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
4352 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4353 false, GSI_CONTINUE_LINKING
);
4354 assign_stmt
= gimple_build_assign (dest
, t
);
4355 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4357 if (fd
->collapse
> 1)
4358 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
4362 /* The code controlling the sequential loop goes in CONT_BB,
4363 replacing the GIMPLE_OMP_CONTINUE. */
4364 gsi
= gsi_last_nondebug_bb (cont_bb
);
4365 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
4366 vmain
= gimple_omp_continue_control_use (cont_stmt
);
4367 vback
= gimple_omp_continue_control_def (cont_stmt
);
4369 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4371 if (POINTER_TYPE_P (type
))
4372 t
= fold_build_pointer_plus (vmain
, step
);
4374 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
4375 if (DECL_P (vback
) && TREE_ADDRESSABLE (vback
))
4376 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4377 true, GSI_SAME_STMT
);
4378 assign_stmt
= gimple_build_assign (vback
, t
);
4379 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4381 if (tree_int_cst_equal (fd
->chunk_size
, integer_one_node
))
4382 t
= build2 (EQ_EXPR
, boolean_type_node
,
4383 build_int_cst (itype
, 0),
4384 build_int_cst (itype
, 1));
4386 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
4387 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
4389 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
4392 /* Remove GIMPLE_OMP_CONTINUE. */
4393 gsi_remove (&gsi
, true);
4395 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
4396 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
4398 /* Trip update code goes into TRIP_UPDATE_BB. */
4399 gsi
= gsi_start_bb (trip_update_bb
);
4401 t
= build_int_cst (itype
, 1);
4402 t
= build2 (PLUS_EXPR
, itype
, trip_main
, t
);
4403 assign_stmt
= gimple_build_assign (trip_back
, t
);
4404 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4407 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4408 gsi
= gsi_last_nondebug_bb (exit_bb
);
4409 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
4411 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
4412 if (fd
->have_reductemp
)
4416 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
4418 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
4419 gcall
*g
= gimple_build_call (fn
, 0);
4422 gimple_call_set_lhs (g
, t
);
4423 gsi_insert_after (&gsi
, gimple_build_assign (reductions
,
4427 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
4430 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
4432 gsi_remove (&gsi
, true);
4434 /* Connect the new blocks. */
4435 find_edge (iter_part_bb
, seq_start_bb
)->flags
= EDGE_TRUE_VALUE
;
4436 find_edge (iter_part_bb
, fin_bb
)->flags
= EDGE_FALSE_VALUE
;
4440 se
= find_edge (cont_bb
, body_bb
);
4443 se
= BRANCH_EDGE (cont_bb
);
4444 gcc_assert (single_succ (se
->dest
) == body_bb
);
4446 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4451 else if (fd
->collapse
> 1)
4454 se
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
4457 se
->flags
= EDGE_TRUE_VALUE
;
4458 find_edge (cont_bb
, trip_update_bb
)->flags
4459 = se
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
4461 redirect_edge_and_branch (single_succ_edge (trip_update_bb
),
4465 if (gimple_in_ssa_p (cfun
))
4473 gcc_assert (fd
->collapse
== 1 && !broken_loop
);
4475 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4476 remove arguments of the phi nodes in fin_bb. We need to create
4477 appropriate phi nodes in iter_part_bb instead. */
4478 se
= find_edge (iter_part_bb
, fin_bb
);
4479 re
= single_succ_edge (trip_update_bb
);
4480 vec
<edge_var_map
> *head
= redirect_edge_var_map_vector (re
);
4481 ene
= single_succ_edge (entry_bb
);
4483 psi
= gsi_start_phis (fin_bb
);
4484 for (i
= 0; !gsi_end_p (psi
) && head
->iterate (i
, &vm
);
4485 gsi_next (&psi
), ++i
)
4491 if (operand_equal_p (gimple_phi_arg_def (phi
, 0),
4492 redirect_edge_var_map_def (vm
), 0))
4495 t
= gimple_phi_result (phi
);
4496 gcc_assert (t
== redirect_edge_var_map_result (vm
));
4498 if (!single_pred_p (fin_bb
))
4499 t
= copy_ssa_name (t
, phi
);
4501 nphi
= create_phi_node (t
, iter_part_bb
);
4503 t
= PHI_ARG_DEF_FROM_EDGE (phi
, se
);
4504 locus
= gimple_phi_arg_location_from_edge (phi
, se
);
4506 /* A special case -- fd->loop.v is not yet computed in
4507 iter_part_bb, we need to use vextra instead. */
4508 if (t
== fd
->loop
.v
)
4510 add_phi_arg (nphi
, t
, ene
, locus
);
4511 locus
= redirect_edge_var_map_location (vm
);
4512 tree back_arg
= redirect_edge_var_map_def (vm
);
4513 add_phi_arg (nphi
, back_arg
, re
, locus
);
4514 edge ce
= find_edge (cont_bb
, body_bb
);
4517 ce
= BRANCH_EDGE (cont_bb
);
4518 gcc_assert (single_succ (ce
->dest
) == body_bb
);
4519 ce
= single_succ_edge (ce
->dest
);
4521 gphi
*inner_loop_phi
= find_phi_with_arg_on_edge (back_arg
, ce
);
4522 gcc_assert (inner_loop_phi
!= NULL
);
4523 add_phi_arg (inner_loop_phi
, gimple_phi_result (nphi
),
4524 find_edge (seq_start_bb
, body_bb
), locus
);
4526 if (!single_pred_p (fin_bb
))
4527 add_phi_arg (phi
, gimple_phi_result (nphi
), se
, locus
);
4529 gcc_assert (gsi_end_p (psi
) && (head
== NULL
|| i
== head
->length ()));
4530 redirect_edge_var_map_clear (re
);
4531 if (single_pred_p (fin_bb
))
4534 psi
= gsi_start_phis (fin_bb
);
4535 if (gsi_end_p (psi
))
4537 remove_phi_node (&psi
, false);
4540 /* Make phi node for trip. */
4541 phi
= create_phi_node (trip_main
, iter_part_bb
);
4542 add_phi_arg (phi
, trip_back
, single_succ_edge (trip_update_bb
),
4544 add_phi_arg (phi
, trip_init
, single_succ_edge (entry_bb
),
4549 set_immediate_dominator (CDI_DOMINATORS
, trip_update_bb
, cont_bb
);
4550 set_immediate_dominator (CDI_DOMINATORS
, iter_part_bb
,
4551 recompute_dominator (CDI_DOMINATORS
, iter_part_bb
));
4552 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
4553 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
4554 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
,
4555 recompute_dominator (CDI_DOMINATORS
, seq_start_bb
));
4556 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
4557 recompute_dominator (CDI_DOMINATORS
, body_bb
));
4561 struct loop
*loop
= body_bb
->loop_father
;
4562 struct loop
*trip_loop
= alloc_loop ();
4563 trip_loop
->header
= iter_part_bb
;
4564 trip_loop
->latch
= trip_update_bb
;
4565 add_loop (trip_loop
, iter_part_bb
->loop_father
);
4567 if (loop
!= entry_bb
->loop_father
)
4569 gcc_assert (loop
->header
== body_bb
);
4570 gcc_assert (loop
->latch
== region
->cont
4571 || single_pred (loop
->latch
) == region
->cont
);
4572 trip_loop
->inner
= loop
;
4576 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4578 loop
= alloc_loop ();
4579 loop
->header
= body_bb
;
4580 if (collapse_bb
== NULL
)
4581 loop
->latch
= cont_bb
;
4582 add_loop (loop
, trip_loop
);
4587 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4588 loop. Given parameters:
4590 for (V = N1; V cond N2; V += STEP) BODY;
4592 where COND is "<" or ">", we generate pseudocode
4600 if (V cond N2) goto L0; else goto L2;
4603 For collapsed loops, given parameters:
4605 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4606 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4607 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4610 we generate pseudocode
4616 count3 = (adj + N32 - N31) / STEP3;
4621 count2 = (adj + N22 - N21) / STEP2;
4626 count1 = (adj + N12 - N11) / STEP1;
4627 count = count1 * count2 * count3;
4637 V2 += (V3 cond3 N32) ? 0 : STEP2;
4638 V3 = (V3 cond3 N32) ? V3 : N31;
4639 V1 += (V2 cond2 N22) ? 0 : STEP1;
4640 V2 = (V2 cond2 N22) ? V2 : N21;
4642 if (V < count) goto L0; else goto L2;
4648 expand_omp_simd (struct omp_region
*region
, struct omp_for_data
*fd
)
4651 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, l2_bb
, l2_dom_bb
;
4652 gimple_stmt_iterator gsi
;
4655 bool broken_loop
= region
->cont
== NULL
;
4657 tree
*counts
= NULL
;
4659 int safelen_int
= INT_MAX
;
4660 tree safelen
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4661 OMP_CLAUSE_SAFELEN
);
4662 tree simduid
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4663 OMP_CLAUSE__SIMDUID_
);
4669 safelen
= OMP_CLAUSE_SAFELEN_EXPR (safelen
);
4670 if (!poly_int_tree_p (safelen
, &val
))
4673 safelen_int
= MIN (constant_lower_bound (val
), INT_MAX
);
4674 if (safelen_int
== 1)
4677 type
= TREE_TYPE (fd
->loop
.v
);
4678 entry_bb
= region
->entry
;
4679 cont_bb
= region
->cont
;
4680 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4681 gcc_assert (broken_loop
4682 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
4683 l0_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
4686 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l0_bb
);
4687 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4688 l1_bb
= split_block (cont_bb
, last_stmt (cont_bb
))->dest
;
4689 l2_bb
= BRANCH_EDGE (entry_bb
)->dest
;
4693 BRANCH_EDGE (entry_bb
)->flags
&= ~EDGE_ABNORMAL
;
4694 l1_bb
= split_edge (BRANCH_EDGE (entry_bb
));
4695 l2_bb
= single_succ (l1_bb
);
4697 exit_bb
= region
->exit
;
4700 gsi
= gsi_last_nondebug_bb (entry_bb
);
4702 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4703 /* Not needed in SSA form right now. */
4704 gcc_assert (!gimple_in_ssa_p (cfun
));
4705 if (fd
->collapse
> 1)
4707 int first_zero_iter
= -1, dummy
= -1;
4708 basic_block zero_iter_bb
= l2_bb
, dummy_bb
= NULL
;
4710 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4711 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4712 zero_iter_bb
, first_zero_iter
,
4713 dummy_bb
, dummy
, l2_dom_bb
);
4715 if (l2_dom_bb
== NULL
)
4720 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4722 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4723 OMP_CLAUSE__LOOPTEMP_
);
4724 gcc_assert (innerc
);
4725 n1
= OMP_CLAUSE_DECL (innerc
);
4726 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4727 OMP_CLAUSE__LOOPTEMP_
);
4728 gcc_assert (innerc
);
4729 n2
= OMP_CLAUSE_DECL (innerc
);
4731 tree step
= fd
->loop
.step
;
4733 bool is_simt
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4737 cfun
->curr_properties
&= ~PROP_gimple_lomp_dev
;
4738 is_simt
= safelen_int
> 1;
4740 tree simt_lane
= NULL_TREE
, simt_maxlane
= NULL_TREE
;
4743 simt_lane
= create_tmp_var (unsigned_type_node
);
4744 gimple
*g
= gimple_build_call_internal (IFN_GOMP_SIMT_LANE
, 0);
4745 gimple_call_set_lhs (g
, simt_lane
);
4746 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4747 tree offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
,
4748 fold_convert (TREE_TYPE (step
), simt_lane
));
4749 n1
= fold_convert (type
, n1
);
4750 if (POINTER_TYPE_P (type
))
4751 n1
= fold_build_pointer_plus (n1
, offset
);
4753 n1
= fold_build2 (PLUS_EXPR
, type
, n1
, fold_convert (type
, offset
));
4755 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4756 if (fd
->collapse
> 1)
4757 simt_maxlane
= build_one_cst (unsigned_type_node
);
4758 else if (safelen_int
< omp_max_simt_vf ())
4759 simt_maxlane
= build_int_cst (unsigned_type_node
, safelen_int
);
4761 = build_call_expr_internal_loc (UNKNOWN_LOCATION
, IFN_GOMP_SIMT_VF
,
4762 unsigned_type_node
, 0);
4764 vf
= fold_build2 (MIN_EXPR
, unsigned_type_node
, vf
, simt_maxlane
);
4765 vf
= fold_convert (TREE_TYPE (step
), vf
);
4766 step
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
, vf
);
4769 expand_omp_build_assign (&gsi
, fd
->loop
.v
, fold_convert (type
, n1
));
4770 if (fd
->collapse
> 1)
4772 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4775 expand_omp_for_init_vars (fd
, &gsi
, counts
, NULL
, n1
);
4779 for (i
= 0; i
< fd
->collapse
; i
++)
4781 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
4782 if (POINTER_TYPE_P (itype
))
4783 itype
= signed_type_for (itype
);
4784 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].n1
);
4785 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4789 /* Remove the GIMPLE_OMP_FOR statement. */
4790 gsi_remove (&gsi
, true);
4794 /* Code to control the increment goes in the CONT_BB. */
4795 gsi
= gsi_last_nondebug_bb (cont_bb
);
4796 stmt
= gsi_stmt (gsi
);
4797 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_CONTINUE
);
4799 if (POINTER_TYPE_P (type
))
4800 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
4802 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
4803 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
4805 if (fd
->collapse
> 1)
4807 i
= fd
->collapse
- 1;
4808 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
4810 t
= fold_convert (sizetype
, fd
->loops
[i
].step
);
4811 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, t
);
4815 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
),
4817 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
4820 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4822 for (i
= fd
->collapse
- 1; i
> 0; i
--)
4824 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
4825 tree itype2
= TREE_TYPE (fd
->loops
[i
- 1].v
);
4826 if (POINTER_TYPE_P (itype2
))
4827 itype2
= signed_type_for (itype2
);
4828 t
= fold_convert (itype2
, fd
->loops
[i
- 1].step
);
4829 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true,
4831 t
= build3 (COND_EXPR
, itype2
,
4832 build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4834 fold_convert (itype
, fd
->loops
[i
].n2
)),
4835 build_int_cst (itype2
, 0), t
);
4836 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
- 1].v
)))
4837 t
= fold_build_pointer_plus (fd
->loops
[i
- 1].v
, t
);
4839 t
= fold_build2 (PLUS_EXPR
, itype2
, fd
->loops
[i
- 1].v
, t
);
4840 expand_omp_build_assign (&gsi
, fd
->loops
[i
- 1].v
, t
);
4842 t
= fold_convert (itype
, fd
->loops
[i
].n1
);
4843 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true,
4845 t
= build3 (COND_EXPR
, itype
,
4846 build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4848 fold_convert (itype
, fd
->loops
[i
].n2
)),
4850 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
4854 /* Remove GIMPLE_OMP_CONTINUE. */
4855 gsi_remove (&gsi
, true);
4858 /* Emit the condition in L1_BB. */
4859 gsi
= gsi_start_bb (l1_bb
);
4861 t
= fold_convert (type
, n2
);
4862 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4863 false, GSI_CONTINUE_LINKING
);
4864 tree v
= fd
->loop
.v
;
4865 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
4866 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
4867 false, GSI_CONTINUE_LINKING
);
4868 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
, v
, t
);
4869 cond_stmt
= gimple_build_cond_empty (t
);
4870 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
4871 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
4873 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
4876 gsi
= gsi_for_stmt (cond_stmt
);
4877 gimple_regimplify_operands (cond_stmt
, &gsi
);
4880 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4883 gsi
= gsi_start_bb (l2_bb
);
4884 step
= fold_build2 (MINUS_EXPR
, TREE_TYPE (step
), fd
->loop
.step
, step
);
4885 if (POINTER_TYPE_P (type
))
4886 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
4888 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
4889 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
4892 /* Remove GIMPLE_OMP_RETURN. */
4893 gsi
= gsi_last_nondebug_bb (exit_bb
);
4894 gsi_remove (&gsi
, true);
4896 /* Connect the new blocks. */
4897 remove_edge (FALLTHRU_EDGE (entry_bb
));
4901 remove_edge (BRANCH_EDGE (entry_bb
));
4902 make_edge (entry_bb
, l1_bb
, EDGE_FALLTHRU
);
4904 e
= BRANCH_EDGE (l1_bb
);
4905 ne
= FALLTHRU_EDGE (l1_bb
);
4906 e
->flags
= EDGE_TRUE_VALUE
;
4910 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
4912 ne
= single_succ_edge (l1_bb
);
4913 e
= make_edge (l1_bb
, l0_bb
, EDGE_TRUE_VALUE
);
4916 ne
->flags
= EDGE_FALSE_VALUE
;
4917 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
4918 ne
->probability
= e
->probability
.invert ();
4920 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
, entry_bb
);
4921 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
, l1_bb
);
4925 cond_stmt
= gimple_build_cond (LT_EXPR
, simt_lane
, simt_maxlane
,
4926 NULL_TREE
, NULL_TREE
);
4927 gsi
= gsi_last_bb (entry_bb
);
4928 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
4929 make_edge (entry_bb
, l2_bb
, EDGE_FALSE_VALUE
);
4930 FALLTHRU_EDGE (entry_bb
)->flags
= EDGE_TRUE_VALUE
;
4931 FALLTHRU_EDGE (entry_bb
)->probability
4932 = profile_probability::guessed_always ().apply_scale (7, 8);
4933 BRANCH_EDGE (entry_bb
)->probability
4934 = FALLTHRU_EDGE (entry_bb
)->probability
.invert ();
4935 l2_dom_bb
= entry_bb
;
4937 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
, l2_dom_bb
);
4941 struct loop
*loop
= alloc_loop ();
4942 loop
->header
= l1_bb
;
4943 loop
->latch
= cont_bb
;
4944 add_loop (loop
, l1_bb
->loop_father
);
4945 loop
->safelen
= safelen_int
;
4948 loop
->simduid
= OMP_CLAUSE__SIMDUID__DECL (simduid
);
4949 cfun
->has_simduid_loops
= true;
4951 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4953 if ((flag_tree_loop_vectorize
4954 || !global_options_set
.x_flag_tree_loop_vectorize
)
4955 && flag_tree_loop_optimize
4956 && loop
->safelen
> 1)
4958 loop
->force_vectorize
= true;
4959 cfun
->has_force_vectorize_loops
= true;
4963 cfun
->has_simduid_loops
= true;
4966 /* Taskloop construct is represented after gimplification with
4967 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4968 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4969 which should just compute all the needed loop temporaries
4970 for GIMPLE_OMP_TASK. */
4973 expand_omp_taskloop_for_outer (struct omp_region
*region
,
4974 struct omp_for_data
*fd
,
4977 tree type
, bias
= NULL_TREE
;
4978 basic_block entry_bb
, cont_bb
, exit_bb
;
4979 gimple_stmt_iterator gsi
;
4980 gassign
*assign_stmt
;
4981 tree
*counts
= NULL
;
4984 gcc_assert (inner_stmt
);
4985 gcc_assert (region
->cont
);
4986 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_TASK
4987 && gimple_omp_task_taskloop_p (inner_stmt
));
4988 type
= TREE_TYPE (fd
->loop
.v
);
4990 /* See if we need to bias by LLONG_MIN. */
4991 if (fd
->iter_type
== long_long_unsigned_type_node
4992 && TREE_CODE (type
) == INTEGER_TYPE
4993 && !TYPE_UNSIGNED (type
))
4997 if (fd
->loop
.cond_code
== LT_EXPR
)
5000 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
5004 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
5007 if (TREE_CODE (n1
) != INTEGER_CST
5008 || TREE_CODE (n2
) != INTEGER_CST
5009 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
5010 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
5013 entry_bb
= region
->entry
;
5014 cont_bb
= region
->cont
;
5015 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
5016 gcc_assert (BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
5017 exit_bb
= region
->exit
;
5019 gsi
= gsi_last_nondebug_bb (entry_bb
);
5020 gimple
*for_stmt
= gsi_stmt (gsi
);
5021 gcc_assert (gimple_code (for_stmt
) == GIMPLE_OMP_FOR
);
5022 if (fd
->collapse
> 1)
5024 int first_zero_iter
= -1, dummy
= -1;
5025 basic_block zero_iter_bb
= NULL
, dummy_bb
= NULL
, l2_dom_bb
= NULL
;
5027 counts
= XALLOCAVEC (tree
, fd
->collapse
);
5028 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
5029 zero_iter_bb
, first_zero_iter
,
5030 dummy_bb
, dummy
, l2_dom_bb
);
5034 /* Some counts[i] vars might be uninitialized if
5035 some loop has zero iterations. But the body shouldn't
5036 be executed in that case, so just avoid uninit warnings. */
5037 for (i
= first_zero_iter
; i
< fd
->collapse
; i
++)
5038 if (SSA_VAR_P (counts
[i
]))
5039 TREE_NO_WARNING (counts
[i
]) = 1;
5041 edge e
= split_block (entry_bb
, gsi_stmt (gsi
));
5043 make_edge (zero_iter_bb
, entry_bb
, EDGE_FALLTHRU
);
5044 gsi
= gsi_last_bb (entry_bb
);
5045 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
5046 get_immediate_dominator (CDI_DOMINATORS
,
5054 if (POINTER_TYPE_P (TREE_TYPE (t0
))
5055 && TYPE_PRECISION (TREE_TYPE (t0
))
5056 != TYPE_PRECISION (fd
->iter_type
))
5058 /* Avoid casting pointers to integer of a different size. */
5059 tree itype
= signed_type_for (type
);
5060 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
5061 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
5065 t1
= fold_convert (fd
->iter_type
, t1
);
5066 t0
= fold_convert (fd
->iter_type
, t0
);
5070 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
5071 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
5074 tree innerc
= omp_find_clause (gimple_omp_task_clauses (inner_stmt
),
5075 OMP_CLAUSE__LOOPTEMP_
);
5076 gcc_assert (innerc
);
5077 tree startvar
= OMP_CLAUSE_DECL (innerc
);
5078 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
5079 gcc_assert (innerc
);
5080 tree endvar
= OMP_CLAUSE_DECL (innerc
);
5081 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
5083 gcc_assert (innerc
);
5084 for (i
= 1; i
< fd
->collapse
; i
++)
5086 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5087 OMP_CLAUSE__LOOPTEMP_
);
5088 gcc_assert (innerc
);
5090 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5091 OMP_CLAUSE__LOOPTEMP_
);
5094 /* If needed (inner taskloop has lastprivate clause), propagate
5095 down the total number of iterations. */
5096 tree t
= force_gimple_operand_gsi (&gsi
, fd
->loop
.n2
, false,
5098 GSI_CONTINUE_LINKING
);
5099 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
5100 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5104 t0
= force_gimple_operand_gsi (&gsi
, t0
, false, NULL_TREE
, false,
5105 GSI_CONTINUE_LINKING
);
5106 assign_stmt
= gimple_build_assign (startvar
, t0
);
5107 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5109 t1
= force_gimple_operand_gsi (&gsi
, t1
, false, NULL_TREE
, false,
5110 GSI_CONTINUE_LINKING
);
5111 assign_stmt
= gimple_build_assign (endvar
, t1
);
5112 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5113 if (fd
->collapse
> 1)
5114 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
5116 /* Remove the GIMPLE_OMP_FOR statement. */
5117 gsi
= gsi_for_stmt (for_stmt
);
5118 gsi_remove (&gsi
, true);
5120 gsi
= gsi_last_nondebug_bb (cont_bb
);
5121 gsi_remove (&gsi
, true);
5123 gsi
= gsi_last_nondebug_bb (exit_bb
);
5124 gsi_remove (&gsi
, true);
5126 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
5127 remove_edge (BRANCH_EDGE (entry_bb
));
5128 FALLTHRU_EDGE (cont_bb
)->probability
= profile_probability::always ();
5129 remove_edge (BRANCH_EDGE (cont_bb
));
5130 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, cont_bb
);
5131 set_immediate_dominator (CDI_DOMINATORS
, region
->entry
,
5132 recompute_dominator (CDI_DOMINATORS
, region
->entry
));
5135 /* Taskloop construct is represented after gimplification with
5136 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5137 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5138 GOMP_taskloop{,_ull} function arranges for each task to be given just
5139 a single range of iterations. */
5142 expand_omp_taskloop_for_inner (struct omp_region
*region
,
5143 struct omp_for_data
*fd
,
5146 tree e
, t
, type
, itype
, vmain
, vback
, bias
= NULL_TREE
;
5147 basic_block entry_bb
, exit_bb
, body_bb
, cont_bb
, collapse_bb
= NULL
;
5149 gimple_stmt_iterator gsi
;
5151 bool broken_loop
= region
->cont
== NULL
;
5152 tree
*counts
= NULL
;
5155 itype
= type
= TREE_TYPE (fd
->loop
.v
);
5156 if (POINTER_TYPE_P (type
))
5157 itype
= signed_type_for (type
);
5159 /* See if we need to bias by LLONG_MIN. */
5160 if (fd
->iter_type
== long_long_unsigned_type_node
5161 && TREE_CODE (type
) == INTEGER_TYPE
5162 && !TYPE_UNSIGNED (type
))
5166 if (fd
->loop
.cond_code
== LT_EXPR
)
5169 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
5173 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
5176 if (TREE_CODE (n1
) != INTEGER_CST
5177 || TREE_CODE (n2
) != INTEGER_CST
5178 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
5179 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
5182 entry_bb
= region
->entry
;
5183 cont_bb
= region
->cont
;
5184 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
5185 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
5186 gcc_assert (broken_loop
5187 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
5188 body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
5191 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
);
5192 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
5194 exit_bb
= region
->exit
;
5196 /* Iteration space partitioning goes in ENTRY_BB. */
5197 gsi
= gsi_last_nondebug_bb (entry_bb
);
5198 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
5200 if (fd
->collapse
> 1)
5202 int first_zero_iter
= -1, dummy
= -1;
5203 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
5205 counts
= XALLOCAVEC (tree
, fd
->collapse
);
5206 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
5207 fin_bb
, first_zero_iter
,
5208 dummy_bb
, dummy
, l2_dom_bb
);
5212 t
= integer_one_node
;
5214 step
= fd
->loop
.step
;
5215 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
5216 OMP_CLAUSE__LOOPTEMP_
);
5217 gcc_assert (innerc
);
5218 n1
= OMP_CLAUSE_DECL (innerc
);
5219 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
5220 gcc_assert (innerc
);
5221 n2
= OMP_CLAUSE_DECL (innerc
);
5224 n1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n1
, bias
);
5225 n2
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n2
, bias
);
5227 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
5228 true, NULL_TREE
, true, GSI_SAME_STMT
);
5229 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
5230 true, NULL_TREE
, true, GSI_SAME_STMT
);
5231 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
5232 true, NULL_TREE
, true, GSI_SAME_STMT
);
5234 tree startvar
= fd
->loop
.v
;
5235 tree endvar
= NULL_TREE
;
5237 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5239 tree clauses
= gimple_omp_for_clauses (inner_stmt
);
5240 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
5241 gcc_assert (innerc
);
5242 startvar
= OMP_CLAUSE_DECL (innerc
);
5243 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5244 OMP_CLAUSE__LOOPTEMP_
);
5245 gcc_assert (innerc
);
5246 endvar
= OMP_CLAUSE_DECL (innerc
);
5248 t
= fold_convert (TREE_TYPE (startvar
), n1
);
5249 t
= force_gimple_operand_gsi (&gsi
, t
,
5251 && TREE_ADDRESSABLE (startvar
),
5252 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
5253 gimple
*assign_stmt
= gimple_build_assign (startvar
, t
);
5254 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5256 t
= fold_convert (TREE_TYPE (startvar
), n2
);
5257 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5258 false, GSI_CONTINUE_LINKING
);
5261 assign_stmt
= gimple_build_assign (endvar
, e
);
5262 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5263 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
5264 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
5266 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
5267 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5269 if (fd
->collapse
> 1)
5270 expand_omp_for_init_vars (fd
, &gsi
, counts
, inner_stmt
, startvar
);
5274 /* The code controlling the sequential loop replaces the
5275 GIMPLE_OMP_CONTINUE. */
5276 gsi
= gsi_last_nondebug_bb (cont_bb
);
5277 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5278 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
5279 vmain
= gimple_omp_continue_control_use (cont_stmt
);
5280 vback
= gimple_omp_continue_control_def (cont_stmt
);
5282 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
5284 if (POINTER_TYPE_P (type
))
5285 t
= fold_build_pointer_plus (vmain
, step
);
5287 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
5288 t
= force_gimple_operand_gsi (&gsi
, t
,
5290 && TREE_ADDRESSABLE (vback
),
5291 NULL_TREE
, true, GSI_SAME_STMT
);
5292 assign_stmt
= gimple_build_assign (vback
, t
);
5293 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5295 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
5296 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
5298 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
5301 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5302 gsi_remove (&gsi
, true);
5304 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
5305 collapse_bb
= extract_omp_for_update_vars (fd
, cont_bb
, body_bb
);
5308 /* Remove the GIMPLE_OMP_FOR statement. */
5309 gsi
= gsi_for_stmt (fd
->for_stmt
);
5310 gsi_remove (&gsi
, true);
5312 /* Remove the GIMPLE_OMP_RETURN statement. */
5313 gsi
= gsi_last_nondebug_bb (exit_bb
);
5314 gsi_remove (&gsi
, true);
5316 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
5318 remove_edge (BRANCH_EDGE (entry_bb
));
5321 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb
));
5322 region
->outer
->cont
= NULL
;
5325 /* Connect all the blocks. */
5328 ep
= find_edge (cont_bb
, body_bb
);
5329 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5334 else if (fd
->collapse
> 1)
5337 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
5340 ep
->flags
= EDGE_TRUE_VALUE
;
5341 find_edge (cont_bb
, fin_bb
)->flags
5342 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
5345 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
5346 recompute_dominator (CDI_DOMINATORS
, body_bb
));
5348 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
5349 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
5351 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
5353 struct loop
*loop
= alloc_loop ();
5354 loop
->header
= body_bb
;
5355 if (collapse_bb
== NULL
)
5356 loop
->latch
= cont_bb
;
5357 add_loop (loop
, body_bb
->loop_father
);
5361 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5362 partitioned loop. The lowering here is abstracted, in that the
5363 loop parameters are passed through internal functions, which are
5364 further lowered by oacc_device_lower, once we get to the target
5365 compiler. The loop is of the form:
5367 for (V = B; V LTGT E; V += S) {BODY}
5369 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5370 (constant 0 for no chunking) and we will have a GWV partitioning
5371 mask, specifying dimensions over which the loop is to be
5372 partitioned (see note below). We generate code that looks like
5373 (this ignores tiling):
5375 <entry_bb> [incoming FALL->body, BRANCH->exit]
5376 typedef signedintify (typeof (V)) T; // underlying signed integral type
5379 T DIR = LTGT == '<' ? +1 : -1;
5380 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5381 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5383 <head_bb> [created by splitting end of entry_bb]
5384 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5385 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5386 if (!(offset LTGT bound)) goto bottom_bb;
5388 <body_bb> [incoming]
5392 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5394 if (offset LTGT bound) goto body_bb; [*]
5396 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5398 if (chunk < chunk_max) goto head_bb;
5400 <exit_bb> [incoming]
5401 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5403 [*] Needed if V live at end of loop. */
5406 expand_oacc_for (struct omp_region
*region
, struct omp_for_data
*fd
)
5408 tree v
= fd
->loop
.v
;
5409 enum tree_code cond_code
= fd
->loop
.cond_code
;
5410 enum tree_code plus_code
= PLUS_EXPR
;
5412 tree chunk_size
= integer_minus_one_node
;
5413 tree gwv
= integer_zero_node
;
5414 tree iter_type
= TREE_TYPE (v
);
5415 tree diff_type
= iter_type
;
5416 tree plus_type
= iter_type
;
5417 struct oacc_collapse
*counts
= NULL
;
5419 gcc_checking_assert (gimple_omp_for_kind (fd
->for_stmt
)
5420 == GF_OMP_FOR_KIND_OACC_LOOP
);
5421 gcc_assert (!gimple_omp_for_combined_into_p (fd
->for_stmt
));
5422 gcc_assert (cond_code
== LT_EXPR
|| cond_code
== GT_EXPR
);
5424 if (POINTER_TYPE_P (iter_type
))
5426 plus_code
= POINTER_PLUS_EXPR
;
5427 plus_type
= sizetype
;
5429 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
5430 diff_type
= signed_type_for (diff_type
);
5431 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
5432 diff_type
= integer_type_node
;
5434 basic_block entry_bb
= region
->entry
; /* BB ending in OMP_FOR */
5435 basic_block exit_bb
= region
->exit
; /* BB ending in OMP_RETURN */
5436 basic_block cont_bb
= region
->cont
; /* BB ending in OMP_CONTINUE */
5437 basic_block bottom_bb
= NULL
;
5439 /* entry_bb has two successors; the branch edge is to the exit
5440 block, fallthrough edge to body. */
5441 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2
5442 && BRANCH_EDGE (entry_bb
)->dest
== exit_bb
);
5444 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5445 body_bb, or to a block whose only successor is the body_bb. Its
5446 fallthrough successor is the final block (same as the branch
5447 successor of the entry_bb). */
5450 basic_block body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
5451 basic_block bed
= BRANCH_EDGE (cont_bb
)->dest
;
5453 gcc_assert (FALLTHRU_EDGE (cont_bb
)->dest
== exit_bb
);
5454 gcc_assert (bed
== body_bb
|| single_succ_edge (bed
)->dest
== body_bb
);
5457 gcc_assert (!gimple_in_ssa_p (cfun
));
5459 /* The exit block only has entry_bb and cont_bb as predecessors. */
5460 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1 + (cont_bb
!= NULL
));
5463 tree chunk_max
= NULL_TREE
;
5465 tree step
= create_tmp_var (diff_type
, ".step");
5466 bool up
= cond_code
== LT_EXPR
;
5467 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
5468 bool chunking
= !gimple_in_ssa_p (cfun
);
5472 tree tile_size
= NULL_TREE
;
5473 tree element_s
= NULL_TREE
;
5474 tree e_bound
= NULL_TREE
, e_offset
= NULL_TREE
, e_step
= NULL_TREE
;
5475 basic_block elem_body_bb
= NULL
;
5476 basic_block elem_cont_bb
= NULL
;
5478 /* SSA instances. */
5479 tree offset_incr
= NULL_TREE
;
5480 tree offset_init
= NULL_TREE
;
5482 gimple_stmt_iterator gsi
;
5488 edge split
, be
, fte
;
5490 /* Split the end of entry_bb to create head_bb. */
5491 split
= split_block (entry_bb
, last_stmt (entry_bb
));
5492 basic_block head_bb
= split
->dest
;
5493 entry_bb
= split
->src
;
5495 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5496 gsi
= gsi_last_nondebug_bb (entry_bb
);
5497 gomp_for
*for_stmt
= as_a
<gomp_for
*> (gsi_stmt (gsi
));
5498 loc
= gimple_location (for_stmt
);
5500 if (gimple_in_ssa_p (cfun
))
5502 offset_init
= gimple_omp_for_index (for_stmt
, 0);
5503 gcc_assert (integer_zerop (fd
->loop
.n1
));
5504 /* The SSA parallelizer does gang parallelism. */
5505 gwv
= build_int_cst (integer_type_node
, GOMP_DIM_MASK (GOMP_DIM_GANG
));
5508 if (fd
->collapse
> 1 || fd
->tiling
)
5510 gcc_assert (!gimple_in_ssa_p (cfun
) && up
);
5511 counts
= XALLOCAVEC (struct oacc_collapse
, fd
->collapse
);
5512 tree total
= expand_oacc_collapse_init (fd
, &gsi
, counts
,
5513 TREE_TYPE (fd
->loop
.n2
), loc
);
5515 if (SSA_VAR_P (fd
->loop
.n2
))
5517 total
= force_gimple_operand_gsi (&gsi
, total
, false, NULL_TREE
,
5518 true, GSI_SAME_STMT
);
5519 ass
= gimple_build_assign (fd
->loop
.n2
, total
);
5520 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5524 tree b
= fd
->loop
.n1
;
5525 tree e
= fd
->loop
.n2
;
5526 tree s
= fd
->loop
.step
;
5528 b
= force_gimple_operand_gsi (&gsi
, b
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5529 e
= force_gimple_operand_gsi (&gsi
, e
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5531 /* Convert the step, avoiding possible unsigned->signed overflow. */
5532 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
5534 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
5535 s
= fold_convert (diff_type
, s
);
5537 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
5538 s
= force_gimple_operand_gsi (&gsi
, s
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5541 chunk_size
= integer_zero_node
;
5542 expr
= fold_convert (diff_type
, chunk_size
);
5543 chunk_size
= force_gimple_operand_gsi (&gsi
, expr
, true,
5544 NULL_TREE
, true, GSI_SAME_STMT
);
5548 /* Determine the tile size and element step,
5549 modify the outer loop step size. */
5550 tile_size
= create_tmp_var (diff_type
, ".tile_size");
5551 expr
= build_int_cst (diff_type
, 1);
5552 for (int ix
= 0; ix
< fd
->collapse
; ix
++)
5553 expr
= fold_build2 (MULT_EXPR
, diff_type
, counts
[ix
].tile
, expr
);
5554 expr
= force_gimple_operand_gsi (&gsi
, expr
, true,
5555 NULL_TREE
, true, GSI_SAME_STMT
);
5556 ass
= gimple_build_assign (tile_size
, expr
);
5557 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5559 element_s
= create_tmp_var (diff_type
, ".element_s");
5560 ass
= gimple_build_assign (element_s
, s
);
5561 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5563 expr
= fold_build2 (MULT_EXPR
, diff_type
, s
, tile_size
);
5564 s
= force_gimple_operand_gsi (&gsi
, expr
, true,
5565 NULL_TREE
, true, GSI_SAME_STMT
);
5568 /* Determine the range, avoiding possible unsigned->signed overflow. */
5569 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
5570 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
5571 fold_convert (plus_type
, negating
? b
: e
),
5572 fold_convert (plus_type
, negating
? e
: b
));
5573 expr
= fold_convert (diff_type
, expr
);
5575 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
5576 tree range
= force_gimple_operand_gsi (&gsi
, expr
, true,
5577 NULL_TREE
, true, GSI_SAME_STMT
);
5579 chunk_no
= build_int_cst (diff_type
, 0);
5582 gcc_assert (!gimple_in_ssa_p (cfun
));
5585 chunk_max
= create_tmp_var (diff_type
, ".chunk_max");
5586 chunk_no
= create_tmp_var (diff_type
, ".chunk_no");
5588 ass
= gimple_build_assign (chunk_no
, expr
);
5589 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5591 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5592 build_int_cst (integer_type_node
,
5593 IFN_GOACC_LOOP_CHUNKS
),
5594 dir
, range
, s
, chunk_size
, gwv
);
5595 gimple_call_set_lhs (call
, chunk_max
);
5596 gimple_set_location (call
, loc
);
5597 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5600 chunk_size
= chunk_no
;
5602 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
5603 build_int_cst (integer_type_node
,
5604 IFN_GOACC_LOOP_STEP
),
5605 dir
, range
, s
, chunk_size
, gwv
);
5606 gimple_call_set_lhs (call
, step
);
5607 gimple_set_location (call
, loc
);
5608 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5610 /* Remove the GIMPLE_OMP_FOR. */
5611 gsi_remove (&gsi
, true);
5613 /* Fixup edges from head_bb. */
5614 be
= BRANCH_EDGE (head_bb
);
5615 fte
= FALLTHRU_EDGE (head_bb
);
5616 be
->flags
|= EDGE_FALSE_VALUE
;
5617 fte
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5619 basic_block body_bb
= fte
->dest
;
5621 if (gimple_in_ssa_p (cfun
))
5623 gsi
= gsi_last_nondebug_bb (cont_bb
);
5624 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5626 offset
= gimple_omp_continue_control_use (cont_stmt
);
5627 offset_incr
= gimple_omp_continue_control_def (cont_stmt
);
5631 offset
= create_tmp_var (diff_type
, ".offset");
5632 offset_init
= offset_incr
= offset
;
5634 bound
= create_tmp_var (TREE_TYPE (offset
), ".bound");
5636 /* Loop offset & bound go into head_bb. */
5637 gsi
= gsi_start_bb (head_bb
);
5639 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5640 build_int_cst (integer_type_node
,
5641 IFN_GOACC_LOOP_OFFSET
),
5643 chunk_size
, gwv
, chunk_no
);
5644 gimple_call_set_lhs (call
, offset_init
);
5645 gimple_set_location (call
, loc
);
5646 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5648 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
5649 build_int_cst (integer_type_node
,
5650 IFN_GOACC_LOOP_BOUND
),
5652 chunk_size
, gwv
, offset_init
);
5653 gimple_call_set_lhs (call
, bound
);
5654 gimple_set_location (call
, loc
);
5655 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
5657 expr
= build2 (cond_code
, boolean_type_node
, offset_init
, bound
);
5658 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5659 GSI_CONTINUE_LINKING
);
5661 /* V assignment goes into body_bb. */
5662 if (!gimple_in_ssa_p (cfun
))
5664 gsi
= gsi_start_bb (body_bb
);
5666 expr
= build2 (plus_code
, iter_type
, b
,
5667 fold_convert (plus_type
, offset
));
5668 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5669 true, GSI_SAME_STMT
);
5670 ass
= gimple_build_assign (v
, expr
);
5671 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5673 if (fd
->collapse
> 1 || fd
->tiling
)
5674 expand_oacc_collapse_vars (fd
, false, &gsi
, counts
, v
);
5678 /* Determine the range of the element loop -- usually simply
5679 the tile_size, but could be smaller if the final
5680 iteration of the outer loop is a partial tile. */
5681 tree e_range
= create_tmp_var (diff_type
, ".e_range");
5683 expr
= build2 (MIN_EXPR
, diff_type
,
5684 build2 (MINUS_EXPR
, diff_type
, bound
, offset
),
5685 build2 (MULT_EXPR
, diff_type
, tile_size
,
5687 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5688 true, GSI_SAME_STMT
);
5689 ass
= gimple_build_assign (e_range
, expr
);
5690 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5692 /* Determine bound, offset & step of inner loop. */
5693 e_bound
= create_tmp_var (diff_type
, ".e_bound");
5694 e_offset
= create_tmp_var (diff_type
, ".e_offset");
5695 e_step
= create_tmp_var (diff_type
, ".e_step");
5697 /* Mark these as element loops. */
5698 tree t
, e_gwv
= integer_minus_one_node
;
5699 tree chunk
= build_int_cst (diff_type
, 0); /* Never chunked. */
5701 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_OFFSET
);
5702 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5703 element_s
, chunk
, e_gwv
, chunk
);
5704 gimple_call_set_lhs (call
, e_offset
);
5705 gimple_set_location (call
, loc
);
5706 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5708 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_BOUND
);
5709 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
5710 element_s
, chunk
, e_gwv
, e_offset
);
5711 gimple_call_set_lhs (call
, e_bound
);
5712 gimple_set_location (call
, loc
);
5713 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5715 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_STEP
);
5716 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6, t
, dir
, e_range
,
5717 element_s
, chunk
, e_gwv
);
5718 gimple_call_set_lhs (call
, e_step
);
5719 gimple_set_location (call
, loc
);
5720 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
5722 /* Add test and split block. */
5723 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5724 stmt
= gimple_build_cond_empty (expr
);
5725 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5726 split
= split_block (body_bb
, stmt
);
5727 elem_body_bb
= split
->dest
;
5728 if (cont_bb
== body_bb
)
5729 cont_bb
= elem_body_bb
;
5730 body_bb
= split
->src
;
5732 split
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
5734 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5735 if (cont_bb
== NULL
)
5737 edge e
= make_edge (body_bb
, exit_bb
, EDGE_FALSE_VALUE
);
5738 e
->probability
= profile_probability::even ();
5739 split
->probability
= profile_probability::even ();
5742 /* Initialize the user's loop vars. */
5743 gsi
= gsi_start_bb (elem_body_bb
);
5744 expand_oacc_collapse_vars (fd
, true, &gsi
, counts
, e_offset
);
5748 /* Loop increment goes into cont_bb. If this is not a loop, we
5749 will have spawned threads as if it was, and each one will
5750 execute one iteration. The specification is not explicit about
5751 whether such constructs are ill-formed or not, and they can
5752 occur, especially when noreturn routines are involved. */
5755 gsi
= gsi_last_nondebug_bb (cont_bb
);
5756 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5757 loc
= gimple_location (cont_stmt
);
5761 /* Insert element loop increment and test. */
5762 expr
= build2 (PLUS_EXPR
, diff_type
, e_offset
, e_step
);
5763 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5764 true, GSI_SAME_STMT
);
5765 ass
= gimple_build_assign (e_offset
, expr
);
5766 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5767 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
5769 stmt
= gimple_build_cond_empty (expr
);
5770 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5771 split
= split_block (cont_bb
, stmt
);
5772 elem_cont_bb
= split
->src
;
5773 cont_bb
= split
->dest
;
5775 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5776 split
->probability
= profile_probability::unlikely ().guessed ();
5778 = make_edge (elem_cont_bb
, elem_body_bb
, EDGE_TRUE_VALUE
);
5779 latch_edge
->probability
= profile_probability::likely ().guessed ();
5781 edge skip_edge
= make_edge (body_bb
, cont_bb
, EDGE_FALSE_VALUE
);
5782 skip_edge
->probability
= profile_probability::unlikely ().guessed ();
5783 edge loop_entry_edge
= EDGE_SUCC (body_bb
, 1 - skip_edge
->dest_idx
);
5784 loop_entry_edge
->probability
5785 = profile_probability::likely ().guessed ();
5787 gsi
= gsi_for_stmt (cont_stmt
);
5790 /* Increment offset. */
5791 if (gimple_in_ssa_p (cfun
))
5792 expr
= build2 (plus_code
, iter_type
, offset
,
5793 fold_convert (plus_type
, step
));
5795 expr
= build2 (PLUS_EXPR
, diff_type
, offset
, step
);
5796 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5797 true, GSI_SAME_STMT
);
5798 ass
= gimple_build_assign (offset_incr
, expr
);
5799 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5800 expr
= build2 (cond_code
, boolean_type_node
, offset_incr
, bound
);
5801 gsi_insert_before (&gsi
, gimple_build_cond_empty (expr
), GSI_SAME_STMT
);
5803 /* Remove the GIMPLE_OMP_CONTINUE. */
5804 gsi_remove (&gsi
, true);
5806 /* Fixup edges from cont_bb. */
5807 be
= BRANCH_EDGE (cont_bb
);
5808 fte
= FALLTHRU_EDGE (cont_bb
);
5809 be
->flags
|= EDGE_TRUE_VALUE
;
5810 fte
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5814 /* Split the beginning of exit_bb to make bottom_bb. We
5815 need to insert a nop at the start, because splitting is
5816 after a stmt, not before. */
5817 gsi
= gsi_start_bb (exit_bb
);
5818 stmt
= gimple_build_nop ();
5819 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5820 split
= split_block (exit_bb
, stmt
);
5821 bottom_bb
= split
->src
;
5822 exit_bb
= split
->dest
;
5823 gsi
= gsi_last_bb (bottom_bb
);
5825 /* Chunk increment and test goes into bottom_bb. */
5826 expr
= build2 (PLUS_EXPR
, diff_type
, chunk_no
,
5827 build_int_cst (diff_type
, 1));
5828 ass
= gimple_build_assign (chunk_no
, expr
);
5829 gsi_insert_after (&gsi
, ass
, GSI_CONTINUE_LINKING
);
5831 /* Chunk test at end of bottom_bb. */
5832 expr
= build2 (LT_EXPR
, boolean_type_node
, chunk_no
, chunk_max
);
5833 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5834 GSI_CONTINUE_LINKING
);
5836 /* Fixup edges from bottom_bb. */
5837 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5838 split
->probability
= profile_probability::unlikely ().guessed ();
5839 edge latch_edge
= make_edge (bottom_bb
, head_bb
, EDGE_TRUE_VALUE
);
5840 latch_edge
->probability
= profile_probability::likely ().guessed ();
5844 gsi
= gsi_last_nondebug_bb (exit_bb
);
5845 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
5846 loc
= gimple_location (gsi_stmt (gsi
));
5848 if (!gimple_in_ssa_p (cfun
))
5850 /* Insert the final value of V, in case it is live. This is the
5851 value for the only thread that survives past the join. */
5852 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
5853 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
5854 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
5855 expr
= fold_build2 (MULT_EXPR
, diff_type
, expr
, s
);
5856 expr
= build2 (plus_code
, iter_type
, b
, fold_convert (plus_type
, expr
));
5857 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5858 true, GSI_SAME_STMT
);
5859 ass
= gimple_build_assign (v
, expr
);
5860 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5863 /* Remove the OMP_RETURN. */
5864 gsi_remove (&gsi
, true);
5868 /* We now have one, two or three nested loops. Update the loop
5870 struct loop
*parent
= entry_bb
->loop_father
;
5871 struct loop
*body
= body_bb
->loop_father
;
5875 struct loop
*chunk_loop
= alloc_loop ();
5876 chunk_loop
->header
= head_bb
;
5877 chunk_loop
->latch
= bottom_bb
;
5878 add_loop (chunk_loop
, parent
);
5879 parent
= chunk_loop
;
5881 else if (parent
!= body
)
5883 gcc_assert (body
->header
== body_bb
);
5884 gcc_assert (body
->latch
== cont_bb
5885 || single_pred (body
->latch
) == cont_bb
);
5891 struct loop
*body_loop
= alloc_loop ();
5892 body_loop
->header
= body_bb
;
5893 body_loop
->latch
= cont_bb
;
5894 add_loop (body_loop
, parent
);
5898 /* Insert tiling's element loop. */
5899 struct loop
*inner_loop
= alloc_loop ();
5900 inner_loop
->header
= elem_body_bb
;
5901 inner_loop
->latch
= elem_cont_bb
;
5902 add_loop (inner_loop
, body_loop
);
5908 /* Expand the OMP loop defined by REGION. */
5911 expand_omp_for (struct omp_region
*region
, gimple
*inner_stmt
)
5913 struct omp_for_data fd
;
5914 struct omp_for_data_loop
*loops
;
5917 = (struct omp_for_data_loop
*)
5918 alloca (gimple_omp_for_collapse (last_stmt (region
->entry
))
5919 * sizeof (struct omp_for_data_loop
));
5920 omp_extract_for_data (as_a
<gomp_for
*> (last_stmt (region
->entry
)),
5922 region
->sched_kind
= fd
.sched_kind
;
5923 region
->sched_modifiers
= fd
.sched_modifiers
;
5925 gcc_assert (EDGE_COUNT (region
->entry
->succs
) == 2);
5926 BRANCH_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
5927 FALLTHRU_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
5930 gcc_assert (EDGE_COUNT (region
->cont
->succs
) == 2);
5931 BRANCH_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
5932 FALLTHRU_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
5935 /* If there isn't a continue then this is a degenerate case where
5936 the introduction of abnormal edges during lowering will prevent
5937 original loops from being detected. Fix that up. */
5938 loops_state_set (LOOPS_NEED_FIXUP
);
5940 if (gimple_omp_for_kind (fd
.for_stmt
) & GF_OMP_FOR_SIMD
)
5941 expand_omp_simd (region
, &fd
);
5942 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_OACC_LOOP
)
5944 gcc_assert (!inner_stmt
);
5945 expand_oacc_for (region
, &fd
);
5947 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_TASKLOOP
)
5949 if (gimple_omp_for_combined_into_p (fd
.for_stmt
))
5950 expand_omp_taskloop_for_inner (region
, &fd
, inner_stmt
);
5952 expand_omp_taskloop_for_outer (region
, &fd
, inner_stmt
);
5954 else if (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
5955 && !fd
.have_ordered
)
5957 if (fd
.chunk_size
== NULL
)
5958 expand_omp_for_static_nochunk (region
, &fd
, inner_stmt
);
5960 expand_omp_for_static_chunk (region
, &fd
, inner_stmt
);
5964 int fn_index
, start_ix
, next_ix
;
5965 unsigned HOST_WIDE_INT sched
= 0;
5966 tree sched_arg
= NULL_TREE
;
5968 gcc_assert (gimple_omp_for_kind (fd
.for_stmt
)
5969 == GF_OMP_FOR_KIND_FOR
);
5970 if (fd
.chunk_size
== NULL
5971 && fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
)
5972 fd
.chunk_size
= integer_zero_node
;
5973 switch (fd
.sched_kind
)
5975 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
5976 if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_NONMONOTONIC
) != 0)
5978 gcc_assert (!fd
.have_ordered
);
5982 else if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0
5983 && !fd
.have_ordered
)
5988 sched
= (HOST_WIDE_INT_1U
<< 31);
5991 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
5992 case OMP_CLAUSE_SCHEDULE_GUIDED
:
5993 if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0
5994 && !fd
.have_ordered
)
5996 fn_index
= 3 + fd
.sched_kind
;
5997 sched
= (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_GUIDED
) + 2;
6000 fn_index
= fd
.sched_kind
;
6001 sched
= (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_GUIDED
) + 2;
6002 sched
+= (HOST_WIDE_INT_1U
<< 31);
6004 case OMP_CLAUSE_SCHEDULE_STATIC
:
6005 gcc_assert (fd
.have_ordered
);
6007 sched
= (HOST_WIDE_INT_1U
<< 31) + 1;
6013 fn_index
+= fd
.have_ordered
* 8;
6015 start_ix
= ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START
) + fn_index
;
6017 start_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_START
) + fn_index
;
6018 next_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
) + fn_index
;
6019 if (fd
.have_reductemp
)
6022 start_ix
= (int)BUILT_IN_GOMP_LOOP_DOACROSS_START
;
6023 else if (fd
.have_ordered
)
6024 start_ix
= (int)BUILT_IN_GOMP_LOOP_ORDERED_START
;
6026 start_ix
= (int)BUILT_IN_GOMP_LOOP_START
;
6027 sched_arg
= build_int_cstu (long_integer_type_node
, sched
);
6029 fd
.chunk_size
= integer_zero_node
;
6031 if (fd
.iter_type
== long_long_unsigned_type_node
)
6033 start_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6034 - (int)BUILT_IN_GOMP_LOOP_STATIC_START
);
6035 next_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6036 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
);
6038 expand_omp_for_generic (region
, &fd
, (enum built_in_function
) start_ix
,
6039 (enum built_in_function
) next_ix
, sched_arg
,
6043 if (gimple_in_ssa_p (cfun
))
6044 update_ssa (TODO_update_ssa_only_virtuals
);
6047 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6049 v = GOMP_sections_start (n);
6066 v = GOMP_sections_next ();
6071 If this is a combined parallel sections, replace the call to
6072 GOMP_sections_start with call to GOMP_sections_next. */
6075 expand_omp_sections (struct omp_region
*region
)
6077 tree t
, u
, vin
= NULL
, vmain
, vnext
, l2
;
6079 basic_block entry_bb
, l0_bb
, l1_bb
, l2_bb
, default_bb
;
6080 gimple_stmt_iterator si
, switch_si
;
6081 gomp_sections
*sections_stmt
;
6083 gomp_continue
*cont
;
6086 struct omp_region
*inner
;
6088 bool exit_reachable
= region
->cont
!= NULL
;
6090 gcc_assert (region
->exit
!= NULL
);
6091 entry_bb
= region
->entry
;
6092 l0_bb
= single_succ (entry_bb
);
6093 l1_bb
= region
->cont
;
6094 l2_bb
= region
->exit
;
6095 if (single_pred_p (l2_bb
) && single_pred (l2_bb
) == l0_bb
)
6096 l2
= gimple_block_label (l2_bb
);
6099 /* This can happen if there are reductions. */
6100 len
= EDGE_COUNT (l0_bb
->succs
);
6101 gcc_assert (len
> 0);
6102 e
= EDGE_SUCC (l0_bb
, len
- 1);
6103 si
= gsi_last_nondebug_bb (e
->dest
);
6106 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
6107 l2
= gimple_block_label (e
->dest
);
6109 FOR_EACH_EDGE (e
, ei
, l0_bb
->succs
)
6111 si
= gsi_last_nondebug_bb (e
->dest
);
6113 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
6115 l2
= gimple_block_label (e
->dest
);
6121 default_bb
= create_empty_bb (l1_bb
->prev_bb
);
6123 default_bb
= create_empty_bb (l0_bb
);
6125 /* We will build a switch() with enough cases for all the
6126 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6127 and a default case to abort if something goes wrong. */
6128 len
= EDGE_COUNT (l0_bb
->succs
);
6130 /* Use vec::quick_push on label_vec throughout, since we know the size
6132 auto_vec
<tree
> label_vec (len
);
6134 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6135 GIMPLE_OMP_SECTIONS statement. */
6136 si
= gsi_last_nondebug_bb (entry_bb
);
6137 sections_stmt
= as_a
<gomp_sections
*> (gsi_stmt (si
));
6138 gcc_assert (gimple_code (sections_stmt
) == GIMPLE_OMP_SECTIONS
);
6139 vin
= gimple_omp_sections_control (sections_stmt
);
6140 tree clauses
= gimple_omp_sections_clauses (sections_stmt
);
6141 tree reductmp
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
6144 tree reductions
= OMP_CLAUSE_DECL (reductmp
);
6145 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
6146 gimple
*g
= SSA_NAME_DEF_STMT (reductions
);
6147 reductions
= gimple_assign_rhs1 (g
);
6148 OMP_CLAUSE_DECL (reductmp
) = reductions
;
6149 gimple_stmt_iterator gsi
= gsi_for_stmt (g
);
6150 t
= build_int_cst (unsigned_type_node
, len
- 1);
6151 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START
);
6152 stmt
= gimple_build_call (u
, 3, t
, reductions
, null_pointer_node
);
6153 gimple_call_set_lhs (stmt
, vin
);
6154 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
6155 gsi_remove (&gsi
, true);
6156 release_ssa_name (gimple_assign_lhs (g
));
6158 else if (!is_combined_parallel (region
))
6160 /* If we are not inside a combined parallel+sections region,
6161 call GOMP_sections_start. */
6162 t
= build_int_cst (unsigned_type_node
, len
- 1);
6163 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START
);
6164 stmt
= gimple_build_call (u
, 1, t
);
6168 /* Otherwise, call GOMP_sections_next. */
6169 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
6170 stmt
= gimple_build_call (u
, 0);
6174 gimple_call_set_lhs (stmt
, vin
);
6175 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
6177 gsi_remove (&si
, true);
6179 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6181 switch_si
= gsi_last_nondebug_bb (l0_bb
);
6182 gcc_assert (gimple_code (gsi_stmt (switch_si
)) == GIMPLE_OMP_SECTIONS_SWITCH
);
6185 cont
= as_a
<gomp_continue
*> (last_stmt (l1_bb
));
6186 gcc_assert (gimple_code (cont
) == GIMPLE_OMP_CONTINUE
);
6187 vmain
= gimple_omp_continue_control_use (cont
);
6188 vnext
= gimple_omp_continue_control_def (cont
);
6196 t
= build_case_label (build_int_cst (unsigned_type_node
, 0), NULL
, l2
);
6197 label_vec
.quick_push (t
);
6200 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6201 for (inner
= region
->inner
, casei
= 1;
6203 inner
= inner
->next
, i
++, casei
++)
6205 basic_block s_entry_bb
, s_exit_bb
;
6207 /* Skip optional reduction region. */
6208 if (inner
->type
== GIMPLE_OMP_ATOMIC_LOAD
)
6215 s_entry_bb
= inner
->entry
;
6216 s_exit_bb
= inner
->exit
;
6218 t
= gimple_block_label (s_entry_bb
);
6219 u
= build_int_cst (unsigned_type_node
, casei
);
6220 u
= build_case_label (u
, NULL
, t
);
6221 label_vec
.quick_push (u
);
6223 si
= gsi_last_nondebug_bb (s_entry_bb
);
6224 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SECTION
);
6225 gcc_assert (i
< len
|| gimple_omp_section_last_p (gsi_stmt (si
)));
6226 gsi_remove (&si
, true);
6227 single_succ_edge (s_entry_bb
)->flags
= EDGE_FALLTHRU
;
6229 if (s_exit_bb
== NULL
)
6232 si
= gsi_last_nondebug_bb (s_exit_bb
);
6233 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
6234 gsi_remove (&si
, true);
6236 single_succ_edge (s_exit_bb
)->flags
= EDGE_FALLTHRU
;
6239 /* Error handling code goes in DEFAULT_BB. */
6240 t
= gimple_block_label (default_bb
);
6241 u
= build_case_label (NULL
, NULL
, t
);
6242 make_edge (l0_bb
, default_bb
, 0);
6243 add_bb_to_loop (default_bb
, current_loops
->tree_root
);
6245 stmt
= gimple_build_switch (vmain
, u
, label_vec
);
6246 gsi_insert_after (&switch_si
, stmt
, GSI_SAME_STMT
);
6247 gsi_remove (&switch_si
, true);
6249 si
= gsi_start_bb (default_bb
);
6250 stmt
= gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP
), 0);
6251 gsi_insert_after (&si
, stmt
, GSI_CONTINUE_LINKING
);
6257 /* Code to get the next section goes in L1_BB. */
6258 si
= gsi_last_nondebug_bb (l1_bb
);
6259 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CONTINUE
);
6261 bfn_decl
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
6262 stmt
= gimple_build_call (bfn_decl
, 0);
6263 gimple_call_set_lhs (stmt
, vnext
);
6264 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
6265 gsi_remove (&si
, true);
6267 single_succ_edge (l1_bb
)->flags
= EDGE_FALLTHRU
;
6270 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6271 si
= gsi_last_nondebug_bb (l2_bb
);
6272 if (gimple_omp_return_nowait_p (gsi_stmt (si
)))
6273 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT
);
6274 else if (gimple_omp_return_lhs (gsi_stmt (si
)))
6275 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL
);
6277 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END
);
6278 stmt
= gimple_build_call (t
, 0);
6279 if (gimple_omp_return_lhs (gsi_stmt (si
)))
6280 gimple_call_set_lhs (stmt
, gimple_omp_return_lhs (gsi_stmt (si
)));
6281 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
6282 gsi_remove (&si
, true);
6284 set_immediate_dominator (CDI_DOMINATORS
, default_bb
, l0_bb
);
6287 /* Expand code for an OpenMP single directive. We've already expanded
6288 much of the code, here we simply place the GOMP_barrier call. */
6291 expand_omp_single (struct omp_region
*region
)
6293 basic_block entry_bb
, exit_bb
;
6294 gimple_stmt_iterator si
;
6296 entry_bb
= region
->entry
;
6297 exit_bb
= region
->exit
;
6299 si
= gsi_last_nondebug_bb (entry_bb
);
6300 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
);
6301 gsi_remove (&si
, true);
6302 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
6304 si
= gsi_last_nondebug_bb (exit_bb
);
6305 if (!gimple_omp_return_nowait_p (gsi_stmt (si
)))
6307 tree t
= gimple_omp_return_lhs (gsi_stmt (si
));
6308 gsi_insert_after (&si
, omp_build_barrier (t
), GSI_SAME_STMT
);
6310 gsi_remove (&si
, true);
6311 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
6314 /* Generic expansion for OpenMP synchronization directives: master,
6315 ordered and critical. All we need to do here is remove the entry
6316 and exit markers for REGION. */
6319 expand_omp_synch (struct omp_region
*region
)
6321 basic_block entry_bb
, exit_bb
;
6322 gimple_stmt_iterator si
;
6324 entry_bb
= region
->entry
;
6325 exit_bb
= region
->exit
;
6327 si
= gsi_last_nondebug_bb (entry_bb
);
6328 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
6329 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_MASTER
6330 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TASKGROUP
6331 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ORDERED
6332 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CRITICAL
6333 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TEAMS
);
6334 if (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TEAMS
6335 && gimple_omp_teams_host (as_a
<gomp_teams
*> (gsi_stmt (si
))))
6337 expand_omp_taskreg (region
);
6340 gsi_remove (&si
, true);
6341 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
6345 si
= gsi_last_nondebug_bb (exit_bb
);
6346 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
6347 gsi_remove (&si
, true);
6348 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
6352 /* Translate enum omp_memory_order to enum memmodel. The two enums
6353 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6356 static enum memmodel
6357 omp_memory_order_to_memmodel (enum omp_memory_order mo
)
6361 case OMP_MEMORY_ORDER_RELAXED
: return MEMMODEL_RELAXED
;
6362 case OMP_MEMORY_ORDER_ACQUIRE
: return MEMMODEL_ACQUIRE
;
6363 case OMP_MEMORY_ORDER_RELEASE
: return MEMMODEL_RELEASE
;
6364 case OMP_MEMORY_ORDER_ACQ_REL
: return MEMMODEL_ACQ_REL
;
6365 case OMP_MEMORY_ORDER_SEQ_CST
: return MEMMODEL_SEQ_CST
;
6366 default: gcc_unreachable ();
6370 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6371 operation as a normal volatile load. */
6374 expand_omp_atomic_load (basic_block load_bb
, tree addr
,
6375 tree loaded_val
, int index
)
6377 enum built_in_function tmpbase
;
6378 gimple_stmt_iterator gsi
;
6379 basic_block store_bb
;
6382 tree decl
, call
, type
, itype
;
6384 gsi
= gsi_last_nondebug_bb (load_bb
);
6385 stmt
= gsi_stmt (gsi
);
6386 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
6387 loc
= gimple_location (stmt
);
6389 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6390 is smaller than word size, then expand_atomic_load assumes that the load
6391 is atomic. We could avoid the builtin entirely in this case. */
6393 tmpbase
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
6394 decl
= builtin_decl_explicit (tmpbase
);
6395 if (decl
== NULL_TREE
)
6398 type
= TREE_TYPE (loaded_val
);
6399 itype
= TREE_TYPE (TREE_TYPE (decl
));
6401 enum omp_memory_order omo
= gimple_omp_atomic_memory_order (stmt
);
6402 tree mo
= build_int_cst (NULL
, omp_memory_order_to_memmodel (omo
));
6403 call
= build_call_expr_loc (loc
, decl
, 2, addr
, mo
);
6404 if (!useless_type_conversion_p (type
, itype
))
6405 call
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
6406 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
6408 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6409 gsi_remove (&gsi
, true);
6411 store_bb
= single_succ (load_bb
);
6412 gsi
= gsi_last_nondebug_bb (store_bb
);
6413 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
6414 gsi_remove (&gsi
, true);
6416 if (gimple_in_ssa_p (cfun
))
6417 update_ssa (TODO_update_ssa_no_phi
);
6422 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6423 operation as a normal volatile store. */
6426 expand_omp_atomic_store (basic_block load_bb
, tree addr
,
6427 tree loaded_val
, tree stored_val
, int index
)
6429 enum built_in_function tmpbase
;
6430 gimple_stmt_iterator gsi
;
6431 basic_block store_bb
= single_succ (load_bb
);
6434 tree decl
, call
, type
, itype
;
6438 gsi
= gsi_last_nondebug_bb (load_bb
);
6439 stmt
= gsi_stmt (gsi
);
6440 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
6442 /* If the load value is needed, then this isn't a store but an exchange. */
6443 exchange
= gimple_omp_atomic_need_value_p (stmt
);
6445 gsi
= gsi_last_nondebug_bb (store_bb
);
6446 stmt
= gsi_stmt (gsi
);
6447 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_STORE
);
6448 loc
= gimple_location (stmt
);
6450 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6451 is smaller than word size, then expand_atomic_store assumes that the store
6452 is atomic. We could avoid the builtin entirely in this case. */
6454 tmpbase
= (exchange
? BUILT_IN_ATOMIC_EXCHANGE_N
: BUILT_IN_ATOMIC_STORE_N
);
6455 tmpbase
= (enum built_in_function
) ((int) tmpbase
+ index
+ 1);
6456 decl
= builtin_decl_explicit (tmpbase
);
6457 if (decl
== NULL_TREE
)
6460 type
= TREE_TYPE (stored_val
);
6462 /* Dig out the type of the function's second argument. */
6463 itype
= TREE_TYPE (decl
);
6464 itype
= TYPE_ARG_TYPES (itype
);
6465 itype
= TREE_CHAIN (itype
);
6466 itype
= TREE_VALUE (itype
);
6467 imode
= TYPE_MODE (itype
);
6469 if (exchange
&& !can_atomic_exchange_p (imode
, true))
6472 if (!useless_type_conversion_p (itype
, type
))
6473 stored_val
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, itype
, stored_val
);
6474 enum omp_memory_order omo
= gimple_omp_atomic_memory_order (stmt
);
6475 tree mo
= build_int_cst (NULL
, omp_memory_order_to_memmodel (omo
));
6476 call
= build_call_expr_loc (loc
, decl
, 3, addr
, stored_val
, mo
);
6479 if (!useless_type_conversion_p (type
, itype
))
6480 call
= build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
6481 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
6484 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6485 gsi_remove (&gsi
, true);
6487 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6488 gsi
= gsi_last_nondebug_bb (load_bb
);
6489 gsi_remove (&gsi
, true);
6491 if (gimple_in_ssa_p (cfun
))
6492 update_ssa (TODO_update_ssa_no_phi
);
6497 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6498 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6499 size of the data type, and thus usable to find the index of the builtin
6500 decl. Returns false if the expression is not of the proper form. */
6503 expand_omp_atomic_fetch_op (basic_block load_bb
,
6504 tree addr
, tree loaded_val
,
6505 tree stored_val
, int index
)
6507 enum built_in_function oldbase
, newbase
, tmpbase
;
6508 tree decl
, itype
, call
;
6510 basic_block store_bb
= single_succ (load_bb
);
6511 gimple_stmt_iterator gsi
;
6514 enum tree_code code
;
6515 bool need_old
, need_new
;
6518 /* We expect to find the following sequences:
6521 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6524 val = tmp OP something; (or: something OP tmp)
6525 GIMPLE_OMP_STORE (val)
6527 ???FIXME: Allow a more flexible sequence.
6528 Perhaps use data flow to pick the statements.
6532 gsi
= gsi_after_labels (store_bb
);
6533 stmt
= gsi_stmt (gsi
);
6534 if (is_gimple_debug (stmt
))
6536 gsi_next_nondebug (&gsi
);
6537 if (gsi_end_p (gsi
))
6539 stmt
= gsi_stmt (gsi
);
6541 loc
= gimple_location (stmt
);
6542 if (!is_gimple_assign (stmt
))
6544 gsi_next_nondebug (&gsi
);
6545 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_OMP_ATOMIC_STORE
)
6547 need_new
= gimple_omp_atomic_need_value_p (gsi_stmt (gsi
));
6548 need_old
= gimple_omp_atomic_need_value_p (last_stmt (load_bb
));
6549 enum omp_memory_order omo
6550 = gimple_omp_atomic_memory_order (last_stmt (load_bb
));
6551 enum memmodel mo
= omp_memory_order_to_memmodel (omo
);
6552 gcc_checking_assert (!need_old
|| !need_new
);
6554 if (!operand_equal_p (gimple_assign_lhs (stmt
), stored_val
, 0))
6557 /* Check for one of the supported fetch-op operations. */
6558 code
= gimple_assign_rhs_code (stmt
);
6562 case POINTER_PLUS_EXPR
:
6563 oldbase
= BUILT_IN_ATOMIC_FETCH_ADD_N
;
6564 newbase
= BUILT_IN_ATOMIC_ADD_FETCH_N
;
6567 oldbase
= BUILT_IN_ATOMIC_FETCH_SUB_N
;
6568 newbase
= BUILT_IN_ATOMIC_SUB_FETCH_N
;
6571 oldbase
= BUILT_IN_ATOMIC_FETCH_AND_N
;
6572 newbase
= BUILT_IN_ATOMIC_AND_FETCH_N
;
6575 oldbase
= BUILT_IN_ATOMIC_FETCH_OR_N
;
6576 newbase
= BUILT_IN_ATOMIC_OR_FETCH_N
;
6579 oldbase
= BUILT_IN_ATOMIC_FETCH_XOR_N
;
6580 newbase
= BUILT_IN_ATOMIC_XOR_FETCH_N
;
6586 /* Make sure the expression is of the proper form. */
6587 if (operand_equal_p (gimple_assign_rhs1 (stmt
), loaded_val
, 0))
6588 rhs
= gimple_assign_rhs2 (stmt
);
6589 else if (commutative_tree_code (gimple_assign_rhs_code (stmt
))
6590 && operand_equal_p (gimple_assign_rhs2 (stmt
), loaded_val
, 0))
6591 rhs
= gimple_assign_rhs1 (stmt
);
6595 tmpbase
= ((enum built_in_function
)
6596 ((need_new
? newbase
: oldbase
) + index
+ 1));
6597 decl
= builtin_decl_explicit (tmpbase
);
6598 if (decl
== NULL_TREE
)
6600 itype
= TREE_TYPE (TREE_TYPE (decl
));
6601 imode
= TYPE_MODE (itype
);
6603 /* We could test all of the various optabs involved, but the fact of the
6604 matter is that (with the exception of i486 vs i586 and xadd) all targets
6605 that support any atomic operaton optab also implements compare-and-swap.
6606 Let optabs.c take care of expanding any compare-and-swap loop. */
6607 if (!can_compare_and_swap_p (imode
, true) || !can_atomic_load_p (imode
))
6610 gsi
= gsi_last_nondebug_bb (load_bb
);
6611 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6613 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6614 It only requires that the operation happen atomically. Thus we can
6615 use the RELAXED memory model. */
6616 call
= build_call_expr_loc (loc
, decl
, 3, addr
,
6617 fold_convert_loc (loc
, itype
, rhs
),
6618 build_int_cst (NULL
, mo
));
6620 if (need_old
|| need_new
)
6622 lhs
= need_old
? loaded_val
: stored_val
;
6623 call
= fold_convert_loc (loc
, TREE_TYPE (lhs
), call
);
6624 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, lhs
, call
);
6627 call
= fold_convert_loc (loc
, void_type_node
, call
);
6628 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6629 gsi_remove (&gsi
, true);
6631 gsi
= gsi_last_nondebug_bb (store_bb
);
6632 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
6633 gsi_remove (&gsi
, true);
6634 gsi
= gsi_last_nondebug_bb (store_bb
);
6635 stmt
= gsi_stmt (gsi
);
6636 gsi_remove (&gsi
, true);
6638 if (gimple_in_ssa_p (cfun
))
6640 release_defs (stmt
);
6641 update_ssa (TODO_update_ssa_no_phi
);
6647 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6651 newval = rhs; // with oldval replacing *addr in rhs
6652 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6653 if (oldval != newval)
6656 INDEX is log2 of the size of the data type, and thus usable to find the
6657 index of the builtin decl. */
6660 expand_omp_atomic_pipeline (basic_block load_bb
, basic_block store_bb
,
6661 tree addr
, tree loaded_val
, tree stored_val
,
6664 tree loadedi
, storedi
, initial
, new_storedi
, old_vali
;
6665 tree type
, itype
, cmpxchg
, iaddr
, atype
;
6666 gimple_stmt_iterator si
;
6667 basic_block loop_header
= single_succ (load_bb
);
6670 enum built_in_function fncode
;
6672 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6673 order to use the RELAXED memory model effectively. */
6674 fncode
= (enum built_in_function
)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6676 cmpxchg
= builtin_decl_explicit (fncode
);
6677 if (cmpxchg
== NULL_TREE
)
6679 type
= TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val
));
6681 itype
= TREE_TYPE (TREE_TYPE (cmpxchg
));
6683 if (!can_compare_and_swap_p (TYPE_MODE (itype
), true)
6684 || !can_atomic_load_p (TYPE_MODE (itype
)))
6687 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6688 si
= gsi_last_nondebug_bb (load_bb
);
6689 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6691 /* For floating-point values, we'll need to view-convert them to integers
6692 so that we can perform the atomic compare and swap. Simplify the
6693 following code by always setting up the "i"ntegral variables. */
6694 if (!INTEGRAL_TYPE_P (type
) && !POINTER_TYPE_P (type
))
6698 iaddr
= create_tmp_reg (build_pointer_type_for_mode (itype
, ptr_mode
,
6702 = force_gimple_operand_gsi (&si
,
6703 fold_convert (TREE_TYPE (iaddr
), addr
),
6704 false, NULL_TREE
, true, GSI_SAME_STMT
);
6705 stmt
= gimple_build_assign (iaddr
, iaddr_val
);
6706 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6707 loadedi
= create_tmp_var (itype
);
6708 if (gimple_in_ssa_p (cfun
))
6709 loadedi
= make_ssa_name (loadedi
);
6714 loadedi
= loaded_val
;
6717 fncode
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
6718 tree loaddecl
= builtin_decl_explicit (fncode
);
6721 = fold_convert (atype
,
6722 build_call_expr (loaddecl
, 2, iaddr
,
6723 build_int_cst (NULL_TREE
,
6724 MEMMODEL_RELAXED
)));
6728 = build_int_cst (build_pointer_type_for_mode (atype
, ptr_mode
,
6730 initial
= build2 (MEM_REF
, atype
, iaddr
, off
);
6734 = force_gimple_operand_gsi (&si
, initial
, true, NULL_TREE
, true,
6737 /* Move the value to the LOADEDI temporary. */
6738 if (gimple_in_ssa_p (cfun
))
6740 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header
)));
6741 phi
= create_phi_node (loadedi
, loop_header
);
6742 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, single_succ_edge (load_bb
)),
6746 gsi_insert_before (&si
,
6747 gimple_build_assign (loadedi
, initial
),
6749 if (loadedi
!= loaded_val
)
6751 gimple_stmt_iterator gsi2
;
6754 x
= build1 (VIEW_CONVERT_EXPR
, type
, loadedi
);
6755 gsi2
= gsi_start_bb (loop_header
);
6756 if (gimple_in_ssa_p (cfun
))
6759 x
= force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
6760 true, GSI_SAME_STMT
);
6761 stmt
= gimple_build_assign (loaded_val
, x
);
6762 gsi_insert_before (&gsi2
, stmt
, GSI_SAME_STMT
);
6766 x
= build2 (MODIFY_EXPR
, TREE_TYPE (loaded_val
), loaded_val
, x
);
6767 force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
6768 true, GSI_SAME_STMT
);
6771 gsi_remove (&si
, true);
6773 si
= gsi_last_nondebug_bb (store_bb
);
6774 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
6777 storedi
= stored_val
;
6780 = force_gimple_operand_gsi (&si
,
6781 build1 (VIEW_CONVERT_EXPR
, itype
,
6782 stored_val
), true, NULL_TREE
, true,
6785 /* Build the compare&swap statement. */
6786 new_storedi
= build_call_expr (cmpxchg
, 3, iaddr
, loadedi
, storedi
);
6787 new_storedi
= force_gimple_operand_gsi (&si
,
6788 fold_convert (TREE_TYPE (loadedi
),
6791 true, GSI_SAME_STMT
);
6793 if (gimple_in_ssa_p (cfun
))
6797 old_vali
= create_tmp_var (TREE_TYPE (loadedi
));
6798 stmt
= gimple_build_assign (old_vali
, loadedi
);
6799 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6801 stmt
= gimple_build_assign (loadedi
, new_storedi
);
6802 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6805 /* Note that we always perform the comparison as an integer, even for
6806 floating point. This allows the atomic operation to properly
6807 succeed even with NaNs and -0.0. */
6808 tree ne
= build2 (NE_EXPR
, boolean_type_node
, new_storedi
, old_vali
);
6809 stmt
= gimple_build_cond_empty (ne
);
6810 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6813 e
= single_succ_edge (store_bb
);
6814 e
->flags
&= ~EDGE_FALLTHRU
;
6815 e
->flags
|= EDGE_FALSE_VALUE
;
6816 /* Expect no looping. */
6817 e
->probability
= profile_probability::guessed_always ();
6819 e
= make_edge (store_bb
, loop_header
, EDGE_TRUE_VALUE
);
6820 e
->probability
= profile_probability::guessed_never ();
6822 /* Copy the new value to loadedi (we already did that before the condition
6823 if we are not in SSA). */
6824 if (gimple_in_ssa_p (cfun
))
6826 phi
= gimple_seq_first_stmt (phi_nodes (loop_header
));
6827 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, e
), new_storedi
);
6830 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6831 gsi_remove (&si
, true);
6833 struct loop
*loop
= alloc_loop ();
6834 loop
->header
= loop_header
;
6835 loop
->latch
= store_bb
;
6836 add_loop (loop
, loop_header
->loop_father
);
6838 if (gimple_in_ssa_p (cfun
))
6839 update_ssa (TODO_update_ssa_no_phi
);
6844 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6846 GOMP_atomic_start ();
6850 The result is not globally atomic, but works so long as all parallel
6851 references are within #pragma omp atomic directives. According to
6852 responses received from omp@openmp.org, appears to be within spec.
6853 Which makes sense, since that's how several other compilers handle
6854 this situation as well.
6855 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6856 expanding. STORED_VAL is the operand of the matching
6857 GIMPLE_OMP_ATOMIC_STORE.
6860 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6864 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6869 expand_omp_atomic_mutex (basic_block load_bb
, basic_block store_bb
,
6870 tree addr
, tree loaded_val
, tree stored_val
)
6872 gimple_stmt_iterator si
;
6876 si
= gsi_last_nondebug_bb (load_bb
);
6877 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
6879 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START
);
6880 t
= build_call_expr (t
, 0);
6881 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6883 tree mem
= build_simple_mem_ref (addr
);
6884 TREE_TYPE (mem
) = TREE_TYPE (loaded_val
);
6885 TREE_OPERAND (mem
, 1)
6886 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem
), ptr_mode
,
6888 TREE_OPERAND (mem
, 1));
6889 stmt
= gimple_build_assign (loaded_val
, mem
);
6890 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6891 gsi_remove (&si
, true);
6893 si
= gsi_last_nondebug_bb (store_bb
);
6894 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
6896 stmt
= gimple_build_assign (unshare_expr (mem
), stored_val
);
6897 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
6899 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END
);
6900 t
= build_call_expr (t
, 0);
6901 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
6902 gsi_remove (&si
, true);
6904 if (gimple_in_ssa_p (cfun
))
6905 update_ssa (TODO_update_ssa_no_phi
);
6909 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6910 using expand_omp_atomic_fetch_op. If it failed, we try to
6911 call expand_omp_atomic_pipeline, and if it fails too, the
6912 ultimate fallback is wrapping the operation in a mutex
6913 (expand_omp_atomic_mutex). REGION is the atomic region built
6914 by build_omp_regions_1(). */
6917 expand_omp_atomic (struct omp_region
*region
)
6919 basic_block load_bb
= region
->entry
, store_bb
= region
->exit
;
6920 gomp_atomic_load
*load
= as_a
<gomp_atomic_load
*> (last_stmt (load_bb
));
6921 gomp_atomic_store
*store
= as_a
<gomp_atomic_store
*> (last_stmt (store_bb
));
6922 tree loaded_val
= gimple_omp_atomic_load_lhs (load
);
6923 tree addr
= gimple_omp_atomic_load_rhs (load
);
6924 tree stored_val
= gimple_omp_atomic_store_val (store
);
6925 tree type
= TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val
));
6926 HOST_WIDE_INT index
;
6928 /* Make sure the type is one of the supported sizes. */
6929 index
= tree_to_uhwi (TYPE_SIZE_UNIT (type
));
6930 index
= exact_log2 (index
);
6931 if (index
>= 0 && index
<= 4)
6933 unsigned int align
= TYPE_ALIGN_UNIT (type
);
6935 /* __sync builtins require strict data alignment. */
6936 if (exact_log2 (align
) >= index
)
6940 if (loaded_val
== stored_val
6941 && (is_int_mode (TYPE_MODE (type
), &smode
)
6942 || is_float_mode (TYPE_MODE (type
), &smode
))
6943 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
6944 && expand_omp_atomic_load (load_bb
, addr
, loaded_val
, index
))
6948 if ((is_int_mode (TYPE_MODE (type
), &smode
)
6949 || is_float_mode (TYPE_MODE (type
), &smode
))
6950 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
6951 && store_bb
== single_succ (load_bb
)
6952 && first_stmt (store_bb
) == store
6953 && expand_omp_atomic_store (load_bb
, addr
, loaded_val
,
6957 /* When possible, use specialized atomic update functions. */
6958 if ((INTEGRAL_TYPE_P (type
) || POINTER_TYPE_P (type
))
6959 && store_bb
== single_succ (load_bb
)
6960 && expand_omp_atomic_fetch_op (load_bb
, addr
,
6961 loaded_val
, stored_val
, index
))
6964 /* If we don't have specialized __sync builtins, try and implement
6965 as a compare and swap loop. */
6966 if (expand_omp_atomic_pipeline (load_bb
, store_bb
, addr
,
6967 loaded_val
, stored_val
, index
))
6972 /* The ultimate fallback is wrapping the operation in a mutex. */
6973 expand_omp_atomic_mutex (load_bb
, store_bb
, addr
, loaded_val
, stored_val
);
6976 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6980 mark_loops_in_oacc_kernels_region (basic_block region_entry
,
6981 basic_block region_exit
)
6983 struct loop
*outer
= region_entry
->loop_father
;
6984 gcc_assert (region_exit
== NULL
|| outer
== region_exit
->loop_father
);
6986 /* Don't parallelize the kernels region if it contains more than one outer
6988 unsigned int nr_outer_loops
= 0;
6989 struct loop
*single_outer
= NULL
;
6990 for (struct loop
*loop
= outer
->inner
; loop
!= NULL
; loop
= loop
->next
)
6992 gcc_assert (loop_outer (loop
) == outer
);
6994 if (!dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_entry
))
6997 if (region_exit
!= NULL
6998 && dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_exit
))
7002 single_outer
= loop
;
7004 if (nr_outer_loops
!= 1)
7007 for (struct loop
*loop
= single_outer
->inner
;
7013 /* Mark the loops in the region. */
7014 for (struct loop
*loop
= single_outer
; loop
!= NULL
; loop
= loop
->inner
)
7015 loop
->in_oacc_kernels_region
= true;
7018 /* Types used to pass grid and wortkgroup sizes to kernel invocation. */
7020 struct GTY(()) grid_launch_attributes_trees
7022 tree kernel_dim_array_type
;
7023 tree kernel_lattrs_dimnum_decl
;
7024 tree kernel_lattrs_grid_decl
;
7025 tree kernel_lattrs_group_decl
;
7026 tree kernel_launch_attributes_type
;
7029 static GTY(()) struct grid_launch_attributes_trees
*grid_attr_trees
;
7031 /* Create types used to pass kernel launch attributes to target. */
7034 grid_create_kernel_launch_attr_types (void)
7036 if (grid_attr_trees
)
7038 grid_attr_trees
= ggc_alloc
<grid_launch_attributes_trees
> ();
7040 tree dim_arr_index_type
7041 = build_index_type (build_int_cst (integer_type_node
, 2));
7042 grid_attr_trees
->kernel_dim_array_type
7043 = build_array_type (uint32_type_node
, dim_arr_index_type
);
7045 grid_attr_trees
->kernel_launch_attributes_type
= make_node (RECORD_TYPE
);
7046 grid_attr_trees
->kernel_lattrs_dimnum_decl
7047 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("ndim"),
7049 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_dimnum_decl
) = NULL_TREE
;
7051 grid_attr_trees
->kernel_lattrs_grid_decl
7052 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("grid_size"),
7053 grid_attr_trees
->kernel_dim_array_type
);
7054 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_grid_decl
)
7055 = grid_attr_trees
->kernel_lattrs_dimnum_decl
;
7056 grid_attr_trees
->kernel_lattrs_group_decl
7057 = build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("group_size"),
7058 grid_attr_trees
->kernel_dim_array_type
);
7059 DECL_CHAIN (grid_attr_trees
->kernel_lattrs_group_decl
)
7060 = grid_attr_trees
->kernel_lattrs_grid_decl
;
7061 finish_builtin_struct (grid_attr_trees
->kernel_launch_attributes_type
,
7062 "__gomp_kernel_launch_attributes",
7063 grid_attr_trees
->kernel_lattrs_group_decl
, NULL_TREE
);
7066 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7067 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7068 of type uint32_type_node. */
7071 grid_insert_store_range_dim (gimple_stmt_iterator
*gsi
, tree range_var
,
7072 tree fld_decl
, int index
, tree value
)
7074 tree ref
= build4 (ARRAY_REF
, uint32_type_node
,
7075 build3 (COMPONENT_REF
,
7076 grid_attr_trees
->kernel_dim_array_type
,
7077 range_var
, fld_decl
, NULL_TREE
),
7078 build_int_cst (integer_type_node
, index
),
7079 NULL_TREE
, NULL_TREE
);
7080 gsi_insert_before (gsi
, gimple_build_assign (ref
, value
), GSI_SAME_STMT
);
7083 /* Return a tree representation of a pointer to a structure with grid and
7084 work-group size information. Statements filling that information will be
7085 inserted before GSI, TGT_STMT is the target statement which has the
7086 necessary information in it. */
7089 grid_get_kernel_launch_attributes (gimple_stmt_iterator
*gsi
,
7090 gomp_target
*tgt_stmt
)
7092 grid_create_kernel_launch_attr_types ();
7093 tree lattrs
= create_tmp_var (grid_attr_trees
->kernel_launch_attributes_type
,
7094 "__kernel_launch_attrs");
7096 unsigned max_dim
= 0;
7097 for (tree clause
= gimple_omp_target_clauses (tgt_stmt
);
7099 clause
= OMP_CLAUSE_CHAIN (clause
))
7101 if (OMP_CLAUSE_CODE (clause
) != OMP_CLAUSE__GRIDDIM_
)
7104 unsigned dim
= OMP_CLAUSE__GRIDDIM__DIMENSION (clause
);
7105 max_dim
= MAX (dim
, max_dim
);
7107 grid_insert_store_range_dim (gsi
, lattrs
,
7108 grid_attr_trees
->kernel_lattrs_grid_decl
,
7109 dim
, OMP_CLAUSE__GRIDDIM__SIZE (clause
));
7110 grid_insert_store_range_dim (gsi
, lattrs
,
7111 grid_attr_trees
->kernel_lattrs_group_decl
,
7112 dim
, OMP_CLAUSE__GRIDDIM__GROUP (clause
));
7115 tree dimref
= build3 (COMPONENT_REF
, uint32_type_node
, lattrs
,
7116 grid_attr_trees
->kernel_lattrs_dimnum_decl
, NULL_TREE
);
7117 gcc_checking_assert (max_dim
<= 2);
7118 tree dimensions
= build_int_cstu (uint32_type_node
, max_dim
+ 1);
7119 gsi_insert_before (gsi
, gimple_build_assign (dimref
, dimensions
),
7121 TREE_ADDRESSABLE (lattrs
) = 1;
7122 return build_fold_addr_expr (lattrs
);
7125 /* Build target argument identifier from the DEVICE identifier, value
7126 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7129 get_target_argument_identifier_1 (int device
, bool subseqent_param
, int id
)
7131 tree t
= build_int_cst (integer_type_node
, device
);
7132 if (subseqent_param
)
7133 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
7134 build_int_cst (integer_type_node
,
7135 GOMP_TARGET_ARG_SUBSEQUENT_PARAM
));
7136 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
7137 build_int_cst (integer_type_node
, id
));
7141 /* Like above but return it in type that can be directly stored as an element
7142 of the argument array. */
7145 get_target_argument_identifier (int device
, bool subseqent_param
, int id
)
7147 tree t
= get_target_argument_identifier_1 (device
, subseqent_param
, id
);
7148 return fold_convert (ptr_type_node
, t
);
7151 /* Return a target argument consisting of DEVICE identifier, value identifier
7152 ID, and the actual VALUE. */
7155 get_target_argument_value (gimple_stmt_iterator
*gsi
, int device
, int id
,
7158 tree t
= fold_build2 (LSHIFT_EXPR
, integer_type_node
,
7159 fold_convert (integer_type_node
, value
),
7160 build_int_cst (unsigned_type_node
,
7161 GOMP_TARGET_ARG_VALUE_SHIFT
));
7162 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
7163 get_target_argument_identifier_1 (device
, false, id
));
7164 t
= fold_convert (ptr_type_node
, t
);
7165 return force_gimple_operand_gsi (gsi
, t
, true, NULL
, true, GSI_SAME_STMT
);
7168 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7169 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7170 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7174 push_target_argument_according_to_value (gimple_stmt_iterator
*gsi
, int device
,
7175 int id
, tree value
, vec
<tree
> *args
)
7177 if (tree_fits_shwi_p (value
)
7178 && tree_to_shwi (value
) > -(1 << 15)
7179 && tree_to_shwi (value
) < (1 << 15))
7180 args
->quick_push (get_target_argument_value (gsi
, device
, id
, value
));
7183 args
->quick_push (get_target_argument_identifier (device
, true, id
));
7184 value
= fold_convert (ptr_type_node
, value
);
7185 value
= force_gimple_operand_gsi (gsi
, value
, true, NULL
, true,
7187 args
->quick_push (value
);
7191 /* Create an array of arguments that is then passed to GOMP_target. */
7194 get_target_arguments (gimple_stmt_iterator
*gsi
, gomp_target
*tgt_stmt
)
7196 auto_vec
<tree
, 6> args
;
7197 tree clauses
= gimple_omp_target_clauses (tgt_stmt
);
7198 tree t
, c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_TEAMS
);
7200 t
= OMP_CLAUSE_NUM_TEAMS_EXPR (c
);
7202 t
= integer_minus_one_node
;
7203 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
7204 GOMP_TARGET_ARG_NUM_TEAMS
, t
, &args
);
7206 c
= omp_find_clause (clauses
, OMP_CLAUSE_THREAD_LIMIT
);
7208 t
= OMP_CLAUSE_THREAD_LIMIT_EXPR (c
);
7210 t
= integer_minus_one_node
;
7211 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
7212 GOMP_TARGET_ARG_THREAD_LIMIT
, t
,
7215 /* Add HSA-specific grid sizes, if available. */
7216 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7217 OMP_CLAUSE__GRIDDIM_
))
7219 int id
= GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES
;
7220 t
= get_target_argument_identifier (GOMP_DEVICE_HSA
, true, id
);
7221 args
.quick_push (t
);
7222 args
.quick_push (grid_get_kernel_launch_attributes (gsi
, tgt_stmt
));
7225 /* Produce more, perhaps device specific, arguments here. */
7227 tree argarray
= create_tmp_var (build_array_type_nelts (ptr_type_node
,
7228 args
.length () + 1),
7229 ".omp_target_args");
7230 for (unsigned i
= 0; i
< args
.length (); i
++)
7232 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
7233 build_int_cst (integer_type_node
, i
),
7234 NULL_TREE
, NULL_TREE
);
7235 gsi_insert_before (gsi
, gimple_build_assign (ref
, args
[i
]),
7238 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
7239 build_int_cst (integer_type_node
, args
.length ()),
7240 NULL_TREE
, NULL_TREE
);
7241 gsi_insert_before (gsi
, gimple_build_assign (ref
, null_pointer_node
),
7243 TREE_ADDRESSABLE (argarray
) = 1;
7244 return build_fold_addr_expr (argarray
);
7247 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7250 expand_omp_target (struct omp_region
*region
)
7252 basic_block entry_bb
, exit_bb
, new_bb
;
7253 struct function
*child_cfun
;
7254 tree child_fn
, block
, t
;
7255 gimple_stmt_iterator gsi
;
7256 gomp_target
*entry_stmt
;
7259 bool offloaded
, data_region
;
7261 entry_stmt
= as_a
<gomp_target
*> (last_stmt (region
->entry
));
7262 new_bb
= region
->entry
;
7264 offloaded
= is_gimple_omp_offloaded (entry_stmt
);
7265 switch (gimple_omp_target_kind (entry_stmt
))
7267 case GF_OMP_TARGET_KIND_REGION
:
7268 case GF_OMP_TARGET_KIND_UPDATE
:
7269 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7270 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7271 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7272 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7273 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7274 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7275 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7276 data_region
= false;
7278 case GF_OMP_TARGET_KIND_DATA
:
7279 case GF_OMP_TARGET_KIND_OACC_DATA
:
7280 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7287 child_fn
= NULL_TREE
;
7291 child_fn
= gimple_omp_target_child_fn (entry_stmt
);
7292 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
7295 /* Supported by expand_omp_taskreg, but not here. */
7296 if (child_cfun
!= NULL
)
7297 gcc_checking_assert (!child_cfun
->cfg
);
7298 gcc_checking_assert (!gimple_in_ssa_p (cfun
));
7300 entry_bb
= region
->entry
;
7301 exit_bb
= region
->exit
;
7303 if (gimple_omp_target_kind (entry_stmt
) == GF_OMP_TARGET_KIND_OACC_KERNELS
)
7305 mark_loops_in_oacc_kernels_region (region
->entry
, region
->exit
);
7307 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7308 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7309 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7310 DECL_ATTRIBUTES (child_fn
)
7311 = tree_cons (get_identifier ("oacc kernels"),
7312 NULL_TREE
, DECL_ATTRIBUTES (child_fn
));
7317 unsigned srcidx
, dstidx
, num
;
7319 /* If the offloading region needs data sent from the parent
7320 function, then the very first statement (except possible
7321 tree profile counter updates) of the offloading body
7322 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7323 &.OMP_DATA_O is passed as an argument to the child function,
7324 we need to replace it with the argument as seen by the child
7327 In most cases, this will end up being the identity assignment
7328 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7329 a function call that has been inlined, the original PARM_DECL
7330 .OMP_DATA_I may have been converted into a different local
7331 variable. In which case, we need to keep the assignment. */
7332 tree data_arg
= gimple_omp_target_data_arg (entry_stmt
);
7335 basic_block entry_succ_bb
= single_succ (entry_bb
);
7336 gimple_stmt_iterator gsi
;
7338 gimple
*tgtcopy_stmt
= NULL
;
7339 tree sender
= TREE_VEC_ELT (data_arg
, 0);
7341 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
7343 gcc_assert (!gsi_end_p (gsi
));
7344 stmt
= gsi_stmt (gsi
);
7345 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7348 if (gimple_num_ops (stmt
) == 2)
7350 tree arg
= gimple_assign_rhs1 (stmt
);
7352 /* We're ignoring the subcode because we're
7353 effectively doing a STRIP_NOPS. */
7355 if (TREE_CODE (arg
) == ADDR_EXPR
7356 && TREE_OPERAND (arg
, 0) == sender
)
7358 tgtcopy_stmt
= stmt
;
7364 gcc_assert (tgtcopy_stmt
!= NULL
);
7365 arg
= DECL_ARGUMENTS (child_fn
);
7367 gcc_assert (gimple_assign_lhs (tgtcopy_stmt
) == arg
);
7368 gsi_remove (&gsi
, true);
7371 /* Declare local variables needed in CHILD_CFUN. */
7372 block
= DECL_INITIAL (child_fn
);
7373 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
7374 /* The gimplifier could record temporaries in the offloading block
7375 rather than in containing function's local_decls chain,
7376 which would mean cgraph missed finalizing them. Do it now. */
7377 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
7378 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
7379 varpool_node::finalize_decl (t
);
7380 DECL_SAVED_TREE (child_fn
) = NULL
;
7381 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7382 gimple_set_body (child_fn
, NULL
);
7383 TREE_USED (block
) = 1;
7385 /* Reset DECL_CONTEXT on function arguments. */
7386 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
7387 DECL_CONTEXT (t
) = child_fn
;
7389 /* Split ENTRY_BB at GIMPLE_*,
7390 so that it can be moved to the child function. */
7391 gsi
= gsi_last_nondebug_bb (entry_bb
);
7392 stmt
= gsi_stmt (gsi
);
7394 && gimple_code (stmt
) == gimple_code (entry_stmt
));
7395 e
= split_block (entry_bb
, stmt
);
7396 gsi_remove (&gsi
, true);
7398 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
7400 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7403 gsi
= gsi_last_nondebug_bb (exit_bb
);
7404 gcc_assert (!gsi_end_p (gsi
)
7405 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7406 stmt
= gimple_build_return (NULL
);
7407 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
7408 gsi_remove (&gsi
, true);
7411 /* Move the offloading region into CHILD_CFUN. */
7413 block
= gimple_block (entry_stmt
);
7415 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
7417 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
7418 /* When the OMP expansion process cannot guarantee an up-to-date
7419 loop tree arrange for the child function to fixup loops. */
7420 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7421 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
7423 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7424 num
= vec_safe_length (child_cfun
->local_decls
);
7425 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
7427 t
= (*child_cfun
->local_decls
)[srcidx
];
7428 if (DECL_CONTEXT (t
) == cfun
->decl
)
7430 if (srcidx
!= dstidx
)
7431 (*child_cfun
->local_decls
)[dstidx
] = t
;
7435 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
7437 /* Inform the callgraph about the new function. */
7438 child_cfun
->curr_properties
= cfun
->curr_properties
;
7439 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
7440 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
7441 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
7442 node
->parallelized_function
= 1;
7443 cgraph_node::add_new_function (child_fn
, true);
7445 /* Add the new function to the offload table. */
7446 if (ENABLE_OFFLOADING
)
7449 DECL_PRESERVE_P (child_fn
) = 1;
7450 vec_safe_push (offload_funcs
, child_fn
);
7453 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
7454 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
7456 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7457 fixed in a following pass. */
7458 push_cfun (child_cfun
);
7460 assign_assembler_name_if_needed (child_fn
);
7461 cgraph_edge::rebuild_edges ();
7463 /* Some EH regions might become dead, see PR34608. If
7464 pass_cleanup_cfg isn't the first pass to happen with the
7465 new child, these dead EH edges might cause problems.
7466 Clean them up now. */
7467 if (flag_exceptions
)
7470 bool changed
= false;
7472 FOR_EACH_BB_FN (bb
, cfun
)
7473 changed
|= gimple_purge_dead_eh_edges (bb
);
7475 cleanup_tree_cfg ();
7477 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
7478 verify_loop_structure ();
7481 if (dump_file
&& !gimple_in_ssa_p (cfun
))
7483 omp_any_child_fn_dumped
= true;
7484 dump_function_header (dump_file
, child_fn
, dump_flags
);
7485 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
7488 adjust_context_and_scope (region
, gimple_block (entry_stmt
), child_fn
);
7491 /* Emit a library call to launch the offloading region, or do data
7493 tree t1
, t2
, t3
, t4
, device
, cond
, depend
, c
, clauses
;
7494 enum built_in_function start_ix
;
7495 location_t clause_loc
;
7496 unsigned int flags_i
= 0;
7498 switch (gimple_omp_target_kind (entry_stmt
))
7500 case GF_OMP_TARGET_KIND_REGION
:
7501 start_ix
= BUILT_IN_GOMP_TARGET
;
7503 case GF_OMP_TARGET_KIND_DATA
:
7504 start_ix
= BUILT_IN_GOMP_TARGET_DATA
;
7506 case GF_OMP_TARGET_KIND_UPDATE
:
7507 start_ix
= BUILT_IN_GOMP_TARGET_UPDATE
;
7509 case GF_OMP_TARGET_KIND_ENTER_DATA
:
7510 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7512 case GF_OMP_TARGET_KIND_EXIT_DATA
:
7513 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
7514 flags_i
|= GOMP_TARGET_FLAG_EXIT_DATA
;
7516 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
7517 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
7518 start_ix
= BUILT_IN_GOACC_PARALLEL
;
7520 case GF_OMP_TARGET_KIND_OACC_DATA
:
7521 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
7522 start_ix
= BUILT_IN_GOACC_DATA_START
;
7524 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
7525 start_ix
= BUILT_IN_GOACC_UPDATE
;
7527 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
7528 start_ix
= BUILT_IN_GOACC_ENTER_EXIT_DATA
;
7530 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
7531 start_ix
= BUILT_IN_GOACC_DECLARE
;
7537 clauses
= gimple_omp_target_clauses (entry_stmt
);
7539 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7540 library choose) and there is no conditional. */
7542 device
= build_int_cst (integer_type_node
, GOMP_DEVICE_ICV
);
7544 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
7546 cond
= OMP_CLAUSE_IF_EXPR (c
);
7548 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEVICE
);
7551 /* Even if we pass it to all library function calls, it is currently only
7552 defined/used for the OpenMP target ones. */
7553 gcc_checking_assert (start_ix
== BUILT_IN_GOMP_TARGET
7554 || start_ix
== BUILT_IN_GOMP_TARGET_DATA
7555 || start_ix
== BUILT_IN_GOMP_TARGET_UPDATE
7556 || start_ix
== BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
);
7558 device
= OMP_CLAUSE_DEVICE_ID (c
);
7559 clause_loc
= OMP_CLAUSE_LOCATION (c
);
7562 clause_loc
= gimple_location (entry_stmt
);
7564 c
= omp_find_clause (clauses
, OMP_CLAUSE_NOWAIT
);
7566 flags_i
|= GOMP_TARGET_FLAG_NOWAIT
;
7568 /* Ensure 'device' is of the correct type. */
7569 device
= fold_convert_loc (clause_loc
, integer_type_node
, device
);
7571 /* If we found the clause 'if (cond)', build
7572 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7575 cond
= gimple_boolify (cond
);
7577 basic_block cond_bb
, then_bb
, else_bb
;
7581 tmp_var
= create_tmp_var (TREE_TYPE (device
));
7583 e
= split_block_after_labels (new_bb
);
7586 gsi
= gsi_last_nondebug_bb (new_bb
);
7588 e
= split_block (new_bb
, gsi_stmt (gsi
));
7594 then_bb
= create_empty_bb (cond_bb
);
7595 else_bb
= create_empty_bb (then_bb
);
7596 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
7597 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
7599 stmt
= gimple_build_cond_empty (cond
);
7600 gsi
= gsi_last_bb (cond_bb
);
7601 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7603 gsi
= gsi_start_bb (then_bb
);
7604 stmt
= gimple_build_assign (tmp_var
, device
);
7605 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7607 gsi
= gsi_start_bb (else_bb
);
7608 stmt
= gimple_build_assign (tmp_var
,
7609 build_int_cst (integer_type_node
,
7610 GOMP_DEVICE_HOST_FALLBACK
));
7611 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
7613 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
7614 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
7615 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
7616 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
7617 make_edge (then_bb
, new_bb
, EDGE_FALLTHRU
);
7618 make_edge (else_bb
, new_bb
, EDGE_FALLTHRU
);
7621 gsi
= gsi_last_nondebug_bb (new_bb
);
7625 gsi
= gsi_last_nondebug_bb (new_bb
);
7626 device
= force_gimple_operand_gsi (&gsi
, device
, true, NULL_TREE
,
7627 true, GSI_SAME_STMT
);
7630 t
= gimple_omp_target_data_arg (entry_stmt
);
7633 t1
= size_zero_node
;
7634 t2
= build_zero_cst (ptr_type_node
);
7640 t1
= TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t
, 1))));
7641 t1
= size_binop (PLUS_EXPR
, t1
, size_int (1));
7642 t2
= build_fold_addr_expr (TREE_VEC_ELT (t
, 0));
7643 t3
= build_fold_addr_expr (TREE_VEC_ELT (t
, 1));
7644 t4
= build_fold_addr_expr (TREE_VEC_ELT (t
, 2));
7648 bool tagging
= false;
7649 /* The maximum number used by any start_ix, without varargs. */
7650 auto_vec
<tree
, 11> args
;
7651 args
.quick_push (device
);
7653 args
.quick_push (build_fold_addr_expr (child_fn
));
7654 args
.quick_push (t1
);
7655 args
.quick_push (t2
);
7656 args
.quick_push (t3
);
7657 args
.quick_push (t4
);
7660 case BUILT_IN_GOACC_DATA_START
:
7661 case BUILT_IN_GOACC_DECLARE
:
7662 case BUILT_IN_GOMP_TARGET_DATA
:
7664 case BUILT_IN_GOMP_TARGET
:
7665 case BUILT_IN_GOMP_TARGET_UPDATE
:
7666 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
:
7667 args
.quick_push (build_int_cst (unsigned_type_node
, flags_i
));
7668 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
7670 depend
= OMP_CLAUSE_DECL (c
);
7672 depend
= build_int_cst (ptr_type_node
, 0);
7673 args
.quick_push (depend
);
7674 if (start_ix
== BUILT_IN_GOMP_TARGET
)
7675 args
.quick_push (get_target_arguments (&gsi
, entry_stmt
));
7677 case BUILT_IN_GOACC_PARALLEL
:
7678 oacc_set_fn_attrib (child_fn
, clauses
, &args
);
7681 case BUILT_IN_GOACC_ENTER_EXIT_DATA
:
7682 case BUILT_IN_GOACC_UPDATE
:
7684 tree t_async
= NULL_TREE
;
7686 /* If present, use the value specified by the respective
7687 clause, making sure that is of the correct type. */
7688 c
= omp_find_clause (clauses
, OMP_CLAUSE_ASYNC
);
7690 t_async
= fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
7692 OMP_CLAUSE_ASYNC_EXPR (c
));
7694 /* Default values for t_async. */
7695 t_async
= fold_convert_loc (gimple_location (entry_stmt
),
7697 build_int_cst (integer_type_node
,
7699 if (tagging
&& t_async
)
7701 unsigned HOST_WIDE_INT i_async
= GOMP_LAUNCH_OP_MAX
;
7703 if (TREE_CODE (t_async
) == INTEGER_CST
)
7705 /* See if we can pack the async arg in to the tag's
7707 i_async
= TREE_INT_CST_LOW (t_async
);
7708 if (i_async
< GOMP_LAUNCH_OP_MAX
)
7709 t_async
= NULL_TREE
;
7711 i_async
= GOMP_LAUNCH_OP_MAX
;
7713 args
.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC
, NULL_TREE
,
7717 args
.safe_push (t_async
);
7719 /* Save the argument index, and ... */
7720 unsigned t_wait_idx
= args
.length ();
7721 unsigned num_waits
= 0;
7722 c
= omp_find_clause (clauses
, OMP_CLAUSE_WAIT
);
7724 /* ... push a placeholder. */
7725 args
.safe_push (integer_zero_node
);
7727 for (; c
; c
= OMP_CLAUSE_CHAIN (c
))
7728 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_WAIT
)
7730 args
.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
7732 OMP_CLAUSE_WAIT_EXPR (c
)));
7736 if (!tagging
|| num_waits
)
7740 /* Now that we know the number, update the placeholder. */
7742 len
= oacc_launch_pack (GOMP_LAUNCH_WAIT
, NULL_TREE
, num_waits
);
7744 len
= build_int_cst (integer_type_node
, num_waits
);
7745 len
= fold_convert_loc (gimple_location (entry_stmt
),
7746 unsigned_type_node
, len
);
7747 args
[t_wait_idx
] = len
;
7755 /* Push terminal marker - zero. */
7756 args
.safe_push (oacc_launch_pack (0, NULL_TREE
, 0));
7758 g
= gimple_build_call_vec (builtin_decl_explicit (start_ix
), args
);
7759 gimple_set_location (g
, gimple_location (entry_stmt
));
7760 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
7764 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_TARGET
);
7765 gsi_remove (&gsi
, true);
7767 if (data_region
&& region
->exit
)
7769 gsi
= gsi_last_nondebug_bb (region
->exit
);
7771 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_RETURN
);
7772 gsi_remove (&gsi
, true);
7776 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7777 iteration variable derived from the thread number. INTRA_GROUP means this
7778 is an expansion of a loop iterating over work-items within a separate
7779 iteration over groups. */
7782 grid_expand_omp_for_loop (struct omp_region
*kfor
, bool intra_group
)
7784 gimple_stmt_iterator gsi
;
7785 gomp_for
*for_stmt
= as_a
<gomp_for
*> (last_stmt (kfor
->entry
));
7786 gcc_checking_assert (gimple_omp_for_kind (for_stmt
)
7787 == GF_OMP_FOR_KIND_GRID_LOOP
);
7788 size_t collapse
= gimple_omp_for_collapse (for_stmt
);
7789 struct omp_for_data_loop
*loops
7790 = XALLOCAVEC (struct omp_for_data_loop
,
7791 gimple_omp_for_collapse (for_stmt
));
7792 struct omp_for_data fd
;
7794 remove_edge (BRANCH_EDGE (kfor
->entry
));
7795 basic_block body_bb
= FALLTHRU_EDGE (kfor
->entry
)->dest
;
7797 gcc_assert (kfor
->cont
);
7798 omp_extract_for_data (for_stmt
, &fd
, loops
);
7800 gsi
= gsi_start_bb (body_bb
);
7802 for (size_t dim
= 0; dim
< collapse
; dim
++)
7805 itype
= type
= TREE_TYPE (fd
.loops
[dim
].v
);
7806 if (POINTER_TYPE_P (type
))
7807 itype
= signed_type_for (type
);
7809 tree n1
= fd
.loops
[dim
].n1
;
7810 tree step
= fd
.loops
[dim
].step
;
7811 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
7812 true, NULL_TREE
, true, GSI_SAME_STMT
);
7813 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
7814 true, NULL_TREE
, true, GSI_SAME_STMT
);
7816 if (gimple_omp_for_grid_group_iter (for_stmt
))
7818 gcc_checking_assert (!intra_group
);
7819 threadid
= build_call_expr (builtin_decl_explicit
7820 (BUILT_IN_HSA_WORKGROUPID
), 1,
7821 build_int_cstu (unsigned_type_node
, dim
));
7823 else if (intra_group
)
7824 threadid
= build_call_expr (builtin_decl_explicit
7825 (BUILT_IN_HSA_WORKITEMID
), 1,
7826 build_int_cstu (unsigned_type_node
, dim
));
7828 threadid
= build_call_expr (builtin_decl_explicit
7829 (BUILT_IN_HSA_WORKITEMABSID
), 1,
7830 build_int_cstu (unsigned_type_node
, dim
));
7831 threadid
= fold_convert (itype
, threadid
);
7832 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
7833 true, GSI_SAME_STMT
);
7835 tree startvar
= fd
.loops
[dim
].v
;
7836 tree t
= fold_build2 (MULT_EXPR
, itype
, threadid
, step
);
7837 if (POINTER_TYPE_P (type
))
7838 t
= fold_build_pointer_plus (n1
, t
);
7840 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
7841 t
= fold_convert (type
, t
);
7842 t
= force_gimple_operand_gsi (&gsi
, t
,
7844 && TREE_ADDRESSABLE (startvar
),
7845 NULL_TREE
, true, GSI_SAME_STMT
);
7846 gassign
*assign_stmt
= gimple_build_assign (startvar
, t
);
7847 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
7849 /* Remove the omp for statement. */
7850 gsi
= gsi_last_nondebug_bb (kfor
->entry
);
7851 gsi_remove (&gsi
, true);
7853 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7854 gsi
= gsi_last_nondebug_bb (kfor
->cont
);
7855 gcc_assert (!gsi_end_p (gsi
)
7856 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_CONTINUE
);
7857 gsi_remove (&gsi
, true);
7859 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7860 gsi
= gsi_last_nondebug_bb (kfor
->exit
);
7861 gcc_assert (!gsi_end_p (gsi
)
7862 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7864 gsi_insert_before (&gsi
, omp_build_barrier (NULL_TREE
), GSI_SAME_STMT
);
7865 gsi_remove (&gsi
, true);
7867 /* Fixup the much simpler CFG. */
7868 remove_edge (find_edge (kfor
->cont
, body_bb
));
7870 if (kfor
->cont
!= body_bb
)
7871 set_immediate_dominator (CDI_DOMINATORS
, kfor
->cont
, body_bb
);
7872 set_immediate_dominator (CDI_DOMINATORS
, kfor
->exit
, kfor
->cont
);
7875 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7878 struct grid_arg_decl_map
7884 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7885 pertaining to kernel function. */
7888 grid_remap_kernel_arg_accesses (tree
*tp
, int *walk_subtrees
, void *data
)
7890 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
7891 struct grid_arg_decl_map
*adm
= (struct grid_arg_decl_map
*) wi
->info
;
7894 if (t
== adm
->old_arg
)
7896 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
7900 /* If TARGET region contains a kernel body for loop, remove its region from the
7901 TARGET and expand it in HSA gridified kernel fashion. */
7904 grid_expand_target_grid_body (struct omp_region
*target
)
7906 if (!hsa_gen_requested_p ())
7909 gomp_target
*tgt_stmt
= as_a
<gomp_target
*> (last_stmt (target
->entry
));
7910 struct omp_region
**pp
;
7912 for (pp
= &target
->inner
; *pp
; pp
= &(*pp
)->next
)
7913 if ((*pp
)->type
== GIMPLE_OMP_GRID_BODY
)
7916 struct omp_region
*gpukernel
= *pp
;
7918 tree orig_child_fndecl
= gimple_omp_target_child_fn (tgt_stmt
);
7921 /* HSA cannot handle OACC stuff. */
7922 if (gimple_omp_target_kind (tgt_stmt
) != GF_OMP_TARGET_KIND_REGION
)
7924 gcc_checking_assert (orig_child_fndecl
);
7925 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7926 OMP_CLAUSE__GRIDDIM_
));
7927 cgraph_node
*n
= cgraph_node::get (orig_child_fndecl
);
7929 hsa_register_kernel (n
);
7933 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt
),
7934 OMP_CLAUSE__GRIDDIM_
));
7936 = gimple_block (first_stmt (single_succ (gpukernel
->entry
)));
7937 *pp
= gpukernel
->next
;
7938 for (pp
= &gpukernel
->inner
; *pp
; pp
= &(*pp
)->next
)
7939 if ((*pp
)->type
== GIMPLE_OMP_FOR
)
7942 struct omp_region
*kfor
= *pp
;
7944 gomp_for
*for_stmt
= as_a
<gomp_for
*> (last_stmt (kfor
->entry
));
7945 gcc_assert (gimple_omp_for_kind (for_stmt
) == GF_OMP_FOR_KIND_GRID_LOOP
);
7949 if (gimple_omp_for_grid_group_iter (for_stmt
))
7951 struct omp_region
**next_pp
;
7952 for (pp
= &kfor
->inner
; *pp
; pp
= next_pp
)
7954 next_pp
= &(*pp
)->next
;
7955 if ((*pp
)->type
!= GIMPLE_OMP_FOR
)
7957 gomp_for
*inner
= as_a
<gomp_for
*> (last_stmt ((*pp
)->entry
));
7958 gcc_assert (gimple_omp_for_kind (inner
)
7959 == GF_OMP_FOR_KIND_GRID_LOOP
);
7960 grid_expand_omp_for_loop (*pp
, true);
7965 expand_omp (kfor
->inner
);
7967 if (gpukernel
->inner
)
7968 expand_omp (gpukernel
->inner
);
7970 tree kern_fndecl
= copy_node (orig_child_fndecl
);
7971 DECL_NAME (kern_fndecl
) = clone_function_name_numbered (kern_fndecl
,
7973 SET_DECL_ASSEMBLER_NAME (kern_fndecl
, DECL_NAME (kern_fndecl
));
7974 tree tgtblock
= gimple_block (tgt_stmt
);
7975 tree fniniblock
= make_node (BLOCK
);
7976 BLOCK_ABSTRACT_ORIGIN (fniniblock
) = BLOCK_ORIGIN (tgtblock
);
7977 BLOCK_SOURCE_LOCATION (fniniblock
) = BLOCK_SOURCE_LOCATION (tgtblock
);
7978 BLOCK_SOURCE_END_LOCATION (fniniblock
) = BLOCK_SOURCE_END_LOCATION (tgtblock
);
7979 BLOCK_SUPERCONTEXT (fniniblock
) = kern_fndecl
;
7980 DECL_INITIAL (kern_fndecl
) = fniniblock
;
7981 push_struct_function (kern_fndecl
);
7982 cfun
->function_end_locus
= gimple_location (tgt_stmt
);
7983 init_tree_ssa (cfun
);
7986 tree old_parm_decl
= DECL_ARGUMENTS (kern_fndecl
);
7987 gcc_assert (!DECL_CHAIN (old_parm_decl
));
7988 tree new_parm_decl
= copy_node (DECL_ARGUMENTS (kern_fndecl
));
7989 DECL_CONTEXT (new_parm_decl
) = kern_fndecl
;
7990 DECL_ARGUMENTS (kern_fndecl
) = new_parm_decl
;
7991 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl
))));
7992 DECL_RESULT (kern_fndecl
) = copy_node (DECL_RESULT (kern_fndecl
));
7993 DECL_CONTEXT (DECL_RESULT (kern_fndecl
)) = kern_fndecl
;
7994 struct function
*kern_cfun
= DECL_STRUCT_FUNCTION (kern_fndecl
);
7995 kern_cfun
->curr_properties
= cfun
->curr_properties
;
7997 grid_expand_omp_for_loop (kfor
, false);
7999 /* Remove the omp for statement. */
8000 gimple_stmt_iterator gsi
= gsi_last_nondebug_bb (gpukernel
->entry
);
8001 gsi_remove (&gsi
, true);
8002 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8004 gsi
= gsi_last_nondebug_bb (gpukernel
->exit
);
8005 gcc_assert (!gsi_end_p (gsi
)
8006 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
8007 gimple
*ret_stmt
= gimple_build_return (NULL
);
8008 gsi_insert_after (&gsi
, ret_stmt
, GSI_SAME_STMT
);
8009 gsi_remove (&gsi
, true);
8011 /* Statements in the first BB in the target construct have been produced by
8012 target lowering and must be copied inside the GPUKERNEL, with the two
8013 exceptions of the first OMP statement and the OMP_DATA assignment
8015 gsi
= gsi_start_bb (single_succ (gpukernel
->entry
));
8016 tree data_arg
= gimple_omp_target_data_arg (tgt_stmt
);
8017 tree sender
= data_arg
? TREE_VEC_ELT (data_arg
, 0) : NULL
;
8018 for (gimple_stmt_iterator tsi
= gsi_start_bb (single_succ (target
->entry
));
8019 !gsi_end_p (tsi
); gsi_next (&tsi
))
8021 gimple
*stmt
= gsi_stmt (tsi
);
8022 if (is_gimple_omp (stmt
))
8025 && is_gimple_assign (stmt
)
8026 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == ADDR_EXPR
8027 && TREE_OPERAND (gimple_assign_rhs1 (stmt
), 0) == sender
)
8029 gimple
*copy
= gimple_copy (stmt
);
8030 gsi_insert_before (&gsi
, copy
, GSI_SAME_STMT
);
8031 gimple_set_block (copy
, fniniblock
);
8034 move_sese_region_to_fn (kern_cfun
, single_succ (gpukernel
->entry
),
8035 gpukernel
->exit
, inside_block
);
8037 cgraph_node
*kcn
= cgraph_node::get_create (kern_fndecl
);
8038 kcn
->mark_force_output ();
8039 cgraph_node
*orig_child
= cgraph_node::get (orig_child_fndecl
);
8041 hsa_register_kernel (kcn
, orig_child
);
8043 cgraph_node::add_new_function (kern_fndecl
, true);
8044 push_cfun (kern_cfun
);
8045 cgraph_edge::rebuild_edges ();
8047 /* Re-map any mention of the PARM_DECL of the original function to the
8048 PARM_DECL of the new one.
8050 TODO: It would be great if lowering produced references into the GPU
8051 kernel decl straight away and we did not have to do this. */
8052 struct grid_arg_decl_map adm
;
8053 adm
.old_arg
= old_parm_decl
;
8054 adm
.new_arg
= new_parm_decl
;
8056 FOR_EACH_BB_FN (bb
, kern_cfun
)
8058 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
8060 gimple
*stmt
= gsi_stmt (gsi
);
8061 struct walk_stmt_info wi
;
8062 memset (&wi
, 0, sizeof (wi
));
8064 walk_gimple_op (stmt
, grid_remap_kernel_arg_accesses
, &wi
);
8072 /* Expand the parallel region tree rooted at REGION. Expansion
8073 proceeds in depth-first order. Innermost regions are expanded
8074 first. This way, parallel regions that require a new function to
8075 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8076 internal dependencies in their body. */
8079 expand_omp (struct omp_region
*region
)
8081 omp_any_child_fn_dumped
= false;
8084 location_t saved_location
;
8085 gimple
*inner_stmt
= NULL
;
8087 /* First, determine whether this is a combined parallel+workshare
8089 if (region
->type
== GIMPLE_OMP_PARALLEL
)
8090 determine_parallel_type (region
);
8091 else if (region
->type
== GIMPLE_OMP_TARGET
)
8092 grid_expand_target_grid_body (region
);
8094 if (region
->type
== GIMPLE_OMP_FOR
8095 && gimple_omp_for_combined_p (last_stmt (region
->entry
)))
8096 inner_stmt
= last_stmt (region
->inner
->entry
);
8099 expand_omp (region
->inner
);
8101 saved_location
= input_location
;
8102 if (gimple_has_location (last_stmt (region
->entry
)))
8103 input_location
= gimple_location (last_stmt (region
->entry
));
8105 switch (region
->type
)
8107 case GIMPLE_OMP_PARALLEL
:
8108 case GIMPLE_OMP_TASK
:
8109 expand_omp_taskreg (region
);
8112 case GIMPLE_OMP_FOR
:
8113 expand_omp_for (region
, inner_stmt
);
8116 case GIMPLE_OMP_SECTIONS
:
8117 expand_omp_sections (region
);
8120 case GIMPLE_OMP_SECTION
:
8121 /* Individual omp sections are handled together with their
8122 parent GIMPLE_OMP_SECTIONS region. */
8125 case GIMPLE_OMP_SINGLE
:
8126 expand_omp_single (region
);
8129 case GIMPLE_OMP_ORDERED
:
8131 gomp_ordered
*ord_stmt
8132 = as_a
<gomp_ordered
*> (last_stmt (region
->entry
));
8133 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt
),
8136 /* We'll expand these when expanding corresponding
8137 worksharing region with ordered(n) clause. */
8138 gcc_assert (region
->outer
8139 && region
->outer
->type
== GIMPLE_OMP_FOR
);
8140 region
->ord_stmt
= ord_stmt
;
8145 case GIMPLE_OMP_MASTER
:
8146 case GIMPLE_OMP_TASKGROUP
:
8147 case GIMPLE_OMP_CRITICAL
:
8148 case GIMPLE_OMP_TEAMS
:
8149 expand_omp_synch (region
);
8152 case GIMPLE_OMP_ATOMIC_LOAD
:
8153 expand_omp_atomic (region
);
8156 case GIMPLE_OMP_TARGET
:
8157 expand_omp_target (region
);
8164 input_location
= saved_location
;
8165 region
= region
->next
;
8167 if (omp_any_child_fn_dumped
)
8170 dump_function_header (dump_file
, current_function_decl
, dump_flags
);
8171 omp_any_child_fn_dumped
= false;
8175 /* Helper for build_omp_regions. Scan the dominator tree starting at
8176 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8177 true, the function ends once a single tree is built (otherwise, whole
8178 forest of OMP constructs may be built). */
8181 build_omp_regions_1 (basic_block bb
, struct omp_region
*parent
,
8184 gimple_stmt_iterator gsi
;
8188 gsi
= gsi_last_nondebug_bb (bb
);
8189 if (!gsi_end_p (gsi
) && is_gimple_omp (gsi_stmt (gsi
)))
8191 struct omp_region
*region
;
8192 enum gimple_code code
;
8194 stmt
= gsi_stmt (gsi
);
8195 code
= gimple_code (stmt
);
8196 if (code
== GIMPLE_OMP_RETURN
)
8198 /* STMT is the return point out of region PARENT. Mark it
8199 as the exit point and make PARENT the immediately
8200 enclosing region. */
8201 gcc_assert (parent
);
8204 parent
= parent
->outer
;
8206 else if (code
== GIMPLE_OMP_ATOMIC_STORE
)
8208 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8209 GIMPLE_OMP_RETURN, but matches with
8210 GIMPLE_OMP_ATOMIC_LOAD. */
8211 gcc_assert (parent
);
8212 gcc_assert (parent
->type
== GIMPLE_OMP_ATOMIC_LOAD
);
8215 parent
= parent
->outer
;
8217 else if (code
== GIMPLE_OMP_CONTINUE
)
8219 gcc_assert (parent
);
8222 else if (code
== GIMPLE_OMP_SECTIONS_SWITCH
)
8224 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8225 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8229 region
= new_omp_region (bb
, code
, parent
);
8231 if (code
== GIMPLE_OMP_TARGET
)
8233 switch (gimple_omp_target_kind (stmt
))
8235 case GF_OMP_TARGET_KIND_REGION
:
8236 case GF_OMP_TARGET_KIND_DATA
:
8237 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
8238 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
8239 case GF_OMP_TARGET_KIND_OACC_DATA
:
8240 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
8242 case GF_OMP_TARGET_KIND_UPDATE
:
8243 case GF_OMP_TARGET_KIND_ENTER_DATA
:
8244 case GF_OMP_TARGET_KIND_EXIT_DATA
:
8245 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
8246 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
8247 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
8248 /* ..., other than for those stand-alone directives... */
8255 else if (code
== GIMPLE_OMP_ORDERED
8256 && omp_find_clause (gimple_omp_ordered_clauses
8257 (as_a
<gomp_ordered
*> (stmt
)),
8259 /* #pragma omp ordered depend is also just a stand-alone
8262 else if (code
== GIMPLE_OMP_TASK
8263 && gimple_omp_task_taskwait_p (stmt
))
8264 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8266 /* ..., this directive becomes the parent for a new region. */
8272 if (single_tree
&& !parent
)
8275 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
8277 son
= next_dom_son (CDI_DOMINATORS
, son
))
8278 build_omp_regions_1 (son
, parent
, single_tree
);
8281 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8285 build_omp_regions_root (basic_block root
)
8287 gcc_assert (root_omp_region
== NULL
);
8288 build_omp_regions_1 (root
, NULL
, true);
8289 gcc_assert (root_omp_region
!= NULL
);
8292 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8295 omp_expand_local (basic_block head
)
8297 build_omp_regions_root (head
);
8298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
8300 fprintf (dump_file
, "\nOMP region tree\n\n");
8301 dump_omp_region (dump_file
, root_omp_region
, 0);
8302 fprintf (dump_file
, "\n");
8305 remove_exit_barriers (root_omp_region
);
8306 expand_omp (root_omp_region
);
8308 omp_free_regions ();
8311 /* Scan the CFG and build a tree of OMP regions. Return the root of
8312 the OMP region tree. */
8315 build_omp_regions (void)
8317 gcc_assert (root_omp_region
== NULL
);
8318 calculate_dominance_info (CDI_DOMINATORS
);
8319 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, false);
8322 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8325 execute_expand_omp (void)
8327 build_omp_regions ();
8329 if (!root_omp_region
)
8334 fprintf (dump_file
, "\nOMP region tree\n\n");
8335 dump_omp_region (dump_file
, root_omp_region
, 0);
8336 fprintf (dump_file
, "\n");
8339 remove_exit_barriers (root_omp_region
);
8341 expand_omp (root_omp_region
);
8343 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
8344 verify_loop_structure ();
8345 cleanup_tree_cfg ();
8347 omp_free_regions ();
8352 /* OMP expansion -- the default pass, run before creation of SSA form. */
8356 const pass_data pass_data_expand_omp
=
8358 GIMPLE_PASS
, /* type */
8359 "ompexp", /* name */
8360 OPTGROUP_OMP
, /* optinfo_flags */
8361 TV_NONE
, /* tv_id */
8362 PROP_gimple_any
, /* properties_required */
8363 PROP_gimple_eomp
, /* properties_provided */
8364 0, /* properties_destroyed */
8365 0, /* todo_flags_start */
8366 0, /* todo_flags_finish */
8369 class pass_expand_omp
: public gimple_opt_pass
8372 pass_expand_omp (gcc::context
*ctxt
)
8373 : gimple_opt_pass (pass_data_expand_omp
, ctxt
)
8376 /* opt_pass methods: */
8377 virtual unsigned int execute (function
*)
8379 bool gate
= ((flag_openacc
!= 0 || flag_openmp
!= 0
8380 || flag_openmp_simd
!= 0)
8383 /* This pass always runs, to provide PROP_gimple_eomp.
8384 But often, there is nothing to do. */
8388 return execute_expand_omp ();
8391 }; // class pass_expand_omp
8396 make_pass_expand_omp (gcc::context
*ctxt
)
8398 return new pass_expand_omp (ctxt
);
8403 const pass_data pass_data_expand_omp_ssa
=
8405 GIMPLE_PASS
, /* type */
8406 "ompexpssa", /* name */
8407 OPTGROUP_OMP
, /* optinfo_flags */
8408 TV_NONE
, /* tv_id */
8409 PROP_cfg
| PROP_ssa
, /* properties_required */
8410 PROP_gimple_eomp
, /* properties_provided */
8411 0, /* properties_destroyed */
8412 0, /* todo_flags_start */
8413 TODO_cleanup_cfg
| TODO_rebuild_alias
, /* todo_flags_finish */
8416 class pass_expand_omp_ssa
: public gimple_opt_pass
8419 pass_expand_omp_ssa (gcc::context
*ctxt
)
8420 : gimple_opt_pass (pass_data_expand_omp_ssa
, ctxt
)
8423 /* opt_pass methods: */
8424 virtual bool gate (function
*fun
)
8426 return !(fun
->curr_properties
& PROP_gimple_eomp
);
8428 virtual unsigned int execute (function
*) { return execute_expand_omp (); }
8429 opt_pass
* clone () { return new pass_expand_omp_ssa (m_ctxt
); }
8431 }; // class pass_expand_omp_ssa
8436 make_pass_expand_omp_ssa (gcc::context
*ctxt
)
8438 return new pass_expand_omp_ssa (ctxt
);
8441 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8445 omp_make_gimple_edges (basic_block bb
, struct omp_region
**region
,
8448 gimple
*last
= last_stmt (bb
);
8449 enum gimple_code code
= gimple_code (last
);
8450 struct omp_region
*cur_region
= *region
;
8451 bool fallthru
= false;
8455 case GIMPLE_OMP_PARALLEL
:
8456 case GIMPLE_OMP_FOR
:
8457 case GIMPLE_OMP_SINGLE
:
8458 case GIMPLE_OMP_TEAMS
:
8459 case GIMPLE_OMP_MASTER
:
8460 case GIMPLE_OMP_TASKGROUP
:
8461 case GIMPLE_OMP_CRITICAL
:
8462 case GIMPLE_OMP_SECTION
:
8463 case GIMPLE_OMP_GRID_BODY
:
8464 cur_region
= new_omp_region (bb
, code
, cur_region
);
8468 case GIMPLE_OMP_TASK
:
8469 cur_region
= new_omp_region (bb
, code
, cur_region
);
8471 if (gimple_omp_task_taskwait_p (last
))
8472 cur_region
= cur_region
->outer
;
8475 case GIMPLE_OMP_ORDERED
:
8476 cur_region
= new_omp_region (bb
, code
, cur_region
);
8478 if (omp_find_clause (gimple_omp_ordered_clauses
8479 (as_a
<gomp_ordered
*> (last
)),
8481 cur_region
= cur_region
->outer
;
8484 case GIMPLE_OMP_TARGET
:
8485 cur_region
= new_omp_region (bb
, code
, cur_region
);
8487 switch (gimple_omp_target_kind (last
))
8489 case GF_OMP_TARGET_KIND_REGION
:
8490 case GF_OMP_TARGET_KIND_DATA
:
8491 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
8492 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
8493 case GF_OMP_TARGET_KIND_OACC_DATA
:
8494 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
8496 case GF_OMP_TARGET_KIND_UPDATE
:
8497 case GF_OMP_TARGET_KIND_ENTER_DATA
:
8498 case GF_OMP_TARGET_KIND_EXIT_DATA
:
8499 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
8500 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
8501 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
8502 cur_region
= cur_region
->outer
;
8509 case GIMPLE_OMP_SECTIONS
:
8510 cur_region
= new_omp_region (bb
, code
, cur_region
);
8514 case GIMPLE_OMP_SECTIONS_SWITCH
:
8518 case GIMPLE_OMP_ATOMIC_LOAD
:
8519 case GIMPLE_OMP_ATOMIC_STORE
:
8523 case GIMPLE_OMP_RETURN
:
8524 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8525 somewhere other than the next block. This will be
8527 cur_region
->exit
= bb
;
8528 if (cur_region
->type
== GIMPLE_OMP_TASK
)
8529 /* Add an edge corresponding to not scheduling the task
8531 make_edge (cur_region
->entry
, bb
, EDGE_ABNORMAL
);
8532 fallthru
= cur_region
->type
!= GIMPLE_OMP_SECTION
;
8533 cur_region
= cur_region
->outer
;
8536 case GIMPLE_OMP_CONTINUE
:
8537 cur_region
->cont
= bb
;
8538 switch (cur_region
->type
)
8540 case GIMPLE_OMP_FOR
:
8541 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8542 succs edges as abnormal to prevent splitting
8544 single_succ_edge (cur_region
->entry
)->flags
|= EDGE_ABNORMAL
;
8545 /* Make the loopback edge. */
8546 make_edge (bb
, single_succ (cur_region
->entry
),
8549 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8550 corresponds to the case that the body of the loop
8551 is not executed at all. */
8552 make_edge (cur_region
->entry
, bb
->next_bb
, EDGE_ABNORMAL
);
8553 make_edge (bb
, bb
->next_bb
, EDGE_FALLTHRU
| EDGE_ABNORMAL
);
8557 case GIMPLE_OMP_SECTIONS
:
8558 /* Wire up the edges into and out of the nested sections. */
8560 basic_block switch_bb
= single_succ (cur_region
->entry
);
8562 struct omp_region
*i
;
8563 for (i
= cur_region
->inner
; i
; i
= i
->next
)
8565 gcc_assert (i
->type
== GIMPLE_OMP_SECTION
);
8566 make_edge (switch_bb
, i
->entry
, 0);
8567 make_edge (i
->exit
, bb
, EDGE_FALLTHRU
);
8570 /* Make the loopback edge to the block with
8571 GIMPLE_OMP_SECTIONS_SWITCH. */
8572 make_edge (bb
, switch_bb
, 0);
8574 /* Make the edge from the switch to exit. */
8575 make_edge (switch_bb
, bb
->next_bb
, 0);
8580 case GIMPLE_OMP_TASK
:
8593 if (*region
!= cur_region
)
8595 *region
= cur_region
;
8597 *region_idx
= cur_region
->entry
->index
;
8605 #include "gt-omp-expand.h"