2019-07-03 Jakub Jelinek <jakub@redhat.com>
+ * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_
+ clause.
+ * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of
+ OMP_CLAUSE__CONDTEMP_ as range's upper bound.
+ (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define.
+ * tree.c (omp_clause_num_ops, omp_clause_code_name): Add
+ OMP_CLAUSE__SCANTEMP_ entry.
+ (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_.
+ * tree-pretty-print.c (dump_omp_clause): Likewise.
+ * tree-nested.c (convert_nonlocal_omp_clauses,
+ convert_local_omp_clauses): Likewise.
+ * omp-general.h (struct omp_for_data): Add have_scantemp and
+ have_nonctrl_scantemp members.
+ * omp-general.c (omp_extract_for_data): Initialize them.
+ * omp-low.c (struct omp_context): Add scan_exclusive member.
+ (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind
+ result again with GF_OMP_FOR_KIND_MASK. Initialize also
+ ctx->scan_exclusive.
+ (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead
+ of !ctx->scan_inclusive.
+ (lower_rec_input_clauses): Simplify gimplification of dtors using
+ gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN
+ rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing
+ loops. Don't add barrier for reduction_omp_orig_ref if
+ ctx->scan_inclusive or ctx->scan_exclusive.
+ (lower_reduction_clauses): Don't do anything for ctx->scan_inclusive
+ or ctx->scan_exclusive.
+ (lower_omp_scan): Use ctx->scan_exclusive instead
+ of !ctx->scan_inclusive. Handle worksharing loops with inscan
+ reductions. Use new_vard != new_var instead of repeated
+ omp_is_reference calls.
+ (omp_find_scan, lower_omp_for_scan): New functions.
+ (lower_omp_for): Call lower_omp_for_scan for worksharing loops with
+ inscan reductions.
+ * omp-expand.c (expand_omp_scantemp_alloc): New function.
+ (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp
+ and fd->have_scantemp.
+
* gimplify.c (gimplify_scan_omp_clauses): For inscan reductions
on worksharing loop propagate it as shared clause to containing
combined parallel.
}
}
+/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
+ compute needed allocation size. If !ALLOC of team allocations,
+ if ALLOC of thread allocation. SZ is the initial needed size for
+ other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
+ CNT number of elements of each array, for !ALLOC this is
+ omp_get_num_threads (), for ALLOC number of iterations handled by the
+ current thread. If PTR is non-NULL, it is the start of the allocation
+ and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
+ clauses pointers to the corresponding arrays. */
+
+static tree
+expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
+ unsigned HOST_WIDE_INT alloc_align, tree cnt,
+ gimple_stmt_iterator *gsi, bool alloc)
+{
+ tree eltsz = NULL_TREE;
+ unsigned HOST_WIDE_INT preval = 0;
+ if (ptr && sz)
+ ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
+ ptr, size_int (sz));
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
+ && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
+ {
+ tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+ unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
+ if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
+ {
+ unsigned HOST_WIDE_INT szl
+ = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
+ szl = least_bit_hwi (szl);
+ if (szl)
+ al = MIN (al, szl);
+ }
+ if (ptr == NULL_TREE)
+ {
+ if (eltsz == NULL_TREE)
+ eltsz = TYPE_SIZE_UNIT (pointee_type);
+ else
+ eltsz = size_binop (PLUS_EXPR, eltsz,
+ TYPE_SIZE_UNIT (pointee_type));
+ }
+ if (preval == 0 && al <= alloc_align)
+ {
+ unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
+ sz += diff;
+ if (diff && ptr)
+ ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
+ ptr, size_int (diff));
+ }
+ else if (al > preval)
+ {
+ if (ptr)
+ {
+ ptr = fold_convert (pointer_sized_int_node, ptr);
+ ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
+ build_int_cst (pointer_sized_int_node,
+ al - 1));
+ ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
+ build_int_cst (pointer_sized_int_node,
+ -(HOST_WIDE_INT) al));
+ ptr = fold_convert (ptr_type_node, ptr);
+ }
+ else
+ sz += al - 1;
+ }
+ if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
+ preval = al;
+ else
+ preval = 1;
+ if (ptr)
+ {
+ expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
+ ptr = OMP_CLAUSE_DECL (c);
+ ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
+ size_binop (MULT_EXPR, cnt,
+ TYPE_SIZE_UNIT (pointee_type)));
+ }
+ }
+
+ if (ptr == NULL_TREE)
+ {
+ eltsz = size_binop (MULT_EXPR, eltsz, cnt);
+ if (sz)
+ eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
+ return eltsz;
+ }
+ else
+ return ptr;
+}
+
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with static schedule and no specified chunk size. Given
parameters:
struct omp_for_data *fd,
gimple *inner_stmt)
{
- tree n, q, s0, e0, e, t, tt, nthreads, threadid;
+ tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
tree type, itype, vmain, vback;
basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
basic_block body_bb, cont_bb, collapse_bb = NULL;
- basic_block fin_bb;
+ basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
+ basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
gimple_stmt_iterator gsi, gsip;
edge ep;
bool broken_loop = region->cont == NULL;
c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
cond_var = OMP_CLAUSE_DECL (c);
}
- if (fd->have_reductemp || fd->have_pointer_condtemp)
+ if (fd->have_reductemp
+ || fd->have_pointer_condtemp
+ || fd->have_nonctrl_scantemp)
{
tree t1 = build_int_cst (long_integer_type_node, 0);
tree t2 = build_int_cst (long_integer_type_node, 1);
gimple_stmt_iterator gsi2 = gsi_none ();
gimple *g = NULL;
tree mem = null_pointer_node, memv = NULL_TREE;
+ unsigned HOST_WIDE_INT condtemp_sz = 0;
+ unsigned HOST_WIDE_INT alloc_align = 0;
if (fd->have_reductemp)
{
+ gcc_assert (!fd->have_nonctrl_scantemp);
tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
reductions = OMP_CLAUSE_DECL (c);
gcc_assert (TREE_CODE (reductions) == SSA_NAME);
gsi2 = gsip;
reductions = null_pointer_node;
}
- if (fd->have_pointer_condtemp)
+ if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
{
- tree type = TREE_TYPE (condtemp);
+ tree type;
+ if (fd->have_pointer_condtemp)
+ type = TREE_TYPE (condtemp);
+ else
+ type = ptr_type_node;
memv = create_tmp_var (type);
TREE_ADDRESSABLE (memv) = 1;
- unsigned HOST_WIDE_INT sz
- = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
- sz *= fd->lastprivate_conditional;
- expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
- false);
+ unsigned HOST_WIDE_INT sz = 0;
+ tree size = NULL_TREE;
+ if (fd->have_pointer_condtemp)
+ {
+ sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+ sz *= fd->lastprivate_conditional;
+ condtemp_sz = sz;
+ }
+ if (fd->have_nonctrl_scantemp)
+ {
+ nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
+ gimple *g = gimple_build_call (nthreads, 0);
+ nthreads = create_tmp_var (integer_type_node);
+ gimple_call_set_lhs (g, nthreads);
+ gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
+ nthreads = fold_convert (sizetype, nthreads);
+ alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
+ size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
+ alloc_align, nthreads, NULL,
+ false);
+ size = fold_convert (type, size);
+ }
+ else
+ size = build_int_cst (type, sz);
+ expand_omp_build_assign (&gsi2, memv, size, false);
mem = build_fold_addr_expr (memv);
}
tree t
true, GSI_SAME_STMT);
if (fd->have_pointer_condtemp)
expand_omp_build_assign (&gsi2, condtemp, memv, false);
+ if (fd->have_nonctrl_scantemp)
+ {
+ tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
+ expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
+ alloc_align, nthreads, &gsi2, false);
+ }
if (fd->have_reductemp)
{
gsi_remove (&gsi2, true);
gsi = gsi_last_nondebug_bb (third_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+ if (fd->have_nonctrl_scantemp)
+ {
+ tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+ tree controlp = NULL_TREE, controlb = NULL_TREE;
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && OMP_CLAUSE__SCANTEMP__CONTROL (c))
+ {
+ if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
+ controlb = OMP_CLAUSE_DECL (c);
+ else
+ controlp = OMP_CLAUSE_DECL (c);
+ if (controlb && controlp)
+ break;
+ }
+ gcc_assert (controlp && controlb);
+ tree cnt = create_tmp_var (sizetype);
+ gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
+ tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
+ alloc_align, cnt, NULL, true);
+ tree size = create_tmp_var (sizetype);
+ expand_omp_build_assign (&gsi, size, sz, false);
+ tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
+ size, size_int (16384));
+ expand_omp_build_assign (&gsi, controlb, cmp);
+ g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ fourth_bb = split_block (third_bb, g)->dest;
+ gsi = gsi_last_nondebug_bb (fourth_bb);
+ /* FIXME: Once we have allocators, this should use allocator. */
+ g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
+ gimple_call_set_lhs (g, controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
+ &gsi, true);
+ gsi_prev (&gsi);
+ g = gsi_stmt (gsi);
+ fifth_bb = split_block (fourth_bb, g)->dest;
+ gsi = gsi_last_nondebug_bb (fifth_bb);
+
+ g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
+ gimple_call_set_lhs (g, controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && OMP_CLAUSE__SCANTEMP__ALLOC (c))
+ {
+ tree tmp = create_tmp_var (sizetype);
+ tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+ g = gimple_build_assign (tmp, MULT_EXPR, cnt,
+ TYPE_SIZE_UNIT (pointee_type));
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ g = gimple_build_call (alloca_decl, 2, tmp,
+ size_int (TYPE_ALIGN (pointee_type)));
+ gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ }
+
+ sixth_bb = split_block (fifth_bb, g)->dest;
+ gsi = gsi_last_nondebug_bb (sixth_bb);
+ }
+
t = build2 (MULT_EXPR, itype, q, threadid);
t = build2 (PLUS_EXPR, itype, t, tt);
s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
{
t = gimple_omp_return_lhs (gsi_stmt (gsi));
- if (fd->have_reductemp || fd->have_pointer_condtemp)
+ if (fd->have_reductemp
+ || ((fd->have_pointer_condtemp || fd->have_scantemp)
+ && !fd->have_nonctrl_scantemp))
{
tree fn;
if (t)
gcall *g = gimple_build_call (fn, 0);
gsi_insert_after (&gsi, g, GSI_SAME_STMT);
}
+ if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
+ {
+ tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+ tree controlp = NULL_TREE, controlb = NULL_TREE;
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && OMP_CLAUSE__SCANTEMP__CONTROL (c))
+ {
+ if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
+ controlb = OMP_CLAUSE_DECL (c);
+ else
+ controlp = OMP_CLAUSE_DECL (c);
+ if (controlb && controlp)
+ break;
+ }
+ gcc_assert (controlp && controlb);
+ gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ exit1_bb = split_block (exit_bb, g)->dest;
+ gsi = gsi_after_labels (exit1_bb);
+ g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
+ controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ exit2_bb = split_block (exit1_bb, g)->dest;
+ gsi = gsi_after_labels (exit2_bb);
+ g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
+ controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ exit3_bb = split_block (exit2_bb, g)->dest;
+ gsi = gsi_after_labels (exit3_bb);
+ }
gsi_remove (&gsi, true);
/* Connect all the blocks. */
ep = find_edge (entry_bb, second_bb);
ep->flags = EDGE_TRUE_VALUE;
ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
- find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
- find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
+ if (fourth_bb)
+ {
+ ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (third_bb, fourth_bb);
+ ep->flags = EDGE_TRUE_VALUE;
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (fourth_bb, fifth_bb);
+ redirect_edge_and_branch (ep, sixth_bb);
+ }
+ else
+ sixth_bb = third_bb;
+ find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
+ find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
+ if (exit1_bb)
+ {
+ ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (exit_bb, exit1_bb);
+ ep->flags = EDGE_TRUE_VALUE;
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (exit1_bb, exit2_bb);
+ redirect_edge_and_branch (ep, exit3_bb);
+ }
if (!broken_loop)
{
set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
- set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
+ if (fourth_bb)
+ {
+ set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
+ set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
+ }
+ set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
set_immediate_dominator (CDI_DOMINATORS, body_bb,
recompute_dominator (CDI_DOMINATORS, body_bb));
set_immediate_dominator (CDI_DOMINATORS, fin_bb,
recompute_dominator (CDI_DOMINATORS, fin_bb));
+ if (exit1_bb)
+ {
+ set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
+ set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
+ }
struct loop *loop = body_bb->loop_father;
if (loop != entry_bb->loop_father)
fd->have_ordered = false;
fd->have_reductemp = false;
fd->have_pointer_condtemp = false;
+ fd->have_scantemp = false;
+ fd->have_nonctrl_scantemp = false;
fd->lastprivate_conditional = 0;
fd->tiling = NULL_TREE;
fd->collapse = 1;
if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
fd->have_pointer_condtemp = true;
break;
+ case OMP_CLAUSE__SCANTEMP_:
+ fd->have_scantemp = true;
+ if (!OMP_CLAUSE__SCANTEMP__ALLOC (t)
+ && !OMP_CLAUSE__SCANTEMP__CONTROL (t))
+ fd->have_nonctrl_scantemp = true;
+ break;
default:
break;
}
int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */
int ordered;
bool have_nowait, have_ordered, simd_schedule, have_reductemp;
- bool have_pointer_condtemp;
+ bool have_pointer_condtemp, have_scantemp, have_nonctrl_scantemp;
int lastprivate_conditional;
unsigned char sched_modifiers;
enum omp_clause_schedule_kind sched_kind;
/* True if there is nested scan context with inclusive clause. */
bool scan_inclusive;
+
+ /* True if there is nested scan context with exclusive clause. */
+ bool scan_exclusive;
};
static splay_tree all_contexts;
break;
case GIMPLE_OMP_FOR:
- if (((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
- & GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD)
+ if ((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
+ == GF_OMP_FOR_KIND_SIMD)
&& omp_maybe_offloaded_ctx (ctx)
&& omp_max_simt_vf ())
scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx);
case GIMPLE_OMP_SCAN:
if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
- if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
- ctx->scan_inclusive = true;
+ {
+ if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
+ ctx->scan_inclusive = true;
+ else if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_EXCLUSIVE)
+ ctx->scan_exclusive = true;
+ }
/* FALLTHRU */
case GIMPLE_OMP_SECTION:
case GIMPLE_OMP_MASTER:
sctx->lastlane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (*rvar) = 1;
- if (!ctx->scan_inclusive)
+ if (ctx->scan_exclusive)
{
/* And for exclusive scan yet another one, which will
hold the value during the scan phase. */
lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
omp_context *ctx, struct omp_for_data *fd)
{
- tree c, dtor, copyin_seq, x, ptr;
+ tree c, copyin_seq, x, ptr;
bool copyin_by_ref = false;
bool lastprivate_firstprivate = false;
bool reduction_omp_orig_ref = false;
x = lang_hooks.decls.omp_clause_dtor
(c, build_simple_mem_ref (y2));
if (x)
- {
- gimple_seq tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (dlist, tseq);
- }
+ gimplify_and_add (x, dlist);
}
}
else
{
y = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (y)
- {
- gimple_seq tseq = NULL;
-
- dtor = y;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (&llist[1], tseq);
- }
+ gimplify_and_add (y, &llist[1]);
}
break;
}
do_dtor:
x = lang_hooks.decls.omp_clause_dtor (c, new_var);
if (x)
- {
- gimple_seq tseq = NULL;
-
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (dlist, tseq);
- }
+ gimplify_and_add (x, dlist);
break;
case OMP_CLAUSE_LINEAR:
gimplify_and_add (x, &llist[0]);
x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x)
- {
- gimple_seq tseq = NULL;
-
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (&llist[1], tseq);
- }
+ gimplify_and_add (x, &llist[1]);
break;
}
if (omp_is_reference (var))
x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (dlist, tseq);
- }
+ gimplify_and_add (x, dlist);
}
tree ref = build_outer_var_ref (var, ctx);
x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (&llist[1], tseq);
- }
+ gimplify_and_add (x, &llist[1]);
tree ivar2 = unshare_expr (lvar);
TREE_OPERAND (ivar2, 1) = sctx.idx;
x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (&llist[1], tseq);
- }
+ gimplify_and_add (x, &llist[1]);
if (rvar2)
{
x = lang_hooks.decls.omp_clause_dtor (c, rvar2);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (&llist[1], tseq);
- }
+ gimplify_and_add (x, &llist[1]);
}
break;
}
build_fold_addr_expr (lvar));
x = lang_hooks.decls.omp_clause_dtor (c, ivar);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (&llist[1], tseq);
- }
+ gimplify_and_add (x, &llist[1]);
break;
}
/* If this is a reference to constant size reduction var
if (x)
gimplify_and_add (x, ilist);
- if (rvarp)
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_INSCAN (c))
{
- if (x)
+ if (x || (!is_simd
+ && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)))
{
tree nv = create_tmp_var_raw (TREE_TYPE (new_var));
gimple_add_tmp_var (nv);
ctx->cb.decl_map->put (new_vard, nv);
x = lang_hooks.decls.omp_clause_default_ctor
(c, nv, build_outer_var_ref (var, ctx));
- gimplify_and_add (x, ilist);
+ if (x)
+ gimplify_and_add (x, ilist);
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
gimple_seq_add_seq (ilist, tseq);
}
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
- if (!ctx->scan_inclusive)
+ if (is_simd && ctx->scan_exclusive)
{
tree nv2
= create_tmp_var_raw (TREE_TYPE (new_var));
gimplify_and_add (x, ilist);
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (dlist, tseq);
- }
+ gimplify_and_add (x, dlist);
}
x = lang_hooks.decls.omp_clause_dtor (c, nv);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (dlist, tseq);
- }
+ gimplify_and_add (x, dlist);
}
- else if (!ctx->scan_inclusive
+ else if (is_simd
+ && ctx->scan_exclusive
&& TREE_ADDRESSABLE (TREE_TYPE (new_var)))
{
tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var));
ctx->cb.decl_map->put (new_vard, nv2);
x = lang_hooks.decls.omp_clause_dtor (c, nv2);
if (x)
- {
- tseq = NULL;
- dtor = x;
- gimplify_stmt (&dtor, &tseq);
- gimple_seq_add_seq (dlist, tseq);
- }
+ gimplify_and_add (x, dlist);
}
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
goto do_dtor;
{
if (omp_is_reference (var) && is_simd)
handle_simd_reference (clause_loc, new_vard, ilist);
- if (rvarp)
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_INSCAN (c))
break;
gimplify_assign (new_var, x, ilist);
if (is_simd)
lastprivate clauses we need to ensure the lastprivate copying
happens after firstprivate copying in all threads. And similarly
for UDRs if initializer expression refers to omp_orig. */
- if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
+ if (copyin_by_ref || lastprivate_firstprivate
+ || (reduction_omp_orig_ref
+ && !ctx->scan_inclusive
+ && !ctx->scan_exclusive))
{
/* Don't add any barrier for #pragma omp simd or
#pragma omp distribute. */
&& gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
return;
+ /* inscan reductions are handled elsewhere. */
+ if (ctx->scan_inclusive || ctx->scan_exclusive)
+ return;
+
/* First see if there is exactly one reduction clause. Use OMP_ATOMIC
update in that case, otherwise use a lock. */
for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
gimple_seq before = NULL;
omp_context *octx = ctx->outer;
gcc_assert (octx);
- if (!octx->scan_inclusive && !has_clauses)
+ if (octx->scan_exclusive && !has_clauses)
{
gimple_stmt_iterator gsi2 = *gsi_p;
gsi_next (&gsi2);
}
bool input_phase = has_clauses ^ octx->scan_inclusive;
- if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
- && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
- && !gimple_omp_for_combined_into_p (octx->stmt))
+ bool is_simd = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
+ && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
+ && !gimple_omp_for_combined_into_p (octx->stmt));
+ bool is_for = (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
+ && gimple_omp_for_kind (octx->stmt) == GF_OMP_FOR_KIND_FOR
+ && !gimple_omp_for_combined_p (octx->stmt));
+ if (is_simd)
+ if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
+ OMP_CLAUSE__SIMDUID_))
+ {
+ tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
+ lane = create_tmp_var (unsigned_type_node);
+ tree t = build_int_cst (integer_type_node,
+ input_phase ? 1
+ : octx->scan_inclusive ? 2 : 3);
+ gimple *g
+ = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
+ gimple_call_set_lhs (g, lane);
+ gimple_seq_add_stmt (&before, g);
+ }
+
+ if (is_simd || is_for)
{
- if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
- OMP_CLAUSE__SIMDUID_))
- {
- tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
- lane = create_tmp_var (unsigned_type_node);
- tree t = build_int_cst (integer_type_node,
- input_phase ? 1
- : octx->scan_inclusive ? 2 : 3);
- gimple *g
- = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
- gimple_call_set_lhs (g, lane);
- gimple_seq_add_stmt (&before, g);
- }
for (tree c = gimple_omp_for_clauses (octx->stmt);
c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
if (DECL_HAS_VALUE_EXPR_P (new_vard))
{
val = DECL_VALUE_EXPR (new_vard);
- if (omp_is_reference (var))
+ if (new_vard != new_var)
{
gcc_assert (TREE_CODE (val) == ADDR_EXPR);
val = TREE_OPERAND (val, 0);
lane0 = TREE_OPERAND (val, 1);
TREE_OPERAND (val, 1) = lane;
var2 = lookup_decl (v, octx);
- if (!octx->scan_inclusive)
+ if (octx->scan_exclusive)
var4 = lookup_decl (var2, octx);
if (input_phase
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
var2 = build4 (ARRAY_REF, TREE_TYPE (val),
var2, lane, NULL_TREE, NULL_TREE);
TREE_THIS_NOTRAP (var2) = 1;
- if (!octx->scan_inclusive)
+ if (octx->scan_exclusive)
{
var4 = build4 (ARRAY_REF, TREE_TYPE (val),
var4, lane, NULL_TREE,
var3 = maybe_lookup_decl (new_vard, octx);
if (var3 == new_vard || var3 == NULL_TREE)
var3 = NULL_TREE;
- else if (!octx->scan_inclusive && !input_phase)
+ else if (is_simd && octx->scan_exclusive && !input_phase)
{
var4 = maybe_lookup_decl (var3, octx);
if (var4 == var3 || var4 == NULL_TREE)
}
}
}
- if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE)
+ if (is_simd
+ && octx->scan_exclusive
+ && !input_phase
+ && var4 == NULL_TREE)
var4 = create_tmp_var (TREE_TYPE (val));
}
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
/* Otherwise, assign to it the identity element. */
gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+ if (is_for)
+ tseq = copy_gimple_seq_and_replace_locals (tseq);
tree ref = build_outer_var_ref (var, octx);
tree x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
if (x)
{
- if (omp_is_reference (var))
+ if (new_vard != new_var)
val = build_fold_addr_expr_loc (clause_loc, val);
SET_DECL_VALUE_EXPR (new_vard, val);
}
SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
gimple_seq_add_seq (&before, tseq);
- OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+ if (is_simd)
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
}
}
- else
+ else if (is_simd)
{
tree x;
- if (!octx->scan_inclusive)
+ if (octx->scan_exclusive)
{
tree v4 = unshare_expr (var4);
tree v2 = unshare_expr (var2);
x = (DECL_HAS_VALUE_EXPR_P (new_vard)
? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
tree vexpr = val;
- if (x && omp_is_reference (var))
+ if (x && new_vard != new_var)
vexpr = build_fold_addr_expr_loc (clause_loc, val);
if (x)
SET_DECL_VALUE_EXPR (new_vard, vexpr);
tree x = omp_reduction_init (c, TREE_TYPE (new_var));
gimplify_assign (val, x, &before);
}
- else
+ else if (is_simd)
{
/* scan phase. */
enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
}
}
}
- if (!octx->scan_inclusive && !input_phase && lane0)
+ if (octx->scan_exclusive && !input_phase && lane0)
{
tree vexpr = unshare_expr (var4);
TREE_OPERAND (vexpr, 1) = lane0;
- if (omp_is_reference (var))
+ if (new_vard != new_var)
vexpr = build_fold_addr_expr_loc (clause_loc, vexpr);
SET_DECL_VALUE_EXPR (new_vard, vexpr);
}
else if (has_clauses)
sorry_at (gimple_location (stmt),
"%<#pragma omp scan%> not supported yet");
- gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
- gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
- gsi_replace (gsi_p, gimple_build_nop (), true);
+ if (!is_for)
+ {
+ gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
+ gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
+ gsi_replace (gsi_p, gimple_build_nop (), true);
+ }
+ else if (before)
+ {
+ gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (stmt));
+ gsi_insert_seq_before (&gsi, before, GSI_SAME_STMT);
+ }
}
}
}
+/* Callback for walk_gimple_seq.  Find the first #pragma omp scan statement
+   and store an iterator pointing at it through WI->info (which the caller
+   has set up to point at a gimple_stmt_iterator).  Returns a non-NULL tree
+   to terminate the walk as soon as a scan is found, NULL otherwise.  */
+
+static tree
+omp_find_scan (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
+ struct walk_stmt_info *wi)
+{
+ gimple *stmt = gsi_stmt (*gsi_p);
+
+ *handled_ops_p = true;
+ switch (gimple_code (stmt))
+ {
+ WALK_SUBSTMTS;
+
+ case GIMPLE_OMP_SCAN:
+ /* Hand the scan's location back to the caller and stop walking by
+ returning a non-NULL tree.  */
+ *(gimple_stmt_iterator *) (wi->info) = *gsi_p;
+ return integer_zero_node;
+ default:
+ break;
+ }
+ return NULL;
+}
+
+/* Helper function for lower_omp_for, add transformations for a worksharing
+ loop with scan directives inside of it.
+ For worksharing loop not combined with simd, transform:
+ #pragma omp for reduction(inscan,+:r) private(i)
+ for (i = 0; i < n; i = i + 1)
+ {
+ {
+ update (r);
+ }
+ #pragma omp scan inclusive(r)
+ {
+ use (r);
+ }
+ }
+
+ into two worksharing loops + code to merge results:
+
+ num_threads = omp_get_num_threads ();
+ thread_num = omp_get_thread_num ();
+ if (thread_num == 0) goto <D.2099>; else goto <D.2100>;
+ <D.2099>:
+ var2 = r;
+ goto <D.2101>;
+ <D.2100>:
+ // For UDRs this is UDR init, or if ctors are needed, copy from
+ // var3 that has been constructed to contain the neutral element.
+ var2 = 0;
+ <D.2101>:
+ ivar = 0;
+ // The _scantemp_ clauses will arrange for rpriva to be initialized to
+ // a shared array with num_threads elements and rprivb to a local array
+ // number of elements equal to the number of (contiguous) iterations the
+ // current thread will perform. controlb and controlp variables are
+ // temporaries to handle deallocation of rprivb at the end of second
+ // GOMP_FOR.
+ #pragma omp for _scantemp_(rpriva) _scantemp_(rprivb) _scantemp_(controlb) \
+ _scantemp_(controlp) reduction(inscan,+:r) private(i) nowait
+ for (i = 0; i < n; i = i + 1)
+ {
+ {
+ // For UDRs this is UDR init or copy from var3.
+ r = 0;
+ // This is the input phase from user code.
+ update (r);
+ }
+ {
+ // For UDRs this is UDR merge.
+ var2 = var2 + r;
+ // Rather than handing it over to the user, save to local thread's
+ // array.
+ rprivb[ivar] = var2;
+ // For exclusive scan, the above two statements are swapped.
+ ivar = ivar + 1;
+ }
+ }
+ // And remember the final value from this thread's into the shared
+ // rpriva array.
+ rpriva[(sizetype) thread_num] = var2;
+ // If more than one thread, compute using Work-Efficient prefix sum
+ // the inclusive parallel scan of the rpriva array.
+ if (num_threads > 1) goto <D.2102>; else goto <D.2103>;
+ <D.2102>:
+ GOMP_barrier ();
+ down = 0;
+ k = 1;
+ num_threadsu = (unsigned int) num_threads;
+ thread_numup1 = (unsigned int) thread_num + 1;
+ <D.2108>:
+ twok = k << 1;
+ if (twok > num_threadsu) goto <D.2110>; else goto <D.2111>;
+ <D.2110>:
+ down = 4294967295;
+ k = k >> 1;
+ if (k == num_threadsu) goto <D.2112>; else goto <D.2111>;
+ <D.2112>:
+ k = k >> 1;
+ <D.2111>:
+ twok = k << 1;
+ cplx = .MUL_OVERFLOW (thread_numup1, twok);
+ mul = REALPART_EXPR <cplx>;
+ ovf = IMAGPART_EXPR <cplx>;
+ if (ovf == 0) goto <D.2116>; else goto <D.2117>;
+ <D.2116>:
+ andv = k & down;
+ andvm1 = andv + 4294967295;
+ l = mul + andvm1;
+ if (l < num_threadsu) goto <D.2120>; else goto <D.2117>;
+ <D.2120>:
+ // For UDRs this is UDR merge, performed using var2 variable as temporary,
+ // i.e. var2 = rpriva[l - k]; UDR merge (var2, rpriva[l]); rpriva[l] = var2;
+ rpriva[l] = rpriva[l - k] + rpriva[l];
+ <D.2117>:
+ if (down == 0) goto <D.2121>; else goto <D.2122>;
+ <D.2121>:
+ k = k << 1;
+ goto <D.2123>;
+ <D.2122>:
+ k = k >> 1;
+ <D.2123>:
+ GOMP_barrier ();
+ if (k != 0) goto <D.2108>; else goto <D.2103>;
+ <D.2103>:
+ if (thread_num == 0) goto <D.2124>; else goto <D.2125>;
+ <D.2124>:
+ // For UDRs this is UDR init or copy from var3.
+ var2 = 0;
+ goto <D.2126>;
+ <D.2125>:
+ var2 = rpriva[thread_num - 1];
+ <D.2126>:
+ ivar = 0;
+ #pragma omp for _scantemp_(controlb) _scantemp_(controlp) \
+ reduction(inscan,+:r) private(i)
+ for (i = 0; i < n; i = i + 1)
+ {
+ {
+ // For UDRs, this is UDR merge (rprivb[ivar], var2); r = rprivb[ivar];
+ r = rprivb[ivar] + var2;
+ }
+ {
+ // This is the scan phase from user code.
+ use (r);
+ // Plus a bump of the iterator.
+ ivar = ivar + 1;
+ }
+ } */
+
+static void
+lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
+ struct omp_for_data *fd, omp_context *ctx)
+{
+ /* A scan directive of exactly one flavor must have been recorded.  */
+ gcc_assert (ctx->scan_inclusive || ctx->scan_exclusive);
+
+ /* Locate the GIMPLE_OMP_SCAN separating the input phase from the scan
+ phase in the loop body; it must be there (see the gcc_assert below).  */
+ gimple_seq body = gimple_omp_body (stmt);
+ gimple_stmt_iterator input1_gsi = gsi_none ();
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ wi.val_only = true;
+ wi.info = (void *) &input1_gsi;
+ walk_gimple_seq_mod (&body, omp_find_scan, NULL, &wi);
+ gcc_assert (!gsi_end_p (input1_gsi));
+
+ gimple *input_stmt1 = gsi_stmt (input1_gsi);
+ gimple_stmt_iterator gsi = input1_gsi;
+ gsi_next (&gsi);
+ gimple_stmt_iterator scan1_gsi = gsi;
+ gimple *scan_stmt1 = gsi_stmt (gsi);
+ gcc_assert (scan_stmt1 && gimple_code (scan_stmt1) == GIMPLE_OMP_SCAN);
+
+ gimple_seq input_body = gimple_omp_body (input_stmt1);
+ gimple_seq scan_body = gimple_omp_body (scan_stmt1);
+ gimple_omp_set_body (input_stmt1, NULL);
+ gimple_omp_set_body (scan_stmt1, NULL);
+ gimple_omp_set_body (stmt, NULL);
+
+ /* Copy the whole loop for the second worksharing loop (the bodies were
+ detached above so they are not duplicated), then find the scan in the
+ copy as well.  */
+ gomp_for *new_stmt = as_a <gomp_for *> (gimple_copy (stmt));
+ gimple_seq new_body = copy_gimple_seq_and_replace_locals (body);
+ gimple_omp_set_body (stmt, body);
+ gimple_omp_set_body (input_stmt1, input_body);
+
+ gimple_stmt_iterator input2_gsi = gsi_none ();
+ memset (&wi, 0, sizeof (wi));
+ wi.val_only = true;
+ wi.info = (void *) &input2_gsi;
+ walk_gimple_seq_mod (&new_body, omp_find_scan, NULL, &wi);
+ gcc_assert (!gsi_end_p (input2_gsi));
+
+ gimple *input_stmt2 = gsi_stmt (input2_gsi);
+ gsi = input2_gsi;
+ gsi_next (&gsi);
+ gimple_stmt_iterator scan2_gsi = gsi;
+ gimple *scan_stmt2 = gsi_stmt (gsi);
+ gcc_assert (scan_stmt2 && gimple_code (scan_stmt2) == GIMPLE_OMP_SCAN);
+ gimple_omp_set_body (scan_stmt2, scan_body);
+
+ /* Emit num_threads = omp_get_num_threads ();
+ thread_num = omp_get_thread_num ();  */
+ tree num_threads = create_tmp_var (integer_type_node);
+ tree thread_num = create_tmp_var (integer_type_node);
+ tree nthreads_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
+ tree threadnum_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
+ gimple *g = gimple_build_call (nthreads_decl, 0);
+ gimple_call_set_lhs (g, num_threads);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_call (threadnum_decl, 0);
+ gimple_call_set_lhs (g, thread_num);
+ gimple_seq_add_stmt (body_p, g);
+
+ tree ivar = create_tmp_var (sizetype);
+ tree new_clauses1 = NULL_TREE, new_clauses2 = NULL_TREE;
+ tree *cp1 = &new_clauses1, *cp2 = &new_clauses2;
+ tree k = create_tmp_var (unsigned_type_node);
+ tree l = create_tmp_var (unsigned_type_node);
+
+ gimple_seq clist = NULL, mdlist = NULL;
+ gimple_seq thr01_list = NULL, thrn1_list = NULL;
+ gimple_seq thr02_list = NULL, thrn2_list = NULL;
+ gimple_seq scan1_list = NULL, input2_list = NULL;
+ gimple_seq last_list = NULL, reduc_list = NULL;
+ /* For each inscan reduction clause build the helper statement sequences
+ that are stitched into the emitted code below (see the comment before
+ this function for the overall shape).  */
+ for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_INSCAN (c))
+ {
+ location_t clause_loc = OMP_CLAUSE_LOCATION (c);
+ tree var = OMP_CLAUSE_DECL (c);
+ tree new_var = lookup_decl (var, ctx);
+ tree var3 = NULL_TREE;
+ tree new_vard = new_var;
+ if (omp_is_reference (var))
+ new_var = build_simple_mem_ref_loc (clause_loc, new_var);
+ if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ {
+ var3 = maybe_lookup_decl (new_vard, ctx);
+ if (var3 == new_vard)
+ var3 = NULL_TREE;
+ }
+
+ /* rpriva: shared per-thread result array; rprivb: this thread's
+ local per-iteration array (marked for allocation).  */
+ tree ptype = build_pointer_type (TREE_TYPE (new_var));
+ tree rpriva = create_tmp_var (ptype);
+ tree nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
+ OMP_CLAUSE_DECL (nc) = rpriva;
+ *cp1 = nc;
+ cp1 = &OMP_CLAUSE_CHAIN (nc);
+
+ tree rprivb = create_tmp_var (ptype);
+ nc = build_omp_clause (clause_loc, OMP_CLAUSE__SCANTEMP_);
+ OMP_CLAUSE_DECL (nc) = rprivb;
+ OMP_CLAUSE__SCANTEMP__ALLOC (nc) = 1;
+ *cp1 = nc;
+ cp1 = &OMP_CLAUSE_CHAIN (nc);
+
+ tree var2 = create_tmp_var_raw (TREE_TYPE (new_var));
+ if (new_vard != new_var)
+ TREE_ADDRESSABLE (var2) = 1;
+ gimple_add_tmp_var (var2);
+
+ /* rpriva[thread_num] reference.  */
+ tree x = fold_convert_loc (clause_loc, sizetype, thread_num);
+ x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
+ TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
+ x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
+ tree rpriva_ref = build_simple_mem_ref_loc (clause_loc, x);
+
+ /* rpriva[thread_num - 1] reference.  */
+ x = fold_build2_loc (clause_loc, PLUS_EXPR, integer_type_node,
+ thread_num, integer_minus_one_node);
+ x = fold_convert_loc (clause_loc, sizetype, x);
+ x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
+ TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
+ x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
+ tree rprivam1_ref = build_simple_mem_ref_loc (clause_loc, x);
+
+ /* rpriva[l] reference.  */
+ x = fold_convert_loc (clause_loc, sizetype, l);
+ x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
+ TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
+ x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
+ tree rprival_ref = build_simple_mem_ref_loc (clause_loc, x);
+
+ /* rpriva[l - k] reference.  */
+ x = fold_build2_loc (clause_loc, MINUS_EXPR, unsigned_type_node, l, k);
+ x = fold_convert_loc (clause_loc, sizetype, x);
+ x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, x,
+ TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
+ x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rpriva), rpriva, x);
+ tree rprivalmk_ref = build_simple_mem_ref_loc (clause_loc, x);
+
+ /* rprivb[ivar] reference.  */
+ x = fold_build2_loc (clause_loc, MULT_EXPR, sizetype, ivar,
+ TYPE_SIZE_UNIT (TREE_TYPE (ptype)));
+ x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (rprivb), rprivb, x);
+ tree rprivb_ref = build_simple_mem_ref_loc (clause_loc, x);
+
+ if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ {
+ /* User-defined reduction: go through the language hooks and the
+ recorded GIMPLE init/merge sequences.  */
+ tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
+ tree val = var2;
+ if (new_vard != new_var)
+ val = build_fold_addr_expr_loc (clause_loc, val);
+
+ x = lang_hooks.decls.omp_clause_default_ctor
+ (c, var2, build_outer_var_ref (var, ctx));
+ if (x)
+ gimplify_and_add (x, &clist);
+
+ x = build_outer_var_ref (var, ctx);
+ x = lang_hooks.decls.omp_clause_assign_op (c, var2, x);
+ gimplify_and_add (x, &thr01_list);
+
+ tree y = (DECL_HAS_VALUE_EXPR_P (new_vard)
+ ? DECL_VALUE_EXPR (new_vard) : NULL_TREE);
+ if (var3)
+ {
+ x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
+ gimplify_and_add (x, &thrn1_list);
+ x = lang_hooks.decls.omp_clause_assign_op (c, var2, var3);
+ gimplify_and_add (x, &thr02_list);
+ }
+ else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+ {
+ /* Otherwise, assign to it the identity element. */
+ gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+ tseq = copy_gimple_seq_and_replace_locals (tseq);
+ SET_DECL_VALUE_EXPR (new_vard, val);
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+ SET_DECL_VALUE_EXPR (placeholder, error_mark_node);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
+ lower_omp (&tseq, ctx);
+ gimple_seq_add_seq (&thrn1_list, tseq);
+ tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+ lower_omp (&tseq, ctx);
+ gimple_seq_add_seq (&thr02_list, tseq);
+ SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+ if (y)
+ SET_DECL_VALUE_EXPR (new_vard, y);
+ else
+ {
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
+ SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
+ }
+ }
+
+ x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivam1_ref);
+ gimplify_and_add (x, &thrn2_list);
+
+ /* For exclusive scan, rprivb[ivar] is stored before the merge;
+ for inclusive scan, after it (see the function comment).  */
+ if (ctx->scan_exclusive)
+ {
+ x = unshare_expr (rprivb_ref);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
+ gimplify_and_add (x, &scan1_list);
+ }
+
+ gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+ tseq = copy_gimple_seq_and_replace_locals (tseq);
+ SET_DECL_VALUE_EXPR (placeholder, var2);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
+ lower_omp (&tseq, ctx);
+ gimple_seq_add_seq (&scan1_list, tseq);
+
+ if (ctx->scan_inclusive)
+ {
+ x = unshare_expr (rprivb_ref);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
+ gimplify_and_add (x, &scan1_list);
+ }
+
+ x = unshare_expr (rpriva_ref);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x, var2);
+ gimplify_and_add (x, &mdlist);
+
+ tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+ tseq = copy_gimple_seq_and_replace_locals (tseq);
+ SET_DECL_VALUE_EXPR (new_vard, val);
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+ SET_DECL_VALUE_EXPR (placeholder, rprivb_ref);
+ lower_omp (&tseq, ctx);
+ if (y)
+ SET_DECL_VALUE_EXPR (new_vard, y);
+ else
+ {
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
+ SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
+ }
+ gimple_seq_add_seq (&input2_list, tseq);
+
+ x = unshare_expr (new_var);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x, rprivb_ref);
+ gimplify_and_add (x, &input2_list);
+
+ x = build_outer_var_ref (var, ctx);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x, rpriva_ref);
+ gimplify_and_add (x, &last_list);
+
+ /* UDR merge for the prefix-sum step, using var2 as temporary:
+ var2 = rpriva[l - k]; merge (var2, rpriva[l]); rpriva[l] = var2;  */
+ x = lang_hooks.decls.omp_clause_assign_op (c, var2, rprivalmk_ref);
+ gimplify_and_add (x, &reduc_list);
+ tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+ tseq = copy_gimple_seq_and_replace_locals (tseq);
+ val = rprival_ref;
+ if (new_vard != new_var)
+ val = build_fold_addr_expr_loc (clause_loc, val);
+ SET_DECL_VALUE_EXPR (new_vard, val);
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+ SET_DECL_VALUE_EXPR (placeholder, var2);
+ lower_omp (&tseq, ctx);
+ OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
+ SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+ if (y)
+ SET_DECL_VALUE_EXPR (new_vard, y);
+ else
+ {
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 0;
+ SET_DECL_VALUE_EXPR (new_vard, NULL_TREE);
+ }
+ gimple_seq_add_seq (&reduc_list, tseq);
+ x = lang_hooks.decls.omp_clause_assign_op (c, rprival_ref, var2);
+ gimplify_and_add (x, &reduc_list);
+
+ x = lang_hooks.decls.omp_clause_dtor (c, var2);
+ if (x)
+ gimplify_and_add (x, dlist);
+ }
+ else
+ {
+ /* Plain arithmetic reduction.  */
+ x = build_outer_var_ref (var, ctx);
+ gimplify_assign (var2, x, &thr01_list);
+
+ x = omp_reduction_init (c, TREE_TYPE (new_var));
+ gimplify_assign (var2, unshare_expr (x), &thrn1_list);
+ gimplify_assign (var2, x, &thr02_list);
+
+ gimplify_assign (var2, rprivam1_ref, &thrn2_list);
+
+ enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+ if (code == MINUS_EXPR)
+ code = PLUS_EXPR;
+
+ if (ctx->scan_exclusive)
+ gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
+ x = build2 (code, TREE_TYPE (new_var), var2, new_var);
+ gimplify_assign (var2, x, &scan1_list);
+ if (ctx->scan_inclusive)
+ gimplify_assign (unshare_expr (rprivb_ref), var2, &scan1_list);
+
+ gimplify_assign (unshare_expr (rpriva_ref), var2, &mdlist);
+
+ x = build2 (code, TREE_TYPE (new_var), rprivb_ref, var2);
+ gimplify_assign (new_var, x, &input2_list);
+
+ gimplify_assign (build_outer_var_ref (var, ctx), rpriva_ref,
+ &last_list);
+
+ x = build2 (code, TREE_TYPE (new_var), rprivalmk_ref,
+ unshare_expr (rprival_ref));
+ gimplify_assign (rprival_ref, x, &reduc_list);
+ }
+ }
+
+ /* Bump ivar at the end of the scan phase in both loops.  */
+ g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
+ gimple_seq_add_stmt (&scan1_list, g);
+ g = gimple_build_assign (ivar, PLUS_EXPR, ivar, size_one_node);
+ gimple_seq_add_stmt (gimple_omp_body_ptr (scan_stmt2), g);
+
+ /* controlb/controlp handle deallocation of rprivb at the end of the
+ second GOMP_FOR; add them as _scantemp_ control clauses to both.  */
+ tree controlb = create_tmp_var (boolean_type_node);
+ tree controlp = create_tmp_var (ptr_type_node);
+ tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
+ OMP_CLAUSE_DECL (nc) = controlb;
+ OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
+ *cp1 = nc;
+ cp1 = &OMP_CLAUSE_CHAIN (nc);
+ nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
+ OMP_CLAUSE_DECL (nc) = controlp;
+ OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
+ *cp1 = nc;
+ cp1 = &OMP_CLAUSE_CHAIN (nc);
+ nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
+ OMP_CLAUSE_DECL (nc) = controlb;
+ OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
+ *cp2 = nc;
+ cp2 = &OMP_CLAUSE_CHAIN (nc);
+ nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SCANTEMP_);
+ OMP_CLAUSE_DECL (nc) = controlp;
+ OMP_CLAUSE__SCANTEMP__CONTROL (nc) = 1;
+ *cp2 = nc;
+ cp2 = &OMP_CLAUSE_CHAIN (nc);
+
+ /* Prepend the new clauses to the original clause chains of both loops.  */
+ *cp1 = gimple_omp_for_clauses (stmt);
+ gimple_omp_for_set_clauses (stmt, new_clauses1);
+ *cp2 = gimple_omp_for_clauses (new_stmt);
+ gimple_omp_for_set_clauses (new_stmt, new_clauses2);
+
+ gimple_omp_set_body (scan_stmt1, scan1_list);
+ gimple_omp_set_body (input_stmt2, input2_list);
+
+ /* Splice the (adjusted) input/scan bodies inline and remove the
+ GIMPLE_OMP_SCAN statements themselves.  */
+ gsi_insert_seq_after (&input1_gsi, gimple_omp_body (input_stmt1),
+ GSI_SAME_STMT);
+ gsi_remove (&input1_gsi, true);
+ gsi_insert_seq_after (&scan1_gsi, gimple_omp_body (scan_stmt1),
+ GSI_SAME_STMT);
+ gsi_remove (&scan1_gsi, true);
+ gsi_insert_seq_after (&input2_gsi, gimple_omp_body (input_stmt2),
+ GSI_SAME_STMT);
+ gsi_remove (&input2_gsi, true);
+ gsi_insert_seq_after (&scan2_gsi, gimple_omp_body (scan_stmt2),
+ GSI_SAME_STMT);
+ gsi_remove (&scan2_gsi, true);
+
+ gimple_seq_add_seq (body_p, clist);
+
+ /* Emit: if (thread_num == 0) then thr01_list else thrn1_list.  */
+ tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab3 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab1);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_seq (body_p, thr01_list);
+ g = gimple_build_goto (lab3);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab2);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_seq (body_p, thrn1_list);
+ g = gimple_build_label (lab3);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* ivar = 0;  */
+ g = gimple_build_assign (ivar, size_zero_node);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* Emit the first worksharing loop.  */
+ gimple_seq_add_stmt (body_p, stmt);
+ gimple_seq_add_seq (body_p, body);
+ gimple_seq_add_stmt (body_p, gimple_build_omp_continue (fd->loop.v,
+ fd->loop.v));
+
+ /* OMP return with nowait; then record this thread's final value into
+ the shared rpriva array (mdlist).  */
+ g = gimple_build_omp_return (true);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_seq (body_p, mdlist);
+
+ /* If num_threads > 1, compute the inclusive parallel scan of rpriva
+ with the work-efficient prefix-sum loop described in the comment
+ before this function.  */
+ lab1 = create_artificial_label (UNKNOWN_LOCATION);
+ lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (GT_EXPR, num_threads, integer_one_node, lab1, lab2);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab1);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = omp_build_barrier (NULL);
+ gimple_seq_add_stmt (body_p, g);
+
+ tree down = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (down, build_zero_cst (unsigned_type_node));
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_assign (k, build_one_cst (unsigned_type_node));
+ gimple_seq_add_stmt (body_p, g);
+
+ tree num_threadsu = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (num_threadsu, NOP_EXPR, num_threads);
+ gimple_seq_add_stmt (body_p, g);
+
+ tree thread_numu = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (thread_numu, NOP_EXPR, thread_num);
+ gimple_seq_add_stmt (body_p, g);
+
+ tree thread_nump1 = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (thread_nump1, PLUS_EXPR, thread_numu,
+ build_int_cst (unsigned_type_node, 1));
+ gimple_seq_add_stmt (body_p, g);
+
+ lab3 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_label (lab3);
+ gimple_seq_add_stmt (body_p, g);
+
+ tree twok = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* Switch from the up-sweep to the down-sweep phase once 2*k exceeds
+ the (unsigned) thread count.  */
+ tree lab4 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab6 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (GT_EXPR, twok, num_threadsu, lab4, lab5);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab4);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_assign (down, build_all_ones_cst (unsigned_type_node));
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_cond (EQ_EXPR, k, num_threadsu, lab6, lab5);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab6);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_label (lab5);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_assign (twok, LSHIFT_EXPR, k, integer_one_node);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* (thread_num + 1) * 2k with overflow check via .MUL_OVERFLOW.  */
+ tree cplx = create_tmp_var (build_complex_type (unsigned_type_node, false));
+ g = gimple_build_call_internal (IFN_MUL_OVERFLOW, 2, thread_nump1, twok);
+ gimple_call_set_lhs (g, cplx);
+ gimple_seq_add_stmt (body_p, g);
+ tree mul = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (mul, REALPART_EXPR,
+ build1 (REALPART_EXPR, unsigned_type_node, cplx));
+ gimple_seq_add_stmt (body_p, g);
+ tree ovf = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (ovf, IMAGPART_EXPR,
+ build1 (IMAGPART_EXPR, unsigned_type_node, cplx));
+ gimple_seq_add_stmt (body_p, g);
+
+ tree lab7 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab8 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (EQ_EXPR, ovf, build_zero_cst (unsigned_type_node),
+ lab7, lab8);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab7);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* l = mul + ((k & down) - 1); if in range, perform the merge step.  */
+ tree andv = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (andv, BIT_AND_EXPR, k, down);
+ gimple_seq_add_stmt (body_p, g);
+ tree andvm1 = create_tmp_var (unsigned_type_node);
+ g = gimple_build_assign (andvm1, PLUS_EXPR, andv,
+ build_minus_one_cst (unsigned_type_node));
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_assign (l, PLUS_EXPR, mul, andvm1);
+ gimple_seq_add_stmt (body_p, g);
+
+ tree lab9 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (LT_EXPR, l, num_threadsu, lab9, lab8);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab9);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_seq (body_p, reduc_list);
+ g = gimple_build_label (lab8);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* k <<= 1 on the way up, k >>= 1 on the way down.  */
+ tree lab10 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab11 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab12 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (EQ_EXPR, down, build_zero_cst (unsigned_type_node),
+ lab10, lab11);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab10);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_assign (k, LSHIFT_EXPR, k, integer_one_node);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_goto (lab12);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab11);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_assign (k, RSHIFT_EXPR, k, integer_one_node);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab12);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = omp_build_barrier (NULL);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_cond (NE_EXPR, k, build_zero_cst (unsigned_type_node),
+ lab3, lab2);
+ gimple_seq_add_stmt (body_p, g);
+
+ g = gimple_build_label (lab2);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* Emit: if (thread_num == 0) then thr02_list else thrn2_list.  */
+ lab1 = create_artificial_label (UNKNOWN_LOCATION);
+ lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ lab3 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (EQ_EXPR, thread_num, integer_zero_node, lab1, lab2);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab1);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_seq (body_p, thr02_list);
+ g = gimple_build_goto (lab3);
+ gimple_seq_add_stmt (body_p, g);
+ g = gimple_build_label (lab2);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_seq (body_p, thrn2_list);
+ g = gimple_build_label (lab3);
+ gimple_seq_add_stmt (body_p, g);
+
+ /* ivar = 0; then the second worksharing loop.  */
+ g = gimple_build_assign (ivar, size_zero_node);
+ gimple_seq_add_stmt (body_p, g);
+ gimple_seq_add_stmt (body_p, new_stmt);
+ gimple_seq_add_seq (body_p, new_body);
+
+ /* In the destructor list, let the last thread copy the final value back
+ to the original variable (last_list), then chain the old *dlist.  */
+ gimple_seq new_dlist = NULL;
+ lab1 = create_artificial_label (UNKNOWN_LOCATION);
+ lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ tree num_threadsm1 = create_tmp_var (integer_type_node);
+ g = gimple_build_assign (num_threadsm1, PLUS_EXPR, num_threads,
+ integer_minus_one_node);
+ gimple_seq_add_stmt (&new_dlist, g);
+ g = gimple_build_cond (EQ_EXPR, thread_num, num_threadsm1, lab1, lab2);
+ gimple_seq_add_stmt (&new_dlist, g);
+ g = gimple_build_label (lab1);
+ gimple_seq_add_stmt (&new_dlist, g);
+ gimple_seq_add_seq (&new_dlist, last_list);
+ g = gimple_build_label (lab2);
+ gimple_seq_add_stmt (&new_dlist, g);
+ gimple_seq_add_seq (&new_dlist, *dlist);
+ *dlist = new_dlist;
+}
/* Lower code for an OMP loop directive. */
bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
&& gimple_omp_for_grid_phony (stmt));
- if (!phony_loop)
- gimple_seq_add_stmt (&body, stmt);
- gimple_seq_add_seq (&body, gimple_omp_body (stmt));
+ if ((ctx->scan_inclusive || ctx->scan_exclusive)
+ && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
+ {
+ gcc_assert (!phony_loop);
+ lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx);
+ }
+ else
+ {
+ if (!phony_loop)
+ gimple_seq_add_stmt (&body, stmt);
+ gimple_seq_add_seq (&body, gimple_omp_body (stmt));
+ }
if (!phony_loop)
gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
2019-07-03 Jakub Jelinek <jakub@redhat.com>
+ * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message.
+ * c-c++-common/gomp/scan-5.c (foo): Likewise.
+
* c-c++-common/gomp/scan-5.c: New test.
* c-c++-common/gomp/lastprivate-conditional-5.c: New test.
for (i = 0; i < 64; i++)
{
d[i] = a;
- #pragma omp scan inclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
+ #pragma omp scan inclusive (a)
a += c[i];
}
}
for (int i = 0; i < 64; i++)
{
r += a[i];
- #pragma omp scan inclusive (r) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
+ #pragma omp scan inclusive (r)
b[i] = r;
}
return r;
/* Internal clause: temporary for lastprivate(conditional:). */
OMP_CLAUSE__CONDTEMP_,
+ /* Internal clause: temporary for inscan reductions. */
+ OMP_CLAUSE__SCANTEMP_,
+
/* OpenACC/OpenMP clause: if (scalar-expression). */
OMP_CLAUSE_IF,
case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE__CONDTEMP_:
+ case OMP_CLAUSE__SCANTEMP_:
break;
/* The following clause belongs to the OpenACC cache directive, which
case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE__CONDTEMP_:
+ case OMP_CLAUSE__SCANTEMP_:
break;
/* The following clause belongs to the OpenACC cache directive, which
case OMP_CLAUSE__CONDTEMP_:
name = "_condtemp_";
goto print_remap;
+ case OMP_CLAUSE__SCANTEMP_:
+ name = "_scantemp_";
+ goto print_remap;
case OMP_CLAUSE_TO_DECLARE:
name = "to";
goto print_remap;
1, /* OMP_CLAUSE__LOOPTEMP_ */
1, /* OMP_CLAUSE__REDUCTEMP_ */
1, /* OMP_CLAUSE__CONDTEMP_ */
+ 1, /* OMP_CLAUSE__SCANTEMP_ */
1, /* OMP_CLAUSE_IF */
1, /* OMP_CLAUSE_NUM_THREADS */
1, /* OMP_CLAUSE_SCHEDULE */
"_looptemp_",
"_reductemp_",
"_condtemp_",
+ "_scantemp_",
"if",
"num_threads",
"schedule",
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__REDUCTEMP_:
case OMP_CLAUSE__CONDTEMP_:
+ case OMP_CLAUSE__SCANTEMP_:
case OMP_CLAUSE__SIMDUID_:
WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
/* FALLTHRU */
#define OMP_CLAUSE_DECL(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \
OMP_CLAUSE_PRIVATE, \
- OMP_CLAUSE__CONDTEMP_), 0)
+ OMP_CLAUSE__SCANTEMP_), 0)
#define OMP_CLAUSE_HAS_LOCATION(NODE) \
(LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus) \
!= UNKNOWN_LOCATION)
#define OMP_CLAUSE__CONDTEMP__ITER(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag)
+/* _SCANTEMP_ holding a temporary with a pointer to the thread's local
+   array allocation.  */
+#define OMP_CLAUSE__SCANTEMP__ALLOC(NODE) \
+ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_)->base.public_flag)
+
+/* _SCANTEMP_ holding temporary with a control variable for deallocation;
+ one boolean_type_node for test whether alloca was used, another one
+ to pass to __builtin_stack_restore or free. */
+#define OMP_CLAUSE__SCANTEMP__CONTROL(NODE) \
+ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__SCANTEMP_))
+
/* SSA_NAME accessors. */
/* Whether SSA_NAME NODE is a virtual operand. This simply caches the
+2019-07-03 Jakub Jelinek <jakub@redhat.com>
+
+ * testsuite/libgomp.c++/scan-1.C: New test.
+ * testsuite/libgomp.c++/scan-2.C: New test.
+ * testsuite/libgomp.c++/scan-3.C: New test.
+ * testsuite/libgomp.c++/scan-4.C: New test.
+ * testsuite/libgomp.c++/scan-5.C: New test.
+ * testsuite/libgomp.c++/scan-6.C: New test.
+ * testsuite/libgomp.c++/scan-7.C: New test.
+ * testsuite/libgomp.c++/scan-8.C: New test.
+ * testsuite/libgomp.c/scan-1.c: New test.
+ * testsuite/libgomp.c/scan-2.c: New test.
+ * testsuite/libgomp.c/scan-3.c: New test.
+ * testsuite/libgomp.c/scan-4.c: New test.
+ * testsuite/libgomp.c/scan-5.c: New test.
+ * testsuite/libgomp.c/scan-6.c: New test.
+ * testsuite/libgomp.c/scan-7.c: New test.
+ * testsuite/libgomp.c/scan-8.c: New test.
+
2019-06-18 Thomas Schwinge <thomas@codesourcery.com>
* testsuite/libgomp.oacc-c++/firstprivate-mappings-1.C: New file.
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+ s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+static inline void
+ini (S &x)
+{
+ x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r.s += a[i].s;
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+ S s;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s.s += 2 * a[i].s;
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return S (s);
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r.s += a[i].s;
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+ S s;
+ #pragma omp parallel for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s.s += 2 * a[i].s;
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return S (s);
+}
+
+int
+main ()
+{
+ S s;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += i;
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ }
+ if (bar ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += 2 * i;
+ if (b[i].s != s.s)
+ abort ();
+ }
+ r.s = 0;
+ baz (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += i;
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += 2 * i;
+ if (b[i].s != s.s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+int r, a[1024], b[1024], q;
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+ #pragma omp for reduction (inscan, +:r) nowait
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, +:s) nowait
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp parallel for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ }
+ r = 0;
+ baz (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ }
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+int r, a[1024], b[1024], q;
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+ #pragma omp for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+ #pragma omp parallel for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp parallel for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ }
+ r = 0;
+ baz (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+ s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+static inline void
+ini (S &x)
+{
+ x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b, S &r)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r.s += a[i].s;
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) S
+bar ()
+{
+ S s;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s.s += 2 * a[i].s;
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b, S &r)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r.s += a[i].s;
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) S
+qux ()
+{
+ S s;
+ #pragma omp parallel for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s.s += 2 * a[i].s;
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ S s;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b, r);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += i;
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ }
+ if (bar ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += 2 * i;
+ if (b[i].s != s.s)
+ abort ();
+ }
+ r.s = 0;
+ baz (a, b, r);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += i;
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += 2 * i;
+ if (b[i].s != s.s)
+ abort ();
+ }
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+
+template <typename T>
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ T s;
+};
+
+template <typename T>
+S<T>::S () : s (0)
+{
+}
+
+template <typename T>
+S<T>::~S ()
+{
+}
+
+template <typename T>
+S<T>::S (const S &x)
+{
+ s = x.s;
+}
+
+template <typename T>
+S<T> &
+S<T>::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+template <typename T>
+static inline void
+ini (S<T> &x)
+{
+ x.s = 0;
+}
+
+S<int> r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+template <typename T>
+__attribute__((noipa)) void
+foo (S<T> *a, S<T> *b)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+template <typename T>
+__attribute__((noipa)) S<T>
+bar (void)
+{
+ S<T> s;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return S<T> (s);
+}
+
+__attribute__((noipa)) void
+baz (S<int> *a, S<int> *b)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+__attribute__((noipa)) S<int>
+qux (void)
+{
+ S<int> s;
+ #pragma omp parallel for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return S<int> (s);
+}
+
+int
+main ()
+{
+ S<int> s;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (bar<int> ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+ r.s = 0;
+ baz (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+int r, a[1024], b[1024], q;
+
+template <typename T, typename U>
+__attribute__((noipa)) void
+foo (T a, T b, U r)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+template <typename T>
+__attribute__((noipa)) T
+bar ()
+{
+ T &s = q;
+ q = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+template <typename T>
+__attribute__((noipa)) void
+baz (T *a, T *b, T &r)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (T i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+template <typename T>
+__attribute__((noipa)) int
+qux ()
+{
+ T s = q;
+ q = 0;
+ #pragma omp parallel for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo<int *, int &> (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar<int> () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz<int> (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux<int &> () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+int r, a[1024], b[1024], q;
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b, int &r)
+{
+ #pragma omp for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, foo:s) nowait
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, int &r)
+{
+ #pragma omp parallel for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int &s = q;
+ q = 0;
+ #pragma omp parallel for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz (a, b, r);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+
+extern "C" void abort ();
+
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+ s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+static inline void
+ini (S &x)
+{
+ x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b, S &r)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+ S s;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b, S &r)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r.s += a[i].s;
+ }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+ S s;
+ #pragma omp parallel for reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s.s += 2 * a[i].s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ S s;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b, r);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (bar ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+ r.s = 0;
+ baz (a, b, r);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ s.s += i;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i].s != s.s)
+ abort ();
+ s.s += 2 * i;
+ }
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+int r, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp parallel for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+int r, a[1024], b[1024];
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp parallel for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp parallel for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+float r = 1.0f, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (float *a, float *b)
+{
+ #pragma omp for reduction (inscan, *:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r *= a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) float
+bar (void)
+{
+ float s = -__builtin_inff ();
+ #pragma omp parallel for reduction (inscan, max:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s = s > a[i] ? s : a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ float s = 1.0f;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (i < 80)
+ a[i] = (i & 1) ? 0.25f : 0.5f;
+ else if (i < 200)
+ a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+ else if (i < 280)
+ a[i] = (i & 1) ? 0.25f : 0.5f;
+ else if (i < 380)
+ a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+ else
+ switch (i % 6)
+ {
+ case 0: a[i] = 0.25f; break;
+ case 1: a[i] = 2.0f; break;
+ case 2: a[i] = -1.0f; break;
+ case 3: a[i] = -4.0f; break;
+ case 4: a[i] = 0.5f; break;
+ case 5: a[i] = 1.0f; break;
+ default: a[i] = 0.0f; break;
+ }
+ b[i] = -19.0f;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r * 16384.0f != 0.125f)
+ abort ();
+ float m = -175.25f;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s *= a[i];
+ if (b[i] != s)
+ abort ();
+ else
+ {
+ a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
+ b[i] = -231.75f;
+ m += 0.75f;
+ }
+ }
+ if (bar () != 592.0f)
+ abort ();
+ s = -__builtin_inff ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (s < a[i])
+ s = a[i];
+ if (b[i] != s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+int r, a[1024], b[1024];
+unsigned short r2, b2[1024];
+unsigned char r3, b3[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+ #pragma omp for reduction (inscan, +:r, r2, r3)
+ for (int i = 0; i < 1024; i++)
+ {
+ { r += a[i]; r2 += a[i]; r3 += a[i]; }
+ #pragma omp scan inclusive(r, r2, r3)
+ {
+ b[i] = r;
+ b2[i] = r2;
+ b3[i] = r3;
+ }
+ }
+}
+
+__attribute__((noipa)) int
+bar (unsigned short *s2p, unsigned char *s3p)
+{
+ int s = 0;
+ unsigned short s2 = 0;
+ unsigned char s3 = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, +:s, s2, s3)
+ for (int i = 0; i < 1024; i++)
+ {
+ {
+ s += 2 * a[i];
+ s2 += 2 * a[i];
+ s3 += 2 * a[i];
+ }
+ #pragma omp scan inclusive(s, s2, s3)
+ { b[i] = s; b2[i] = s2; b3[i] = s3; }
+ }
+ *s2p = s2;
+ *s3p = s3;
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+ #pragma omp parallel for reduction (inscan, +:r, r2, r3)
+ for (int i = 0; i < 1024; i++)
+ {
+ {
+ r += a[i];
+ r2 += a[i];
+ r3 += a[i];
+ }
+ #pragma omp scan inclusive(r, r2, r3)
+ {
+ b[i] = r;
+ b2[i] = r2;
+ b3[i] = r3;
+ }
+ }
+}
+
+__attribute__((noipa)) int
+qux (unsigned short *s2p, unsigned char *s3p)
+{
+ int s = 0;
+ unsigned short s2 = 0;
+ unsigned char s3 = 0;
+ #pragma omp parallel for reduction (inscan, +:s, s2, s3)
+ for (int i = 0; i < 1024; i++)
+ {
+ { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
+ #pragma omp scan inclusive(s, s2, s3)
+ { b[i] = s; b2[i] = s2; b3[i] = s3; }
+ }
+ *s2p = s2;
+ *s3p = s3;
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ unsigned short s2;
+ unsigned char s3;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ b2[i] = -1;
+ b3[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b, b2, b3);
+ if (r != 1024 * 1023 / 2
+ || r2 != (unsigned short) r
+ || r3 != (unsigned char) r)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+ }
+ }
+ if (bar (&s2, &s3) != 1024 * 1023)
+ abort ();
+ if (s2 != (unsigned short) (1024 * 1023)
+ || s3 != (unsigned char) (1024 * 1023))
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = -1;
+ b2[i] = -1;
+ b3[i] = -1;
+ }
+ }
+ r = 0;
+ r2 = 0;
+ r3 = 0;
+ baz (a, b, b2, b3);
+ if (r != 1024 * 1023 / 2
+ || r2 != (unsigned short) r
+ || r3 != (unsigned char) r)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+ }
+ }
+ s2 = 0;
+ s3 = 0;
+ if (qux (&s2, &s3) != 1024 * 1023)
+ abort ();
+ if (s2 != (unsigned short) (1024 * 1023)
+ || s3 != (unsigned char) (1024 * 1023))
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+int r, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp parallel for reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp parallel for reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+int r, a[1024], b[1024];
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp parallel for reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r += a[i];
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp parallel for reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s += 2 * a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ #pragma omp parallel
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ s += i;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ s += 2 * i;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ s += i;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+float r = 1.0f, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (float *a, float *b)
+{
+ #pragma omp for reduction (inscan, *:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = r;
+ #pragma omp scan exclusive(r)
+ r *= a[i];
+ }
+}
+
+__attribute__((noipa)) float
+bar (void)
+{
+ float s = -__builtin_inff ();
+ #pragma omp parallel for reduction (inscan, max:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ b[i] = s;
+ #pragma omp scan exclusive(s)
+ s = s > a[i] ? s : a[i];
+ }
+ return s;
+}
+
+int
+main ()
+{
+ float s = 1.0f;
+ /* Build an input of power-of-two factors (0.25f/0.5f/2.0f/4.0f,
+    with occasional 1.0f and sign flips) so every running product is
+    exactly representable in float and the scan results can be
+    compared with ==.  */
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (i < 80)
+ a[i] = (i & 1) ? 0.25f : 0.5f;
+ else if (i < 200)
+ a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+ else if (i < 280)
+ a[i] = (i & 1) ? 0.25f : 0.5f;
+ else if (i < 380)
+ a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
+ else
+ switch (i % 6)
+ {
+ case 0: a[i] = 0.25f; break;
+ case 1: a[i] = 2.0f; break;
+ case 2: a[i] = -1.0f; break;
+ case 3: a[i] = -4.0f; break;
+ case 4: a[i] = 0.5f; break;
+ case 5: a[i] = 1.0f; break;
+ default: a[i] = 0.0f; break;
+ }
+ b[i] = -19.0f;
+ asm ("" : "+g" (i));  /* Keep the compiler from folding this loop.  */
+ }
+ #pragma omp parallel
+ foo (a, b);
+ /* Final product check: r scaled by 16384.0f must give exactly
+    0.125f (both sides are powers of two).  */
+ if (r * 16384.0f != 0.125f)
+ abort ();
+ float m = -175.25f;
+ /* Verify the exclusive-scan prefix products, then overwrite a[]
+    with a ramp (m grows by 0.75f, minus 2.0f/4.0f on two of every
+    three elements) whose overall maximum is a[1023] == 592.0f,
+    matching the bar () check below.  */
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -231.75f;
+ s *= a[i];
+ a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
+ m += 0.75f;
+ }
+ if (bar () != 592.0f)
+ abort ();
+ /* Replay bar's exclusive max scan sequentially and compare.  */
+ s = -__builtin_inff ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s)
+ abort ();
+ if (s < a[i])
+ s = a[i];
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+
+extern void abort (void);
+int r, a[1024], b[1024];
+unsigned short r2, b2[1024];
+unsigned char r3, b3[1024];
+
+/* Exclusive inscan + reduction over three global variables of
+   different widths (int, unsigned short, unsigned char): each
+   b*[i] captures the prefix sum of a[0..i-1] (truncated to the
+   variable's width) before a[i] is added.  */
+__attribute__((noipa)) void
+foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+ #pragma omp for reduction (inscan, +:r, r2, r3)
+ for (int i = 0; i < 1024; i++)
+ {
+ {
+ b[i] = r;
+ b2[i] = r2;
+ b3[i] = r3;
+ }
+ #pragma omp scan exclusive(r, r2, r3)
+ { r += a[i]; r2 += a[i]; r3 += a[i]; }
+ }
+}
+
+/* As foo, but with function-local reduction variables, a separate
+   parallel + for combination, and a doubled addend (2 * a[i]).
+   The narrower results are returned through s2p/s3p.  */
+__attribute__((noipa)) int
+bar (unsigned short *s2p, unsigned char *s3p)
+{
+ int s = 0;
+ unsigned short s2 = 0;
+ unsigned char s3 = 0;
+ #pragma omp parallel
+ #pragma omp for reduction (inscan, +:s, s2, s3)
+ for (int i = 0; i < 1024; i++)
+ {
+ { b[i] = s; b2[i] = s2; b3[i] = s3; }
+ #pragma omp scan exclusive(s, s2, s3)
+ {
+ s += 2 * a[i];
+ s2 += 2 * a[i];
+ s3 += 2 * a[i];
+ }
+ }
+ *s2p = s2;
+ *s3p = s3;
+ return s;
+}
+
+/* Same exclusive scan as foo over the globals r/r2/r3, but using
+   the combined parallel for form instead of an orphaned
+   worksharing loop.  */
+__attribute__((noipa)) void
+baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+ #pragma omp parallel for reduction (inscan, +:r, r2, r3)
+ for (int i = 0; i < 1024; i++)
+ {
+ {
+ b[i] = r;
+ b2[i] = r2;
+ b3[i] = r3;
+ }
+ #pragma omp scan exclusive(r, r2, r3)
+ {
+ r += a[i];
+ r2 += a[i];
+ r3 += a[i];
+ }
+ }
+}
+
+/* Same as bar (local variables, doubled addend) but with the
+   combined parallel for form.  */
+__attribute__((noipa)) int
+qux (unsigned short *s2p, unsigned char *s3p)
+{
+ int s = 0;
+ unsigned short s2 = 0;
+ unsigned char s3 = 0;
+ #pragma omp parallel for reduction (inscan, +:s, s2, s3)
+ for (int i = 0; i < 1024; i++)
+ {
+ { b[i] = s; b2[i] = s2; b3[i] = s3; }
+ #pragma omp scan exclusive(s, s2, s3)
+ { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
+ }
+ *s2p = s2;
+ *s3p = s3;
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ unsigned short s2;
+ unsigned char s3;
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ b2[i] = -1;
+ b3[i] = -1;
+ asm ("" : "+g" (i));  /* Keep the compiler from folding this loop.  */
+ }
+ #pragma omp parallel
+ foo (a, b, b2, b3);
+ /* Totals: sum 0..1023 = 1024*1023/2, with r2/r3 equal to that sum
+    truncated to their widths.  */
+ if (r != 1024 * 1023 / 2
+ || r2 != (unsigned short) r
+ || r3 != (unsigned char) r)
+ abort ();
+ /* Replay foo's exclusive prefix sums sequentially in all three
+    widths; scribble over b*[] so the next scan's writes are
+    observable.  */
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+ }
+ s += i;
+ }
+ /* bar uses 2 * a[i], so its total is 1024 * 1023.  */
+ if (bar (&s2, &s3) != 1024 * 1023)
+ abort ();
+ if (s2 != (unsigned short) (1024 * 1023)
+ || s3 != (unsigned char) (1024 * 1023))
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = -1;
+ b2[i] = -1;
+ b3[i] = -1;
+ }
+ s += 2 * i;
+ }
+ /* Reset the globals and repeat the whole sequence with the
+    combined-construct variants baz and qux.  */
+ r = 0;
+ r2 = 0;
+ r3 = 0;
+ baz (a, b, b2, b3);
+ if (r != 1024 * 1023 / 2
+ || r2 != (unsigned short) r
+ || r3 != (unsigned char) r)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ else
+ {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+ }
+ s += i;
+ }
+ s2 = 0;
+ s3 = 0;
+ if (qux (&s2, &s3) != 1024 * 1023)
+ abort ();
+ if (s2 != (unsigned short) (1024 * 1023)
+ || s3 != (unsigned char) (1024 * 1023))
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+ abort ();
+ s += 2 * i;
+ }
+ return 0;
+}