* omp-low.c (struct omp_context): Add scan_inclusive field.
(scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
if inclusive scan.
(struct omplow_simd_context): Add lastlane member.
(lower_rec_simd_input_clauses): Add rvar argument, handle inscan
reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
1 or 2 argument.
(lower_rec_input_clauses): Handle inscan reductions in simd contexts.
(lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
(lower_omp_scan): New function.
(lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
* tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
check 3rd argument if present rather than 2nd.
* tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
(struct _stmt_vec_info): Change simd_lane_access_p from bool into
2-bit bitfield.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
than 2nd.
(_loop_vec_info::~_loop_vec_info): Delete scan_map.
* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
init.
(vect_find_stmt_data_reference): Encode in ->aux the 2nd
IFN_GOMP_SIMD_LANE argument.
(vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
encoded ->aux value.
* tree-vect-stmts.c: Include attribs.h.
(vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
(scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
functions.
(vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
(vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
cp/
* semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
set need_copy_assignment.
testsuite/
* gcc.dg/vect/vect-simd-8.c: New test.
* gcc.dg/vect/vect-simd-9.c: New test.
* g++.dg/vect/simd-2.cc: New test.
* g++.dg/gomp/scan-1.C: New test.
From-SVN: r272399
+2019-06-17 Jakub Jelinek <jakub@redhat.com>
+
+ * omp-low.c (struct omp_context): Add scan_inclusive field.
+ (scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
+ if inclusive scan.
+ (struct omplow_simd_context): Add lastlane member.
+ (lower_rec_simd_input_clauses): Add rvar argument, handle inscan
+ reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
+ 1 or 2 argument.
+ (lower_rec_input_clauses): Handle inscan reductions in simd contexts.
+ (lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
+ (lower_omp_scan): New function.
+ (lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
+ * tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
+ check 3rd argument if present rather than 2nd.
+ * tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
+ (struct _stmt_vec_info): Change simd_lane_access_p from bool into
+ 2-bit bitfield.
+ * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
+ scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
+ than 2nd.
+ (_loop_vec_info::~_loop_vec_info): Delete scan_map.
+ * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
+ different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
+ init.
+ (vect_find_stmt_data_reference): Encode in ->aux the 2nd
+ IFN_GOMP_SIMD_LANE argument.
+ (vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
+ encoded ->aux value.
+ * tree-vect-stmts.c: Include attribs.h.
+ (vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
+ (scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
+ functions.
+ (vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
+ (vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
+
2019-06-17 Uroš Bizjak <ubizjak@gmail.com>
PR target/62055
+2019-06-17 Jakub Jelinek <jakub@redhat.com>
+
+ * semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
+ set need_copy_assignment.
+
2019-06-17 Marek Polacek <polacek@redhat.com>
PR c++/83820 - excessive attribute arguments not detected.
case OMP_CLAUSE_REDUCTION:
if (reduction_seen == -2)
OMP_CLAUSE_REDUCTION_INSCAN (c) = 0;
+ if (OMP_CLAUSE_REDUCTION_INSCAN (c))
+ need_copy_assignment = true;
need_implicitly_determined = true;
break;
case OMP_CLAUSE_IN_REDUCTION:
/* True if lower_omp_1 should look up lastprivate conditional in parent
context. */
bool combined_into_simd_safelen0;
+
+ /* True if there is nested scan context with inclusive clause. */
+ bool scan_inclusive;
};
static splay_tree all_contexts;
scan_omp_single (as_a <gomp_single *> (stmt), ctx);
break;
+ case GIMPLE_OMP_SCAN:
+ if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
+ if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
+ ctx->scan_inclusive = true;
+ /* FALLTHRU */
case GIMPLE_OMP_SECTION:
case GIMPLE_OMP_MASTER:
case GIMPLE_OMP_ORDERED:
case GIMPLE_OMP_CRITICAL:
- case GIMPLE_OMP_SCAN:
case GIMPLE_OMP_GRID_BODY:
ctx = new_omp_context (stmt, ctx);
scan_omp (gimple_omp_body_ptr (stmt), ctx);
omplow_simd_context () { memset (this, 0, sizeof (*this)); }
tree idx;
tree lane;
+ tree lastlane;
vec<tree, va_heap> simt_eargs;
gimple_seq simt_dlist;
poly_uint64_pod max_vf;
static bool
lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
- omplow_simd_context *sctx, tree &ivar, tree &lvar)
+ omplow_simd_context *sctx, tree &ivar,
+ tree &lvar, tree *rvar = NULL)
{
if (known_eq (sctx->max_vf, 0U))
{
= tree_cons (get_identifier ("omp simd array"), NULL,
DECL_ATTRIBUTES (avar));
gimple_add_tmp_var (avar);
- ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+ tree iavar = avar;
+ if (rvar)
+ {
+ /* For inscan reductions, create another array temporary,
+ which will hold the reduced value. */
+ iavar = create_tmp_var_raw (atype);
+ if (TREE_ADDRESSABLE (new_var))
+ TREE_ADDRESSABLE (iavar) = 1;
+ DECL_ATTRIBUTES (iavar)
+ = tree_cons (get_identifier ("omp simd array"), NULL,
+ tree_cons (get_identifier ("omp simd inscan"), NULL,
+ DECL_ATTRIBUTES (iavar)));
+ gimple_add_tmp_var (iavar);
+ ctx->cb.decl_map->put (avar, iavar);
+ if (sctx->lastlane == NULL_TREE)
+ sctx->lastlane = create_tmp_var (unsigned_type_node);
+ *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
+ sctx->lastlane, NULL_TREE, NULL_TREE);
+ TREE_THIS_NOTRAP (*rvar) = 1;
+ }
+ ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
NULL_TREE, NULL_TREE);
lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
NULL_TREE, NULL_TREE);
omplow_simd_context sctx = omplow_simd_context ();
tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
- gimple_seq llist[3] = { };
+ gimple_seq llist[4] = { };
tree nonconst_simd_if = NULL_TREE;
copyin_seq = NULL;
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
+ tree rvar = NULL_TREE, *rvarp = NULL;
+ if (is_simd
+ && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_INSCAN (c))
+ rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
- ivar, lvar))
+ ivar, lvar, rvarp))
{
if (new_vard == new_var)
{
x = lang_hooks.decls.omp_clause_default_ctor
(c, unshare_expr (ivar),
build_outer_var_ref (var, ctx));
+ if (rvarp)
+ {
+ if (x)
+ {
+ gimplify_and_add (x, &llist[0]);
+
+ tree ivar2 = unshare_expr (lvar);
+ TREE_OPERAND (ivar2, 1) = sctx.idx;
+ x = lang_hooks.decls.omp_clause_default_ctor
+ (c, ivar2, build_outer_var_ref (var, ctx));
+ gimplify_and_add (x, &llist[0]);
+
+ /* For types that need construction, add another
+ private var which will be default constructed
+ and optionally initialized with
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT, as in the
+ loop we want to assign this value instead of
+ constructing and destructing it in each
+ iteration. */
+ tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
+ gimple_add_tmp_var (nv);
+ ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
+ nv);
+ x = lang_hooks.decls.omp_clause_default_ctor
+ (c, nv, build_outer_var_ref (var, ctx));
+ gimplify_and_add (x, ilist);
+
+ if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+ {
+ tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+ x = DECL_VALUE_EXPR (new_var);
+ SET_DECL_VALUE_EXPR (new_var, nv);
+ lower_omp (&tseq, ctx);
+ SET_DECL_VALUE_EXPR (new_var, x);
+ gimple_seq_add_seq (ilist, tseq);
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+ }
+
+ x = lang_hooks.decls.omp_clause_dtor (c, nv);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (dlist, tseq);
+ }
+ }
+
+ tree ref = build_outer_var_ref (var, ctx);
+ x = unshare_expr (ivar);
+ x = lang_hooks.decls.omp_clause_assign_op (c, x,
+ ref);
+ gimplify_and_add (x, &llist[0]);
+
+ ref = build_outer_var_ref (var, ctx);
+ x = lang_hooks.decls.omp_clause_assign_op (c, ref,
+ rvar);
+ gimplify_and_add (x, &llist[3]);
+
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+ if (new_vard == new_var)
+ SET_DECL_VALUE_EXPR (new_var, lvar);
+ else
+ SET_DECL_VALUE_EXPR (new_vard,
+ build_fold_addr_expr (lvar));
+
+ x = lang_hooks.decls.omp_clause_dtor (c, ivar);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (&llist[1], tseq);
+ }
+
+ tree ivar2 = unshare_expr (lvar);
+ TREE_OPERAND (ivar2, 1) = sctx.idx;
+ x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (&llist[1], tseq);
+ }
+ break;
+ }
if (x)
gimplify_and_add (x, &llist[0]);
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
: build_outer_var_ref (var, ctx));
if (x)
gimplify_and_add (x, ilist);
+
+ if (rvarp)
+ {
+ if (x)
+ {
+ tree nv = create_tmp_var_raw (TREE_TYPE (new_vard));
+ gimple_add_tmp_var (nv);
+ ctx->cb.decl_map->put (new_var, nv);
+ x = lang_hooks.decls.omp_clause_default_ctor
+ (c, nv, build_outer_var_ref (var, ctx));
+ gimplify_and_add (x, ilist);
+ if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+ {
+ tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+ SET_DECL_VALUE_EXPR (new_var, nv);
+ DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+ lower_omp (&tseq, ctx);
+ SET_DECL_VALUE_EXPR (new_var, NULL_TREE);
+ DECL_HAS_VALUE_EXPR_P (new_var) = 0;
+ gimple_seq_add_seq (ilist, tseq);
+ }
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+ x = lang_hooks.decls.omp_clause_dtor (c, nv);
+ if (x)
+ {
+ tseq = NULL;
+ dtor = x;
+ gimplify_stmt (&dtor, &tseq);
+ gimple_seq_add_seq (dlist, tseq);
+ }
+ }
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+ goto do_dtor;
+ }
+
if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
{
tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
new_vard = TREE_OPERAND (new_var, 0);
gcc_assert (DECL_P (new_vard));
}
+ tree rvar = NULL_TREE, *rvarp = NULL;
+ if (is_simd
+ && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_INSCAN (c))
+ rvarp = &rvar;
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, &sctx,
- ivar, lvar))
+ ivar, lvar, rvarp))
{
+ if (new_vard != new_var)
+ {
+ SET_DECL_VALUE_EXPR (new_vard,
+ build_fold_addr_expr (lvar));
+ DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+ }
+
tree ref = build_outer_var_ref (var, ctx);
+ if (rvarp)
+ {
+ gimplify_assign (ivar, ref, &llist[0]);
+ ref = build_outer_var_ref (var, ctx);
+ gimplify_assign (ref, rvar, &llist[3]);
+ break;
+ }
+
gimplify_assign (unshare_expr (ivar), x, &llist[0]);
if (sctx.is_simt)
ref = build_outer_var_ref (var, ctx);
gimplify_assign (ref, x, &llist[1]);
- if (new_vard != new_var)
- {
- SET_DECL_VALUE_EXPR (new_vard,
- build_fold_addr_expr (lvar));
- DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
- }
}
- else
+ else if (rvarp == NULL)
{
if (omp_is_reference (var) && is_simd)
handle_simd_reference (clause_loc, new_vard, ilist);
if (sctx.lane)
{
gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE,
- 1 + (nonconst_simd_if != NULL),
- uid, nonconst_simd_if);
+ 2 + (nonconst_simd_if != NULL),
+ uid, integer_zero_node,
+ nonconst_simd_if);
gimple_call_set_lhs (g, sctx.lane);
gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
g = gimple_build_assign (sctx.lane, INTEGER_CST,
build_int_cst (unsigned_type_node, 0));
gimple_seq_add_stmt (ilist, g);
+ if (sctx.lastlane)
+ {
+ g = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
+ 2, uid, sctx.lane);
+ gimple_call_set_lhs (g, sctx.lastlane);
+ gimple_seq_add_stmt (dlist, g);
+ gimple_seq_add_seq (dlist, llist[3]);
+ }
/* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */
if (llist[2])
{
new_var = build4 (ARRAY_REF, TREE_TYPE (val),
TREE_OPERAND (val, 0), lastlane,
NULL_TREE, NULL_TREE);
+ TREE_THIS_NOTRAP (new_var) = 1;
}
}
else if (maybe_simt)
}
+/* Expand code for an OpenMP scan directive and the structured block
+ before the scan directive. */
+
+static void
+lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
+{
+ gimple *stmt = gsi_stmt (*gsi_p);
+ bool has_clauses
+ = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)) != NULL;
+ tree lane = NULL_TREE;
+ gimple_seq before = NULL;
+ omp_context *octx = ctx->outer;
+ gcc_assert (octx);
+ bool input_phase = has_clauses ^ octx->scan_inclusive;
+ if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
+ && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
+ && !gimple_omp_for_combined_into_p (octx->stmt)
+ && octx->scan_inclusive)
+ {
+ if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
+ OMP_CLAUSE__SIMDUID_))
+ {
+ tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
+ lane = create_tmp_var (unsigned_type_node);
+ tree t = build_int_cst (integer_type_node, 1 + !input_phase);
+ gimple *g
+ = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
+ gimple_call_set_lhs (g, lane);
+ gimple_seq_add_stmt (&before, g);
+ }
+ for (tree c = gimple_omp_for_clauses (octx->stmt);
+ c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_INSCAN (c))
+ {
+ tree var = OMP_CLAUSE_DECL (c);
+ tree new_var = lookup_decl (var, octx);
+ tree val = new_var;
+ tree var2 = NULL_TREE;
+ tree var3 = NULL_TREE;
+ if (DECL_HAS_VALUE_EXPR_P (new_var))
+ {
+ val = DECL_VALUE_EXPR (new_var);
+ if (TREE_CODE (val) == ARRAY_REF
+ && VAR_P (TREE_OPERAND (val, 0)))
+ {
+ tree v = TREE_OPERAND (val, 0);
+ if (lookup_attribute ("omp simd array",
+ DECL_ATTRIBUTES (v)))
+ {
+ val = unshare_expr (val);
+ TREE_OPERAND (val, 1) = lane;
+ var2 = lookup_decl (v, octx);
+ if (input_phase
+ && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ var3 = maybe_lookup_decl (var2, octx);
+ if (!input_phase)
+ {
+ var2 = build4 (ARRAY_REF, TREE_TYPE (val),
+ var2, lane, NULL_TREE, NULL_TREE);
+ TREE_THIS_NOTRAP (var2) = 1;
+ }
+ else
+ var2 = val;
+ }
+ }
+ }
+ else
+ {
+ var2 = build_outer_var_ref (var, octx);
+ if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ {
+ var3 = maybe_lookup_decl (new_var, octx);
+ if (var3 == new_var)
+ var3 = NULL_TREE;
+ }
+ }
+ if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ {
+ tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
+ if (input_phase)
+ {
+ if (var3)
+ {
+ /* If we've added a separate identity element
+ variable, copy it over into val. */
+ tree x = lang_hooks.decls.omp_clause_assign_op (c, val,
+ var3);
+ gimplify_and_add (x, &before);
+ }
+ else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+ {
+ /* Otherwise, assign to it the identity element. */
+ gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+ tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
+ ? DECL_VALUE_EXPR (new_var) : NULL_TREE);
+ tree ref = build_outer_var_ref (var, octx);
+ SET_DECL_VALUE_EXPR (new_var, val);
+ SET_DECL_VALUE_EXPR (placeholder, ref);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
+ lower_omp (&tseq, octx);
+ SET_DECL_VALUE_EXPR (new_var, x);
+ SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+ if (x == NULL_TREE)
+ DECL_HAS_VALUE_EXPR_P (new_var) = 0;
+ gimple_seq_add_seq (&before, tseq);
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+ }
+ }
+ else
+ {
+ gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+ tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
+ ? DECL_VALUE_EXPR (new_var) : NULL_TREE);
+ SET_DECL_VALUE_EXPR (new_var, val);
+ SET_DECL_VALUE_EXPR (placeholder, var2);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
+ lower_omp (&tseq, octx);
+ gimple_seq_add_seq (&before, tseq);
+ OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
+ SET_DECL_VALUE_EXPR (new_var, x);
+ SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
+ DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+ x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
+ gimplify_and_add (x, &before);
+ }
+ }
+ else
+ {
+ if (input_phase)
+ {
+ /* input phase. Set val to initializer before
+ the body. */
+ tree x = omp_reduction_init (c, TREE_TYPE (new_var));
+ gimplify_assign (val, x, &before);
+ }
+ else
+ {
+ /* scan phase. */
+ enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+ if (code == MINUS_EXPR)
+ code = PLUS_EXPR;
+
+ tree x = build2 (code, TREE_TYPE (var2),
+ unshare_expr (var2), unshare_expr (val));
+ gimplify_assign (unshare_expr (var2), x, &before);
+ gimplify_assign (val, var2, &before);
+ }
+ }
+ }
+ }
+ else if (has_clauses)
+ sorry_at (gimple_location (stmt),
+ "%<#pragma omp scan%> not supported yet");
+ gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
+ gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
+ gsi_replace (gsi_p, gimple_build_nop (), true);
+}
+
+
/* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
substitution of a couple of function calls. But in the NAMED case,
requires that languages coordinate a symbol name. It is therefore
case GIMPLE_OMP_SCAN:
ctx = maybe_lookup_ctx (stmt);
gcc_assert (ctx);
- gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
- if (gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
- sorry_at (gimple_location (stmt),
- "%<#pragma omp scan%> not supported yet");
- gsi_replace (gsi_p, gimple_build_nop (), true);
+ lower_omp_scan (gsi_p, ctx);
break;
case GIMPLE_OMP_CRITICAL:
ctx = maybe_lookup_ctx (stmt);
+2019-06-17 Jakub Jelinek <jakub@redhat.com>
+
+ * gcc.dg/vect/vect-simd-8.c: New test.
+ * gcc.dg/vect/vect-simd-9.c: New test.
+ * g++.dg/vect/simd-2.cc: New test.
+ * g++.dg/gomp/scan-1.C: New test.
+
2019-06-17 Uroš Bizjak <ubizjak@gmail.com>
PR target/62055
--- /dev/null
+// { dg-do compile { target c++11 } }
+
+struct S { S (); ~S (); S &operator = (const S &) = delete; int s; }; // { dg-message "declared here" }
+#pragma omp declare reduction (+ : S : omp_out.s += omp_in.s)
+
+S s;
+
+void
+foo (void)
+{
+ #pragma omp simd reduction (+: s)
+ for (int i = 0; i < 64; ++i)
+ s.s += i;
+}
+
+void
+bar (int *x)
+{
+ #pragma omp simd reduction (inscan, +: s) // { dg-error "use of deleted function" }
+ for (int i = 0; i < 64; ++i)
+ {
+ s.s += i;
+ #pragma omp scan inclusive (s) // { dg-error "" }
+ x[i] = s.s;
+ }
+}
--- /dev/null
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+struct S {
+ inline S ();
+ inline ~S ();
+ inline S (const S &);
+ inline S & operator= (const S &);
+ int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+ s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+ s = x.s;
+ return *this;
+}
+
+static inline void
+ini (S &x)
+{
+ x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b)
+{
+ #pragma omp simd reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r.s += a[i].s;
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+ S s;
+ #pragma omp simd reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s.s += 2 * a[i].s;
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return S (s);
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b)
+{
+ #pragma omp simd reduction (inscan, +:r) simdlen(1)
+ for (int i = 0; i < 1024; i++)
+ {
+ r.s += a[i].s;
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+ S s;
+ #pragma omp simd if (0) reduction (inscan, plus:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s.s += 2 * a[i].s;
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return S (s);
+}
+
+int
+main ()
+{
+ S s;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i].s = i;
+ b[i].s = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += i;
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ }
+ if (bar ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += 2 * i;
+ if (b[i].s != s.s)
+ abort ();
+ }
+ r.s = 0;
+ baz (a, b);
+ if (r.s != 1024 * 1023 / 2)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += i;
+ if (b[i].s != s.s)
+ abort ();
+ else
+ b[i].s = 25;
+ }
+ if (qux ().s != 1024 * 1023)
+ abort ();
+ s.s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s.s += 2 * i;
+ if (b[i].s != s.s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+int r, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, +:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, +:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, +:s) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+int r, a[1024], b[1024];
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, foo:r)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, foo:s)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+ #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+ for (int i = 0; i < 1024; i++)
+ {
+ r += a[i];
+ #pragma omp scan inclusive(r)
+ b[i] = r;
+ }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+ int s = 0;
+ #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+ for (int i = 0; i < 1024; i++)
+ {
+ s += 2 * a[i];
+ #pragma omp scan inclusive(s)
+ b[i] = s;
+ }
+ return s;
+}
+
+int
+main ()
+{
+ int s = 0;
+ check_vect ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ a[i] = i;
+ b[i] = -1;
+ asm ("" : "+g" (i));
+ }
+ foo (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = 25;
+ }
+ if (bar () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -1;
+ }
+ r = 0;
+ baz (a, b);
+ if (r != 1024 * 1023 / 2)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += i;
+ if (b[i] != s)
+ abort ();
+ else
+ b[i] = -25;
+ }
+ if (qux () != 1024 * 1023)
+ abort ();
+ s = 0;
+ for (int i = 0; i < 1024; ++i)
+ {
+ s += 2 * i;
+ if (b[i] != s)
+ abort ();
+ }
+ return 0;
+}
update_stmt (stmt);
release_ssa_name (name);
- /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON
+ /* GOMP_SIMD_LANE (unless three argument) or ASAN_POISON
without lhs is not needed. */
if (gimple_call_internal_p (stmt))
switch (gimple_call_internal_fn (stmt))
{
case IFN_GOMP_SIMD_LANE:
- if (gimple_call_num_args (stmt) >= 2
- && !integer_nonzerop (gimple_call_arg (stmt, 1)))
+ if (gimple_call_num_args (stmt) >= 3
+ && !integer_nonzerop (gimple_call_arg (stmt, 2)))
break;
/* FALLTHRU */
case IFN_ASAN_POISON:
|| TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
break;
+ /* Different .GOMP_SIMD_LANE calls still give the same lane,
+ just hold extra information. */
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
+ && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
+ && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
+ break;
+
/* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
DR_STEP_ALIGNMENT (newdr)
= highest_pow2_factor (step);
/* Mark as simd-lane access. */
- newdr->aux = (void *)-1;
+ tree arg2 = gimple_call_arg (def, 1);
+ newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
free_data_ref (dr);
datarefs->safe_push (newdr);
return opt_result::success ();
}
/* See if this was detected as SIMD lane access. */
- if (dr->aux == (void *)-1)
+ if (dr->aux == (void *)-1
+ || dr->aux == (void *)-2
+ || dr->aux == (void *)-3)
{
if (nested_in_vect_loop_p (loop, stmt_info))
return opt_result::failure_at (stmt_info->stmt,
"not vectorized:"
" data ref analysis failed: %G",
stmt_info->stmt);
- STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
+ STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
+ = -(uintptr_t) dr->aux;
}
tree base = get_base_address (DR_REF (dr));
peeling_for_alignment (0),
ptr_mask (0),
ivexpr_map (NULL),
+ scan_map (NULL),
slp_unrolling_factor (1),
single_scalar_iteration_cost (0),
vectorizable (false),
gimple *stmt = gsi_stmt (si);
gimple_set_uid (stmt, 0);
add_stmt (stmt);
- /* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the
- second argument is the #pragma omp simd if (x) condition, when 0,
+ /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
+ third argument is the #pragma omp simd if (x) condition, when 0,
loop shouldn't be vectorized, when non-zero constant, it should
be vectorized normally, otherwise versioned with vectorized loop
done if the condition is non-zero at runtime. */
&& is_gimple_call (stmt)
&& gimple_call_internal_p (stmt)
&& gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
- && gimple_call_num_args (stmt) >= 2
+ && gimple_call_num_args (stmt) >= 3
&& TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
&& (loop_in->simduid
== SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
{
- tree arg = gimple_call_arg (stmt, 1);
+ tree arg = gimple_call_arg (stmt, 2);
if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
simd_if_cond = arg;
else
release_vec_loop_masks (&masks);
delete ivexpr_map;
+ delete scan_map;
loop->aux = NULL;
}
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
+#include "attribs.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
if (nargs == 0 || nargs > 4)
return false;
- /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
+ /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
combined_fn cfn = gimple_call_combined_fn (stmt);
if (cfn == CFN_GOMP_SIMD_LANE)
{
}
+/* Function scan_operand_equal_p.
+
+ Helper function for check_scan_store. Compare two references
+ with .GOMP_SIMD_LANE bases. Return true iff they access the same
+ object at the same (bit) position with the same size. */
+
+static bool
+scan_operand_equal_p (tree ref1, tree ref2)
+{
+ machine_mode mode1, mode2;
+ poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
+ tree offset1, offset2;
+ int unsignedp1, unsignedp2, reversep1, reversep2;
+ int volatilep1 = 0, volatilep2 = 0;
+ tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
+ &mode1, &unsignedp1, &reversep1,
+ &volatilep1);
+ tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
+ &mode2, &unsignedp2, &reversep2,
+ &volatilep2);
+ /* Punt on reverse storage order and volatile accesses. */
+ if (reversep1 || reversep2 || volatilep1 || volatilep2)
+ return false;
+ if (!operand_equal_p (base1, base2, 0))
+ return false;
+ if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
+ return false;
+ if (maybe_ne (bitsize1, bitsize2))
+ return false;
+ /* get_inner_reference sets *poffset to NULL_TREE when the offset is
+ constant; operand_equal_p must not be called with NULL_TREE
+ arguments, so compare the NULL cases explicitly first. */
+ if (offset1 != offset2)
+ {
+ if (!offset1 || !offset2)
+ return false;
+ if (!operand_equal_p (offset1, offset2, 0))
+ return false;
+ }
+ return true;
+}
+
+
+/* Function check_scan_store.
+
+ Check magic stores for #pragma omp scan {in,ex}clusive reductions.
+ Called during analysis only (when vec_stmt is NULL) for stores with
+ STMT_VINFO_SIMD_LANE_ACCESS_P > 1; returns true if the store either
+ can be vectorized normally (initializer store, kind 2) or matches the
+ scan combiner pattern handled by vectorizable_scan_store (kind 3). */
+
+static bool
+check_scan_store (stmt_vec_info stmt_info, tree vectype,
+ enum vect_def_type rhs_dt, bool slp, tree mask,
+ vect_memory_access_type memory_access_type)
+{
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+ tree ref_type;
+
+ gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
+ /* Punt on every form the pattern matcher below does not handle: only
+ non-SLP, unmasked, contiguous, non-grouped stores into a plain VAR
+ at offset/init zero are supported. */
+ if (slp
+ || mask
+ || memory_access_type != VMAT_CONTIGUOUS
+ || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
+ || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
+ || loop_vinfo == NULL
+ || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ || !integer_zerop (DR_OFFSET (dr_info->dr))
+ || !integer_zerop (DR_INIT (dr_info->dr))
+ || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
+ || !alias_sets_conflict_p (get_alias_set (vectype),
+ get_alias_set (TREE_TYPE (ref_type))))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported OpenMP scan store.\n");
+ return false;
+ }
+
+ /* We need to pattern match code built by OpenMP lowering and simplified
+ by following optimizations into something we can handle.
+ #pragma omp simd reduction(inscan,+:r)
+ for (...)
+ {
+ r += something ();
+ #pragma omp scan inclusive (r)
+ use (r);
+ }
+ shall have body with:
+ // Initialization for input phase, store the reduction initializer:
+ _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
+ _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
+ D.2042[_21] = 0;
+ // Actual input phase:
+ ...
+ r.0_5 = D.2042[_20];
+ _6 = _4 + r.0_5;
+ D.2042[_20] = _6;
+ // Initialization for scan phase:
+ _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
+ _26 = D.2043[_25];
+ _27 = D.2042[_25];
+ _28 = _26 + _27;
+ D.2043[_25] = _28;
+ D.2042[_25] = _28;
+ // Actual scan phase:
+ ...
+ r.1_8 = D.2042[_20];
+ ...
+ The "omp simd array" variable D.2042 holds the privatized copy used
+ inside of the loop and D.2043 is another one that holds copies of
+ the current original list item. The separate GOMP_SIMD_LANE ifn
+ kinds are there in order to allow optimizing the initializer store
+ and combiner sequence, e.g. if it is originally some C++ish user
+ defined reduction, but allow the vectorizer to pattern recognize it
+ and turn into the appropriate vectorized scan. */
+
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
+ {
+ /* Match the D.2042[_21] = 0; store above. Just require that
+ it is a constant or external definition store. */
+ if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
+ {
+ fail_init:
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported OpenMP scan initializer store.\n");
+ return false;
+ }
+
+ /* Record the initializer rhs per "omp simd array" variable; a second
+ initializer store for the same variable is rejected. */
+ if (! loop_vinfo->scan_map)
+ loop_vinfo->scan_map = new hash_map<tree, tree>;
+ tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+ tree &cached = loop_vinfo->scan_map->get_or_insert (var);
+ if (cached)
+ goto fail_init;
+ cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
+
+ /* These stores can be vectorized normally. */
+ return true;
+ }
+
+ /* From here on, kind 3: the scan phase combiner stores. */
+ if (rhs_dt != vect_internal_def)
+ {
+ fail:
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported OpenMP scan combiner pattern.\n");
+ return false;
+ }
+
+ gimple *stmt = STMT_VINFO_STMT (stmt_info);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ if (TREE_CODE (rhs) != SSA_NAME)
+ goto fail;
+
+ /* The combined value _28 must be stored exactly twice (D.2043[_25]
+ and D.2042[_25] above); find the other store in the same bb. */
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ gimple *other_store_stmt = NULL;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (use_stmt == stmt || is_gimple_debug (use_stmt))
+ continue;
+ if (gimple_bb (use_stmt) != gimple_bb (stmt)
+ || !gimple_store_p (use_stmt)
+ || other_store_stmt)
+ goto fail;
+ other_store_stmt = use_stmt;
+ }
+ if (other_store_stmt == NULL)
+ goto fail;
+ stmt_vec_info other_store_stmt_info
+ = loop_vinfo->lookup_stmt (other_store_stmt);
+ if (other_store_stmt_info == NULL
+ || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
+ goto fail;
+
+ /* The stored value must be a binary operation in the same bb. */
+ gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+ if (gimple_bb (def_stmt) != gimple_bb (stmt)
+ || !is_gimple_assign (def_stmt)
+ || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
+ goto fail;
+
+ enum tree_code code = gimple_assign_rhs_code (def_stmt);
+ /* For pointer addition, we should use the normal plus for the vector
+ operation. */
+ switch (code)
+ {
+ case POINTER_PLUS_EXPR:
+ code = PLUS_EXPR;
+ break;
+ case MULT_HIGHPART_EXPR:
+ goto fail;
+ default:
+ break;
+ }
+ if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
+ goto fail;
+
+ /* Both combiner operands must be loads with kind-3 simd lane access
+ (the _26 = D.2043[_25]; and _27 = D.2042[_25]; loads above). */
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ tree rhs2 = gimple_assign_rhs2 (def_stmt);
+ if (TREE_CODE (rhs1) != SSA_NAME
+ || TREE_CODE (rhs2) != SSA_NAME)
+ goto fail;
+
+ gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
+ gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
+ if (gimple_bb (load1_stmt) != gimple_bb (stmt)
+ || !gimple_assign_load_p (load1_stmt)
+ || gimple_bb (load2_stmt) != gimple_bb (stmt)
+ || !gimple_assign_load_p (load2_stmt))
+ goto fail;
+
+ stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
+ stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
+ if (load1_stmt_info == NULL
+ || load2_stmt_info == NULL
+ || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
+ || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
+ goto fail;
+
+ /* Canonicalize so that load1 reads the same location this store writes
+ and load2 reads the location the other store writes (the operation
+ is commutative, so swapping is safe). */
+ if (scan_operand_equal_p (gimple_assign_lhs (stmt),
+ gimple_assign_rhs1 (load2_stmt)))
+ {
+ std::swap (rhs1, rhs2);
+ std::swap (load1_stmt, load2_stmt);
+ std::swap (load1_stmt_info, load2_stmt_info);
+ }
+ if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
+ gimple_assign_rhs1 (load1_stmt))
+ || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
+ gimple_assign_rhs1 (load2_stmt)))
+ goto fail;
+
+ dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
+ if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
+ || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
+ goto fail;
+
+ /* Both arrays must be "omp simd array"s and exactly one of them the
+ "omp simd inscan" one (the != check via negated lookups). */
+ tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+ tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
+ if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
+ || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
+ || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+ == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
+ goto fail;
+
+ /* Make var1 the non-inscan array whose initializer store was cached. */
+ if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+ std::swap (var1, var2);
+
+ if (loop_vinfo->scan_map == NULL)
+ goto fail;
+ tree *init = loop_vinfo->scan_map->get (var1);
+ if (init == NULL)
+ goto fail;
+
+ /* The IL is as expected, now check if we can actually vectorize it.
+ _26 = D.2043[_25];
+ _27 = D.2042[_25];
+ _28 = _26 + _27;
+ D.2043[_25] = _28;
+ D.2042[_25] = _28;
+ should be vectorized as (where _40 is the vectorized rhs
+ from the D.2042[_21] = 0; store):
+ _30 = MEM <vector(8) int> [(int *)&D.2043];
+ _31 = MEM <vector(8) int> [(int *)&D.2042];
+ _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
+ _33 = _31 + _32;
+ // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
+ _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
+ _35 = _33 + _34;
+ // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+ // _31[1]+.._31[4], ... _31[4]+.._31[7] };
+ _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
+ _37 = _35 + _36;
+ // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+ // _31[0]+.._31[4], ... _31[0]+.._31[7] };
+ _38 = _30 + _37;
+ _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
+ MEM <vector(8) int> [(int *)&D.2043] = _39;
+ MEM <vector(8) int> [(int *)&D.2042] = _38; */
+ enum machine_mode vec_mode = TYPE_MODE (vectype);
+ optab optab = optab_for_tree_code (code, vectype, optab_default);
+ if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+ goto fail;
+
+ /* The transform needs a constant power-of-two lane count. */
+ unsigned HOST_WIDE_INT nunits;
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
+ goto fail;
+ int units_log2 = exact_log2 (nunits);
+ if (units_log2 <= 0)
+ goto fail;
+
+ /* Verify the target supports every VEC_PERM_EXPR the transform will
+ emit: units_log2 shift-in permutations plus the final broadcast of
+ the last lane (i == units_log2). Must mirror the masks built in
+ vectorizable_scan_store. */
+ for (int i = 0; i <= units_log2; ++i)
+ {
+ unsigned HOST_WIDE_INT j, k;
+ vec_perm_builder sel (nunits, nunits, 1);
+ sel.quick_grow (nunits);
+ if (i == units_log2)
+ {
+ for (j = 0; j < nunits; ++j)
+ sel[j] = nunits - 1;
+ }
+ else
+ {
+ for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
+ sel[j] = nunits + j;
+ for (k = 0; j < nunits; ++j, ++k)
+ sel[j] = k;
+ }
+ vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
+ if (!can_vec_perm_const_p (vec_mode, indices))
+ goto fail;
+ }
+
+ return true;
+}
+
+
+/* Function vectorizable_scan_store.
+
+ Helper of vectorizable_store, arguments like on vectorizable_store.
+ Handle only the transformation, checking is done in check_scan_store.
+ Emits the log2(nunits) permute-and-combine steps documented in the
+ big comment inside check_scan_store. */
+
+static bool
+vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ stmt_vec_info *vec_stmt, int ncopies)
+{
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+ tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
+ vec_info *vinfo = stmt_info->vinfo;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "transform scan store. ncopies = %d\n", ncopies);
+
+ /* Re-discover the combiner pieces; check_scan_store already verified
+ all of this, hence the asserts instead of failure returns. */
+ gimple *stmt = STMT_VINFO_STMT (stmt_info);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ gcc_assert (TREE_CODE (rhs) == SSA_NAME);
+
+ gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+ enum tree_code code = gimple_assign_rhs_code (def_stmt);
+ if (code == POINTER_PLUS_EXPR)
+ code = PLUS_EXPR;
+ gcc_assert (TREE_CODE_LENGTH (code) == binary_op
+ && commutative_tree_code (code));
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ tree rhs2 = gimple_assign_rhs2 (def_stmt);
+ gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
+ gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
+ gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
+ stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
+ stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
+ dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
+ dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
+ tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
+ tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
+
+ /* Canonicalize so rhs1/var1 refer to the non-inscan array (same
+ canonicalization as in check_scan_store). */
+ if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+ {
+ std::swap (rhs1, rhs2);
+ std::swap (var1, var2);
+ }
+
+ /* The cached rhs of the initializer store (the _40 vector in the
+ comment in check_scan_store). */
+ tree *init = loop_vinfo->scan_map->get (var1);
+ gcc_assert (init);
+
+ tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+ bool inscan_var_store
+ = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
+
+ unsigned HOST_WIDE_INT nunits;
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
+ gcc_unreachable ();
+ int units_log2 = exact_log2 (nunits);
+ gcc_assert (units_log2 > 0);
+ /* Build the units_log2 shift-in permutation masks plus the final
+ broadcast-last-lane mask; must match check_scan_store exactly. */
+ auto_vec<tree, 16> perms;
+ perms.quick_grow (units_log2 + 1);
+ for (int i = 0; i <= units_log2; ++i)
+ {
+ unsigned HOST_WIDE_INT j, k;
+ vec_perm_builder sel (nunits, nunits, 1);
+ sel.quick_grow (nunits);
+ if (i == units_log2)
+ {
+ for (j = 0; j < nunits; ++j)
+ sel[j] = nunits - 1;
+ }
+ else
+ {
+ for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
+ sel[j] = nunits + j;
+ for (k = 0; j < nunits; ++j, ++k)
+ sel[j] = k;
+ }
+ vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
+ perms[i] = vect_gen_perm_mask_checked (vectype, indices);
+ }
+
+ stmt_vec_info prev_stmt_info = NULL;
+ tree vec_oprnd1 = NULL_TREE;
+ tree vec_oprnd2 = NULL_TREE;
+ tree vec_oprnd3 = NULL_TREE;
+ tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
+ tree dataref_offset = build_int_cst (ref_type, 0);
+ tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
+ tree orig = NULL_TREE;
+ for (int j = 0; j < ncopies; j++)
+ {
+ stmt_vec_info new_stmt_info;
+ if (j == 0)
+ {
+ /* vec_oprnd1 = initializer, vec_oprnd2 = non-inscan array load,
+ vec_oprnd3 = inscan array load. */
+ vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
+ vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
+ vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
+ orig = vec_oprnd3;
+ }
+ else
+ {
+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
+ vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
+ vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
+ if (!inscan_var_store)
+ dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+ }
+
+ /* Emit the log-step prefix combination: at step i, shift in 2^i
+ initializer lanes via VEC_PERM_EXPR, then combine with CODE.
+ All generated stmts are chained through STMT_VINFO_RELATED_STMT. */
+ tree v = vec_oprnd2;
+ for (int i = 0; i < units_log2; ++i)
+ {
+ tree new_temp = make_ssa_name (vectype);
+ gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, v,
+ vec_oprnd1, perms[i]);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ if (prev_stmt_info == NULL)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+
+ tree new_temp2 = make_ssa_name (vectype);
+ g = gimple_build_assign (new_temp2, code, v, new_temp);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+
+ v = new_temp2;
+ }
+
+ /* Add the carry-in from the previous copy (or the inscan array). */
+ tree new_temp = make_ssa_name (vectype);
+ gimple *g = gimple_build_assign (new_temp, code, orig, v);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+
+ /* Broadcast the last lane as the carry for the next copy. */
+ orig = make_ssa_name (vectype);
+ g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
+ perms[units_log2]);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+
+ if (!inscan_var_store)
+ {
+ tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+ dataref_offset);
+ vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
+ g = gimple_build_assign (data_ref, new_temp);
+ new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+ }
+ }
+
+ /* For the inscan array, only the broadcast of the final copy is
+ stored, replicated to every copy's slot. */
+ if (inscan_var_store)
+ for (int j = 0; j < ncopies; j++)
+ {
+ if (j != 0)
+ dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+
+ tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+ dataref_offset);
+ vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
+ gimple *g = gimple_build_assign (data_ref, orig);
+ stmt_vec_info new_stmt_info
+ = vect_finish_stmt_generation (stmt_info, g, gsi);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+ prev_stmt_info = new_stmt_info;
+ }
+ return true;
+}
+
+
/* Function vectorizable_store.
Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
group_size = vec_num = 1;
}
+ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
+ {
+ if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
+ memory_access_type))
+ return false;
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
}
return true;
}
+ else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
+ return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
gcc_assert (useless_type_conversion_p (vectype,
TREE_TYPE (vec_oprnd)));
bool simd_lane_access_p
- = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
+ = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
if (simd_lane_access_p
&& !loop_masks
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
if (j == 0)
{
bool simd_lane_access_p
- = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
+ = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
if (simd_lane_access_p
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
/* Map of IV base/step expressions to inserted name in the preheader. */
hash_map<tree_operand_hash, tree> *ivexpr_map;
+ /* Map of OpenMP "omp simd array" scan variables to corresponding
+ rhs of the store of the initializer. */
+ hash_map<tree, tree> *scan_map;
+
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
applied to the loop, i.e., no unrolling is needed, this is 1. */
poly_uint64 slp_unrolling_factor;
bool strided_p;
/* For both loads and stores. */
- bool simd_lane_access_p;
+ unsigned simd_lane_access_p : 2;
/* Classifies how the load or store is going to be implemented
for loop vectorization. */