From: Jakub Jelinek Date: Mon, 17 Jun 2019 21:20:00 +0000 (+0200) Subject: omp-low.c (struct omp_context): Add scan_inclusive field. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0356aab806921fe5d766d3fc74cf49dffc271d18;p=gcc.git omp-low.c (struct omp_context): Add scan_inclusive field. * omp-low.c (struct omp_context): Add scan_inclusive field. (scan_omp_1_stmt) : Set ctx->scan_inclusive if inclusive scan. (struct omplow_simd_context): Add lastlane member. (lower_rec_simd_input_clauses): Add rvar argument, handle inscan reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than 1 or 2 argument. (lower_rec_input_clauses): Handle inscan reductions in simd contexts. (lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF. (lower_omp_scan): New function. (lower_omp_1) : Use lower_omp_scan. * tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE check 3rd argument if present rather than 2nd. * tree-vectorizer.h (struct _loop_vec_info): Add scan_map member. (struct _stmt_vec_info): Change simd_lane_access_p from bool into 2-bit bitfield. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather than 2nd. (_loop_vec_info::~_loop_vec_info): Delete scan_map. * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same init. (vect_find_stmt_data_reference): Encode in ->aux the 2nd IFN_GOMP_SIMD_LANE argument. (vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the encoded ->aux value. * tree-vect-stmts.c: Include attribs.h. (vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE. (scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New functions. (vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0. (vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1. 
cp/ * semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN set need_copy_assignment. testsuite/ * gcc.dg/vect/vect-simd-8.c: New test. * gcc.dg/vect/vect-simd-9.c: New test. * g++.dg/vect/simd-2.cc: New test. * g++.dg/gomp/scan-1.C: New test. From-SVN: r272399 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6001d262fb2..c75b08e487b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,39 @@ +2019-06-17 Jakub Jelinek + + * omp-low.c (struct omp_context): Add scan_inclusive field. + (scan_omp_1_stmt) : Set ctx->scan_inclusive + if inclusive scan. + (struct omplow_simd_context): Add lastlane member. + (lower_rec_simd_input_clauses): Add rvar argument, handle inscan + reductions. Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than + 1 or 2 argument. + (lower_rec_input_clauses): Handle inscan reductions in simd contexts. + (lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF. + (lower_omp_scan): New function. + (lower_omp_1) : Use lower_omp_scan. + * tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE + check 3rd argument if present rather than 2nd. + * tree-vectorizer.h (struct _loop_vec_info): Add scan_map member. + (struct _stmt_vec_info): Change simd_lane_access_p from bool into + 2-bit bitfield. + * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize + scan_map. For IFN_GOMP_SIMD_LANE check 3rd argument if present rather + than 2nd. + (_loop_vec_info::~_loop_vec_info): Delete scan_map. + * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two + different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same + init. + (vect_find_stmt_data_reference): Encode in ->aux the 2nd + IFN_GOMP_SIMD_LANE argument. + (vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the + encoded ->aux value. + * tree-vect-stmts.c: Include attribs.h. + (vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE. + (scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New + functions. 
+ (vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0. + (vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1. + 2019-06-17 Uroš Bizjak PR target/62055 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 7368dafea85..bab08df1ce5 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,8 @@ +2019-06-17 Jakub Jelinek + + * semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN + set need_copy_assignment. + 2019-06-17 Marek Polacek PR c++/83820 - excessive attribute arguments not detected. diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 8e354a2990c..6cf3af71c21 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -7688,6 +7688,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_REDUCTION: if (reduction_seen == -2) OMP_CLAUSE_REDUCTION_INSCAN (c) = 0; + if (OMP_CLAUSE_REDUCTION_INSCAN (c)) + need_copy_assignment = true; need_implicitly_determined = true; break; case OMP_CLAUSE_IN_REDUCTION: diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 962eadae02f..9df21a4d046 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -141,6 +141,9 @@ struct omp_context /* True if lower_omp_1 should look up lastprivate conditional in parent context. */ bool combined_into_simd_safelen0; + + /* True if there is nested scan context with inclusive clause. 
*/ + bool scan_inclusive; }; static splay_tree all_contexts; @@ -3329,11 +3332,15 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, scan_omp_single (as_a (stmt), ctx); break; + case GIMPLE_OMP_SCAN: + if (tree clauses = gimple_omp_scan_clauses (as_a (stmt))) + if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE) + ctx->scan_inclusive = true; + /* FALLTHRU */ case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: case GIMPLE_OMP_ORDERED: case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_SCAN: case GIMPLE_OMP_GRID_BODY: ctx = new_omp_context (stmt, ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); @@ -3671,6 +3678,7 @@ struct omplow_simd_context { omplow_simd_context () { memset (this, 0, sizeof (*this)); } tree idx; tree lane; + tree lastlane; vec simt_eargs; gimple_seq simt_dlist; poly_uint64_pod max_vf; @@ -3682,7 +3690,8 @@ struct omplow_simd_context { static bool lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, - omplow_simd_context *sctx, tree &ivar, tree &lvar) + omplow_simd_context *sctx, tree &ivar, + tree &lvar, tree *rvar = NULL) { if (known_eq (sctx->max_vf, 0U)) { @@ -3738,7 +3747,27 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, = tree_cons (get_identifier ("omp simd array"), NULL, DECL_ATTRIBUTES (avar)); gimple_add_tmp_var (avar); - ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx, + tree iavar = avar; + if (rvar) + { + /* For inscan reductions, create another array temporary, + which will hold the reduced value. 
*/ + iavar = create_tmp_var_raw (atype); + if (TREE_ADDRESSABLE (new_var)) + TREE_ADDRESSABLE (iavar) = 1; + DECL_ATTRIBUTES (iavar) + = tree_cons (get_identifier ("omp simd array"), NULL, + tree_cons (get_identifier ("omp simd inscan"), NULL, + DECL_ATTRIBUTES (iavar))); + gimple_add_tmp_var (iavar); + ctx->cb.decl_map->put (avar, iavar); + if (sctx->lastlane == NULL_TREE) + sctx->lastlane = create_tmp_var (unsigned_type_node); + *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, + sctx->lastlane, NULL_TREE, NULL_TREE); + TREE_THIS_NOTRAP (*rvar) = 1; + } + ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx, NULL_TREE, NULL_TREE); lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane, NULL_TREE, NULL_TREE); @@ -3814,7 +3843,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, omplow_simd_context sctx = omplow_simd_context (); tree simt_lane = NULL_TREE, simtrec = NULL_TREE; tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE; - gimple_seq llist[3] = { }; + gimple_seq llist[4] = { }; tree nonconst_simd_if = NULL_TREE; copyin_seq = NULL; @@ -5155,9 +5184,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, new_vard = TREE_OPERAND (new_var, 0); gcc_assert (DECL_P (new_vard)); } + tree rvar = NULL_TREE, *rvarp = NULL; + if (is_simd + && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_INSCAN (c)) + rvarp = &rvar; if (is_simd && lower_rec_simd_input_clauses (new_var, ctx, &sctx, - ivar, lvar)) + ivar, lvar, rvarp)) { if (new_vard == new_var) { @@ -5173,6 +5207,93 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = lang_hooks.decls.omp_clause_default_ctor (c, unshare_expr (ivar), build_outer_var_ref (var, ctx)); + if (rvarp) + { + if (x) + { + gimplify_and_add (x, &llist[0]); + + tree ivar2 = unshare_expr (lvar); + TREE_OPERAND (ivar2, 1) = sctx.idx; + x = lang_hooks.decls.omp_clause_default_ctor + (c, ivar2, build_outer_var_ref 
(var, ctx)); + gimplify_and_add (x, &llist[0]); + + /* For types that need construction, add another + private var which will be default constructed + and optionally initialized with + OMP_CLAUSE_REDUCTION_GIMPLE_INIT, as in the + loop we want to assign this value instead of + constructing and destructing it in each + iteration. */ + tree nv = create_tmp_var_raw (TREE_TYPE (ivar)); + gimple_add_tmp_var (nv); + ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0), + nv); + x = lang_hooks.decls.omp_clause_default_ctor + (c, nv, build_outer_var_ref (var, ctx)); + gimplify_and_add (x, ilist); + + if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) + { + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); + x = DECL_VALUE_EXPR (new_var); + SET_DECL_VALUE_EXPR (new_var, nv); + lower_omp (&tseq, ctx); + SET_DECL_VALUE_EXPR (new_var, x); + gimple_seq_add_seq (ilist, tseq); + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + } + + x = lang_hooks.decls.omp_clause_dtor (c, nv); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } + } + + tree ref = build_outer_var_ref (var, ctx); + x = unshare_expr (ivar); + x = lang_hooks.decls.omp_clause_assign_op (c, x, + ref); + gimplify_and_add (x, &llist[0]); + + ref = build_outer_var_ref (var, ctx); + x = lang_hooks.decls.omp_clause_assign_op (c, ref, + rvar); + gimplify_and_add (x, &llist[3]); + + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + if (new_vard == new_var) + SET_DECL_VALUE_EXPR (new_var, lvar); + else + SET_DECL_VALUE_EXPR (new_vard, + build_fold_addr_expr (lvar)); + + x = lang_hooks.decls.omp_clause_dtor (c, ivar); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (&llist[1], tseq); + } + + tree ivar2 = unshare_expr (lvar); + TREE_OPERAND (ivar2, 1) = sctx.idx; + x = lang_hooks.decls.omp_clause_dtor (c, ivar2); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (&llist[1], tseq); + } + break; + } if (x) 
gimplify_and_add (x, &llist[0]); if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) @@ -5240,6 +5361,41 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, : build_outer_var_ref (var, ctx)); if (x) gimplify_and_add (x, ilist); + + if (rvarp) + { + if (x) + { + tree nv = create_tmp_var_raw (TREE_TYPE (new_vard)); + gimple_add_tmp_var (nv); + ctx->cb.decl_map->put (new_var, nv); + x = lang_hooks.decls.omp_clause_default_ctor + (c, nv, build_outer_var_ref (var, ctx)); + gimplify_and_add (x, ilist); + if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) + { + tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); + SET_DECL_VALUE_EXPR (new_var, nv); + DECL_HAS_VALUE_EXPR_P (new_var) = 1; + lower_omp (&tseq, ctx); + SET_DECL_VALUE_EXPR (new_var, NULL_TREE); + DECL_HAS_VALUE_EXPR_P (new_var) = 0; + gimple_seq_add_seq (ilist, tseq); + } + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + x = lang_hooks.decls.omp_clause_dtor (c, nv); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } + } + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + goto do_dtor; + } + if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) { tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); @@ -5324,12 +5480,32 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, new_vard = TREE_OPERAND (new_var, 0); gcc_assert (DECL_P (new_vard)); } + tree rvar = NULL_TREE, *rvarp = NULL; + if (is_simd + && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_INSCAN (c)) + rvarp = &rvar; if (is_simd && lower_rec_simd_input_clauses (new_var, ctx, &sctx, - ivar, lvar)) + ivar, lvar, rvarp)) { + if (new_vard != new_var) + { + SET_DECL_VALUE_EXPR (new_vard, + build_fold_addr_expr (lvar)); + DECL_HAS_VALUE_EXPR_P (new_vard) = 1; + } + tree ref = build_outer_var_ref (var, ctx); + if (rvarp) + { + gimplify_assign (ivar, ref, &llist[0]); + ref = build_outer_var_ref (var, ctx); + gimplify_assign (ref, rvar, &llist[3]); + break; + } + gimplify_assign 
(unshare_expr (ivar), x, &llist[0]); if (sctx.is_simt) @@ -5346,14 +5522,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, ref = build_outer_var_ref (var, ctx); gimplify_assign (ref, x, &llist[1]); - if (new_vard != new_var) - { - SET_DECL_VALUE_EXPR (new_vard, - build_fold_addr_expr (lvar)); - DECL_HAS_VALUE_EXPR_P (new_vard) = 1; - } } - else + else if (rvarp == NULL) { if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); @@ -5456,14 +5626,23 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, if (sctx.lane) { gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, - 1 + (nonconst_simd_if != NULL), - uid, nonconst_simd_if); + 2 + (nonconst_simd_if != NULL), + uid, integer_zero_node, + nonconst_simd_if); gimple_call_set_lhs (g, sctx.lane); gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt)); gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT); g = gimple_build_assign (sctx.lane, INTEGER_CST, build_int_cst (unsigned_type_node, 0)); gimple_seq_add_stmt (ilist, g); + if (sctx.lastlane) + { + g = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE, + 2, uid, sctx.lane); + gimple_call_set_lhs (g, sctx.lastlane); + gimple_seq_add_stmt (dlist, g); + gimple_seq_add_seq (dlist, llist[3]); + } /* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */ if (llist[2]) { @@ -5865,6 +6044,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p, new_var = build4 (ARRAY_REF, TREE_TYPE (val), TREE_OPERAND (val, 0), lastlane, NULL_TREE, NULL_TREE); + TREE_THIS_NOTRAP (new_var) = 1; } } else if (maybe_simt) @@ -8371,6 +8551,167 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx) } +/* Expand code for an OpenMP scan directive and the structured block + before the scan directive. 
*/ + +static void +lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) +{ + gimple *stmt = gsi_stmt (*gsi_p); + bool has_clauses + = gimple_omp_scan_clauses (as_a (stmt)) != NULL; + tree lane = NULL_TREE; + gimple_seq before = NULL; + omp_context *octx = ctx->outer; + gcc_assert (octx); + bool input_phase = has_clauses ^ octx->scan_inclusive; + if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) + && !gimple_omp_for_combined_into_p (octx->stmt) + && octx->scan_inclusive) + { + if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), + OMP_CLAUSE__SIMDUID_)) + { + tree uid = OMP_CLAUSE__SIMDUID__DECL (c); + lane = create_tmp_var (unsigned_type_node); + tree t = build_int_cst (integer_type_node, 1 + !input_phase); + gimple *g + = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); + gimple_call_set_lhs (g, lane); + gimple_seq_add_stmt (&before, g); + } + for (tree c = gimple_omp_for_clauses (octx->stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_INSCAN (c)) + { + tree var = OMP_CLAUSE_DECL (c); + tree new_var = lookup_decl (var, octx); + tree val = new_var; + tree var2 = NULL_TREE; + tree var3 = NULL_TREE; + if (DECL_HAS_VALUE_EXPR_P (new_var)) + { + val = DECL_VALUE_EXPR (new_var); + if (TREE_CODE (val) == ARRAY_REF + && VAR_P (TREE_OPERAND (val, 0))) + { + tree v = TREE_OPERAND (val, 0); + if (lookup_attribute ("omp simd array", + DECL_ATTRIBUTES (v))) + { + val = unshare_expr (val); + TREE_OPERAND (val, 1) = lane; + var2 = lookup_decl (v, octx); + if (input_phase + && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + var3 = maybe_lookup_decl (var2, octx); + if (!input_phase) + { + var2 = build4 (ARRAY_REF, TREE_TYPE (val), + var2, lane, NULL_TREE, NULL_TREE); + TREE_THIS_NOTRAP (var2) = 1; + } + else + var2 = val; + } + } + } + else + { + var2 = build_outer_var_ref (var, octx); + if (input_phase && 
OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + { + var3 = maybe_lookup_decl (new_var, octx); + if (var3 == new_var) + var3 = NULL_TREE; + } + } + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + { + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); + if (input_phase) + { + if (var3) + { + /* If we've added a separate identity element + variable, copy it over into val. */ + tree x = lang_hooks.decls.omp_clause_assign_op (c, val, + var3); + gimplify_and_add (x, &before); + } + else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) + { + /* Otherwise, assign to it the identity element. */ + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c); + tree x = (DECL_HAS_VALUE_EXPR_P (new_var) + ? DECL_VALUE_EXPR (new_var) : NULL_TREE); + tree ref = build_outer_var_ref (var, octx); + SET_DECL_VALUE_EXPR (new_var, val); + SET_DECL_VALUE_EXPR (placeholder, ref); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + lower_omp (&tseq, octx); + SET_DECL_VALUE_EXPR (new_var, x); + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + if (x == NULL_TREE) + DECL_HAS_VALUE_EXPR_P (new_var) = 0; + gimple_seq_add_seq (&before, tseq); + OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + } + } + else + { + gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); + tree x = (DECL_HAS_VALUE_EXPR_P (new_var) + ? DECL_VALUE_EXPR (new_var) : NULL_TREE); + SET_DECL_VALUE_EXPR (new_var, val); + SET_DECL_VALUE_EXPR (placeholder, var2); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + lower_omp (&tseq, octx); + gimple_seq_add_seq (&before, tseq); + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; + SET_DECL_VALUE_EXPR (new_var, x); + SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); + DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + x = lang_hooks.decls.omp_clause_assign_op (c, val, var2); + gimplify_and_add (x, &before); + } + } + else + { + if (input_phase) + { + /* input phase. Set val to initializer before + the body. 
*/ + tree x = omp_reduction_init (c, TREE_TYPE (new_var)); + gimplify_assign (val, x, &before); + } + else + { + /* scan phase. */ + enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); + if (code == MINUS_EXPR) + code = PLUS_EXPR; + + tree x = build2 (code, TREE_TYPE (var2), + unshare_expr (var2), unshare_expr (val)); + gimplify_assign (unshare_expr (var2), x, &before); + gimplify_assign (val, var2, &before); + } + } + } + } + else if (has_clauses) + sorry_at (gimple_location (stmt), + "%<#pragma omp scan%> not supported yet"); + gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); + gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT); + gsi_replace (gsi_p, gimple_build_nop (), true); +} + + /* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple substitution of a couple of function calls. But in the NAMED case, requires that languages coordinate a symbol name. It is therefore @@ -10843,11 +11184,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GIMPLE_OMP_SCAN: ctx = maybe_lookup_ctx (stmt); gcc_assert (ctx); - gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT); - if (gimple_omp_scan_clauses (as_a (stmt))) - sorry_at (gimple_location (stmt), - "%<#pragma omp scan%> not supported yet"); - gsi_replace (gsi_p, gimple_build_nop (), true); + lower_omp_scan (gsi_p, ctx); break; case GIMPLE_OMP_CRITICAL: ctx = maybe_lookup_ctx (stmt); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d2ea2f7a093..27a522e0140 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2019-06-17 Jakub Jelinek + + * gcc.dg/vect/vect-simd-8.c: New test. + * gcc.dg/vect/vect-simd-9.c: New test. + * g++.dg/vect/simd-2.cc: New test. + * g++.dg/gomp/scan-1.C: New test. 
+ 2019-06-17 Uroš Bizjak PR target/62055 diff --git a/gcc/testsuite/g++.dg/gomp/scan-1.C b/gcc/testsuite/g++.dg/gomp/scan-1.C new file mode 100644 index 00000000000..6fd3320d656 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/scan-1.C @@ -0,0 +1,26 @@ +// { dg-do compile { target c++11 } } + +struct S { S (); ~S (); S &operator = (const S &) = delete; int s; }; // { dg-message "declared here" } +#pragma omp declare reduction (+ : S : omp_out.s += omp_in.s) + +S s; + +void +foo (void) +{ + #pragma omp simd reduction (+: s) + for (int i = 0; i < 64; ++i) + s.s += i; +} + +void +bar (int *x) +{ + #pragma omp simd reduction (inscan, +: s) // { dg-error "use of deleted function" } + for (int i = 0; i < 64; ++i) + { + s.s += i; + #pragma omp scan inclusive (s) // { dg-error "" } + x[i] = s.s; + } +} diff --git a/gcc/testsuite/g++.dg/vect/simd-2.cc b/gcc/testsuite/g++.dg/vect/simd-2.cc new file mode 100644 index 00000000000..3160495761c --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/simd-2.cc @@ -0,0 +1,153 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + int s; +}; + +S::S () : s (0) +{ +} + +S::~S () +{ +} + +S::S (const S &x) +{ + s = x.s; +} + +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +__attribute__((noipa)) void +foo (S *a, S *b) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r.s += a[i].s; + #pragma omp scan inclusive(r) + b[i] = r; 
+ } +} + +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp simd reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + s.s += 2 * a[i].s; + #pragma omp scan inclusive(s) + b[i] = s; + } + return S (s); +} + +__attribute__((noipa)) void +baz (S *a, S *b) +{ + #pragma omp simd reduction (inscan, +:r) simdlen(1) + for (int i = 0; i < 1024; i++) + { + r.s += a[i].s; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp simd if (0) reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + s.s += 2 * a[i].s; + #pragma omp scan inclusive(s) + b[i] = s; + } + return S (s); +} + +int +main () +{ + S s; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s.s += i; + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += 2 * i; + if (b[i].s != s.s) + abort (); + } + r.s = 0; + baz (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += i; + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + s.s += 2 * i; + if (b[i].s != s.s) + abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-8.c b/gcc/testsuite/gcc.dg/vect/vect-simd-8.c new file mode 100644 index 00000000000..123a201ce7f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-8.c @@ -0,0 +1,118 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +int r, a[1024], 
b[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp simd reduction (inscan, +:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, +:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + int s = 0; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-9.c b/gcc/testsuite/gcc.dg/vect/vect-simd-9.c new file mode 100644 index 00000000000..ce5ae577100 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-9.c @@ -0,0 +1,120 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { 
target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +int r, a[1024], b[1024]; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp simd reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + r += a[i]; + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, foo:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + s += 2 * a[i]; + #pragma omp scan inclusive(s) + b[i] = s; + } + return s; +} + +int +main () +{ + int s = 0; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c index 
a38899edd6c..c73fbabfe29 100644 --- a/gcc/tree-ssa-dce.c +++ b/gcc/tree-ssa-dce.c @@ -1339,14 +1339,14 @@ eliminate_unnecessary_stmts (void) update_stmt (stmt); release_ssa_name (name); - /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON + /* GOMP_SIMD_LANE (unless three argument) or ASAN_POISON without lhs is not needed. */ if (gimple_call_internal_p (stmt)) switch (gimple_call_internal_fn (stmt)) { case IFN_GOMP_SIMD_LANE: - if (gimple_call_num_args (stmt) >= 2 - && !integer_nonzerop (gimple_call_arg (stmt, 1))) + if (gimple_call_num_args (stmt) >= 3 + && !integer_nonzerop (gimple_call_arg (stmt, 2))) break; /* FALLTHRU */ case IFN_ASAN_POISON: diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 55d87f8f59f..f2f0d231711 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -3003,6 +3003,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) || TREE_CODE (DR_INIT (drb)) != INTEGER_CST) break; + /* Different .GOMP_SIMD_LANE calls still give the same lane, + just hold extra information. */ + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a) + && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b) + && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0) + break; + /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); @@ -4101,7 +4108,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step); /* Mark as simd-lane access. */ - newdr->aux = (void *)-1; + tree arg2 = gimple_call_arg (def, 1); + newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); free_data_ref (dr); datarefs->safe_push (newdr); return opt_result::success (); @@ -4210,14 +4218,17 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) } /* See if this was detected as SIMD lane access. 
*/ - if (dr->aux == (void *)-1) + if (dr->aux == (void *)-1 + || dr->aux == (void *)-2 + || dr->aux == (void *)-3) { if (nested_in_vect_loop_p (loop, stmt_info)) return opt_result::failure_at (stmt_info->stmt, "not vectorized:" " data ref analysis failed: %G", stmt_info->stmt); - STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true; + STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) + = -(uintptr_t) dr->aux; } tree base = get_base_address (DR_REF (dr)); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 671ef2f7658..92a7c29d077 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -824,6 +824,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) peeling_for_alignment (0), ptr_mask (0), ivexpr_map (NULL), + scan_map (NULL), slp_unrolling_factor (1), single_scalar_iteration_cost (0), vectorizable (false), @@ -863,8 +864,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) gimple *stmt = gsi_stmt (si); gimple_set_uid (stmt, 0); add_stmt (stmt); - /* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the - second argument is the #pragma omp simd if (x) condition, when 0, + /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the + third argument is the #pragma omp simd if (x) condition, when 0, loop shouldn't be vectorized, when non-zero constant, it should be vectorized normally, otherwise versioned with vectorized loop done if the condition is non-zero at runtime. 
*/ @@ -872,12 +873,12 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) && is_gimple_call (stmt) && gimple_call_internal_p (stmt) && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE - && gimple_call_num_args (stmt) >= 2 + && gimple_call_num_args (stmt) >= 3 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME && (loop_in->simduid == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))) { - tree arg = gimple_call_arg (stmt, 1); + tree arg = gimple_call_arg (stmt, 2); if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME) simd_if_cond = arg; else @@ -959,6 +960,7 @@ _loop_vec_info::~_loop_vec_info () release_vec_loop_masks (&masks); delete ivexpr_map; + delete scan_map; loop->aux = NULL; } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 21046931243..790b49bcb4b 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-niter.h" #include "gimple-fold.h" #include "regs.h" +#include "attribs.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -3257,7 +3258,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (nargs == 0 || nargs > 4) return false; - /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */ + /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */ combined_fn cfn = gimple_call_combined_fn (stmt); if (cfn == CFN_GOMP_SIMD_LANE) { @@ -6320,6 +6321,489 @@ get_group_alias_ptr_type (stmt_vec_info first_stmt_info) } +/* Function scan_operand_equal_p. + + Helper function for check_scan_store. Compare two references + with .GOMP_SIMD_LANE bases. 
*/ + +static bool +scan_operand_equal_p (tree ref1, tree ref2) +{ + machine_mode mode1, mode2; + poly_int64 bitsize1, bitsize2, bitpos1, bitpos2; + tree offset1, offset2; + int unsignedp1, unsignedp2, reversep1, reversep2; + int volatilep1 = 0, volatilep2 = 0; + tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1, + &mode1, &unsignedp1, &reversep1, + &volatilep1); + tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2, + &mode2, &unsignedp2, &reversep2, + &volatilep2); + if (reversep1 || reversep2 || volatilep1 || volatilep2) + return false; + if (!operand_equal_p (base1, base2, 0)) + return false; + if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0)) + return false; + if (maybe_ne (bitsize1, bitsize2)) + return false; + if (!operand_equal_p (offset1, offset2, 0)) + return false; + return true; +} + + +/* Function check_scan_store. + + Check magic stores for #pragma omp scan {in,ex}clusive reductions. */ + +static bool +check_scan_store (stmt_vec_info stmt_info, tree vectype, + enum vect_def_type rhs_dt, bool slp, tree mask, + vect_memory_access_type memory_access_type) +{ + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); + tree ref_type; + + gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); + if (slp + || mask + || memory_access_type != VMAT_CONTIGUOUS + || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) + || loop_vinfo == NULL + || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + || STMT_VINFO_GROUPED_ACCESS (stmt_info) + || !integer_zerop (DR_OFFSET (dr_info->dr)) + || !integer_zerop (DR_INIT (dr_info->dr)) + || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr))) + || !alias_sets_conflict_p (get_alias_set (vectype), + get_alias_set (TREE_TYPE (ref_type)))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported OpenMP scan 
store.\n"); + return false; + } + + /* We need to pattern match code built by OpenMP lowering and simplified + by following optimizations into something we can handle. + #pragma omp simd reduction(inscan,+:r) + for (...) + { + r += something (); + #pragma omp scan inclusive (r) + use (r); + } + shall have body with: + // Initialization for input phase, store the reduction initializer: + _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); + _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); + D.2042[_21] = 0; + // Actual input phase: + ... + r.0_5 = D.2042[_20]; + _6 = _4 + r.0_5; + D.2042[_20] = _6; + // Initialization for scan phase: + _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2); + _26 = D.2043[_25]; + _27 = D.2042[_25]; + _28 = _26 + _27; + D.2043[_25] = _28; + D.2042[_25] = _28; + // Actual scan phase: + ... + r.1_8 = D.2042[_20]; + ... + The "omp simd array" variable D.2042 holds the privatized copy used + inside of the loop and D.2043 is another one that holds copies of + the current original list item. The separate GOMP_SIMD_LANE ifn + kinds are there in order to allow optimizing the initializer store + and combiner sequence, e.g. if it is originally some C++ish user + defined reduction, but allow the vectorizer to pattern recognize it + and turn into the appropriate vectorized scan. */ + + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) + { + /* Match the D.2042[_21] = 0; store above. Just require that + it is a constant or external definition store. */ + if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def) + { + fail_init: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported OpenMP scan initializer store.\n"); + return false; + } + + if (! 
loop_vinfo->scan_map) + loop_vinfo->scan_map = new hash_map; + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + tree &cached = loop_vinfo->scan_map->get_or_insert (var); + if (cached) + goto fail_init; + cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info)); + + /* These stores can be vectorized normally. */ + return true; + } + + if (rhs_dt != vect_internal_def) + { + fail: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported OpenMP scan combiner pattern.\n"); + return false; + } + + gimple *stmt = STMT_VINFO_STMT (stmt_info); + tree rhs = gimple_assign_rhs1 (stmt); + if (TREE_CODE (rhs) != SSA_NAME) + goto fail; + + use_operand_p use_p; + imm_use_iterator iter; + gimple *other_store_stmt = NULL; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + if (gimple_bb (use_stmt) != gimple_bb (stmt) + || !gimple_store_p (use_stmt) + || other_store_stmt) + goto fail; + other_store_stmt = use_stmt; + } + if (other_store_stmt == NULL) + goto fail; + stmt_vec_info other_store_stmt_info + = loop_vinfo->lookup_stmt (other_store_stmt); + if (other_store_stmt_info == NULL + || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3) + goto fail; + + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); + if (gimple_bb (def_stmt) != gimple_bb (stmt) + || !is_gimple_assign (def_stmt) + || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS) + goto fail; + + enum tree_code code = gimple_assign_rhs_code (def_stmt); + /* For pointer addition, we should use the normal plus for the vector + operation. 
*/ + switch (code) + { + case POINTER_PLUS_EXPR: + code = PLUS_EXPR; + break; + case MULT_HIGHPART_EXPR: + goto fail; + default: + break; + } + if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code)) + goto fail; + + tree rhs1 = gimple_assign_rhs1 (def_stmt); + tree rhs2 = gimple_assign_rhs2 (def_stmt); + if (TREE_CODE (rhs1) != SSA_NAME + || TREE_CODE (rhs2) != SSA_NAME) + goto fail; + + gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); + gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); + if (gimple_bb (load1_stmt) != gimple_bb (stmt) + || !gimple_assign_load_p (load1_stmt) + || gimple_bb (load2_stmt) != gimple_bb (stmt) + || !gimple_assign_load_p (load2_stmt)) + goto fail; + + stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); + stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); + if (load1_stmt_info == NULL + || load2_stmt_info == NULL + || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3 + || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3) + goto fail; + + if (scan_operand_equal_p (gimple_assign_lhs (stmt), + gimple_assign_rhs1 (load2_stmt))) + { + std::swap (rhs1, rhs2); + std::swap (load1_stmt, load2_stmt); + std::swap (load1_stmt_info, load2_stmt_info); + } + if (!scan_operand_equal_p (gimple_assign_lhs (stmt), + gimple_assign_rhs1 (load1_stmt)) + || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt), + gimple_assign_rhs1 (load2_stmt))) + goto fail; + + dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); + if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0))) + goto fail; + + tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0); + if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1)) + || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2)) + || (!lookup_attribute ("omp simd inscan", 
DECL_ATTRIBUTES (var1))) + == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2)))) + goto fail; + + if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) + std::swap (var1, var2); + + if (loop_vinfo->scan_map == NULL) + goto fail; + tree *init = loop_vinfo->scan_map->get (var1); + if (init == NULL) + goto fail; + + /* The IL is as expected, now check if we can actually vectorize it. + _26 = D.2043[_25]; + _27 = D.2042[_25]; + _28 = _26 + _27; + D.2043[_25] = _28; + D.2042[_25] = _28; + should be vectorized as (where _40 is the vectorized rhs + from the D.2042[_21] = 0; store): + _30 = MEM [(int *)&D.2043]; + _31 = MEM [(int *)&D.2042]; + _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>; + _33 = _31 + _32; + // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; + _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>; + _35 = _33 + _34; + // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], + // _31[1]+.._31[4], ... _31[4]+.._31[7] }; + _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>; + _37 = _35 + _36; + // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], + // _31[0]+.._31[4], ... 
_31[0]+.._31[7] }; + _38 = _30 + _37; + _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; + MEM [(int *)&D.2043] = _39; + MEM [(int *)&D.2042] = _38; */ + enum machine_mode vec_mode = TYPE_MODE (vectype); + optab optab = optab_for_tree_code (code, vectype, optab_default); + if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing) + goto fail; + + unsigned HOST_WIDE_INT nunits; + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) + goto fail; + int units_log2 = exact_log2 (nunits); + if (units_log2 <= 0) + goto fail; + + for (int i = 0; i <= units_log2; ++i) + { + unsigned HOST_WIDE_INT j, k; + vec_perm_builder sel (nunits, nunits, 1); + sel.quick_grow (nunits); + if (i == units_log2) + { + for (j = 0; j < nunits; ++j) + sel[j] = nunits - 1; + } + else + { + for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) + sel[j] = nunits + j; + for (k = 0; j < nunits; ++j, ++k) + sel[j] = k; + } + vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); + if (!can_vec_perm_const_p (vec_mode, indices)) + goto fail; + } + + return true; +} + + +/* Function vectorizable_scan_store. + + Helper of vectorizable_store, arguments like on vectorizable_store. + Handle only the transformation, checking is done in check_scan_store. */ + +static bool +vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + stmt_vec_info *vec_stmt, int ncopies) +{ + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); + tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); + vec_info *vinfo = stmt_info->vinfo; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "transform scan store. 
ncopies = %d\n", ncopies); + + gimple *stmt = STMT_VINFO_STMT (stmt_info); + tree rhs = gimple_assign_rhs1 (stmt); + gcc_assert (TREE_CODE (rhs) == SSA_NAME); + + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); + enum tree_code code = gimple_assign_rhs_code (def_stmt); + if (code == POINTER_PLUS_EXPR) + code = PLUS_EXPR; + gcc_assert (TREE_CODE_LENGTH (code) == binary_op + && commutative_tree_code (code)); + tree rhs1 = gimple_assign_rhs1 (def_stmt); + tree rhs2 = gimple_assign_rhs2 (def_stmt); + gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME); + gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); + gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); + stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); + stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); + dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); + dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); + tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); + tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); + + if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) + { + std::swap (rhs1, rhs2); + std::swap (var1, var2); + } + + tree *init = loop_vinfo->scan_map->get (var1); + gcc_assert (init); + + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + bool inscan_var_store + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; + + unsigned HOST_WIDE_INT nunits; + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) + gcc_unreachable (); + int units_log2 = exact_log2 (nunits); + gcc_assert (units_log2 > 0); + auto_vec perms; + perms.quick_grow (units_log2 + 1); + for (int i = 0; i <= units_log2; ++i) + { + unsigned HOST_WIDE_INT j, k; + vec_perm_builder sel (nunits, nunits, 1); + sel.quick_grow (nunits); + if (i == units_log2) + { + for (j = 0; j < nunits; ++j) + sel[j] = nunits - 1; + } + else + { + for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) + sel[j] = 
nunits + j; + for (k = 0; j < nunits; ++j, ++k) + sel[j] = k; + } + vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); + perms[i] = vect_gen_perm_mask_checked (vectype, indices); + } + + stmt_vec_info prev_stmt_info = NULL; + tree vec_oprnd1 = NULL_TREE; + tree vec_oprnd2 = NULL_TREE; + tree vec_oprnd3 = NULL_TREE; + tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr)); + tree dataref_offset = build_int_cst (ref_type, 0); + tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS); + tree orig = NULL_TREE; + for (int j = 0; j < ncopies; j++) + { + stmt_vec_info new_stmt_info; + if (j == 0) + { + vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info); + vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); + vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info); + orig = vec_oprnd3; + } + else + { + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); + vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); + vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3); + if (!inscan_var_store) + dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); + } + + tree v = vec_oprnd2; + for (int i = 0; i < units_log2; ++i) + { + tree new_temp = make_ssa_name (vectype); + gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, v, + vec_oprnd1, perms[i]); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + if (prev_stmt_info == NULL) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + + tree new_temp2 = make_ssa_name (vectype); + g = gimple_build_assign (new_temp2, code, v, new_temp); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + + v = new_temp2; + } + + tree new_temp = make_ssa_name (vectype); + gimple *g = gimple_build_assign 
(new_temp, code, orig, v); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + + orig = make_ssa_name (vectype); + g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp, + perms[units_log2]); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + + if (!inscan_var_store) + { + tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, + dataref_offset); + vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); + g = gimple_build_assign (data_ref, new_temp); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + } + } + + if (inscan_var_store) + for (int j = 0; j < ncopies; j++) + { + if (j != 0) + dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); + + tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, + dataref_offset); + vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); + gimple *g = gimple_build_assign (data_ref, orig); + stmt_vec_info new_stmt_info + = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + } + return true; +} + + /* Function vectorizable_store. Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure) @@ -6514,6 +6998,13 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, group_size = vec_num = 1; } + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt) + { + if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask, + memory_access_type)) + return false; + } + if (!vec_stmt) /* transformation not required. 
*/ { STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; @@ -6737,6 +7228,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, } return true; } + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) + return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies); if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++; @@ -7162,7 +7655,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, gcc_assert (useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd))); bool simd_lane_access_p - = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info); + = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; if (simd_lane_access_p && !loop_masks && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR @@ -8347,7 +8840,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (j == 0) { bool simd_lane_access_p - = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info); + = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; if (simd_lane_access_p && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 19a8af090a6..df828ff0a40 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -491,6 +491,10 @@ typedef struct _loop_vec_info : public vec_info { /* Map of IV base/step expressions to inserted name in the preheader. */ hash_map *ivexpr_map; + /* Map of OpenMP "omp simd array" scan variables to corresponding + rhs of the store of the initializer. */ + hash_map *scan_map; + /* The unrolling factor needed to SLP the loop. In case of that pure SLP is applied to the loop, i.e., no unrolling is needed, this is 1. */ poly_uint64 slp_unrolling_factor; @@ -913,7 +917,7 @@ struct _stmt_vec_info { bool strided_p; /* For both loads and stores. 
*/ - bool simd_lane_access_p; + unsigned simd_lane_access_p : 2; /* Classifies how the load or store is going to be implemented for loop vectorization. */