From 1612b1febdd5b6b6ad17c96c980a218bfd40c025 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 21 Jun 2019 08:48:57 +0200 Subject: [PATCH] omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument... * omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument, create another "omp scan inscan exclusive" array if !ctx->scan_inclusive. (lower_rec_input_clauses): Handle exclusive scan inscan reductions. (lower_omp_scan): Likewise. * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of 2-bit bitfield for simd_lane_access_p member. * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle aux == (void *)-4 as simd lane access. * tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update comment with permutations to show the canonical permutation order. (vectorizable_scan_store): Handle exclusive scan. (vectorizable_store): Call vectorizable_scan_store even for STMT_VINFO_SIMD_LANE_ACCESS_P > 3. * gcc.dg/vect/vect-simd-12.c: New test. * gcc.dg/vect/vect-simd-13.c: New test. * gcc.dg/vect/vect-simd-14.c: New test. * gcc.dg/vect/vect-simd-15.c: New test. * gcc.target/i386/sse2-vect-simd-12.c: New test. * gcc.target/i386/sse2-vect-simd-13.c: New test. * gcc.target/i386/sse2-vect-simd-14.c: New test. * gcc.target/i386/sse2-vect-simd-15.c: New test. * gcc.target/i386/avx2-vect-simd-12.c: New test. * gcc.target/i386/avx2-vect-simd-13.c: New test. * gcc.target/i386/avx2-vect-simd-14.c: New test. * gcc.target/i386/avx2-vect-simd-15.c: New test. * gcc.target/i386/avx512f-vect-simd-12.c: New test. * gcc.target/i386/avx512f-vect-simd-13.c: New test. * gcc.target/i386/avx512f-vect-simd-14.c: New test. * gcc.target/i386/avx512bw-vect-simd-15.c: New test. * g++.dg/vect/simd-6.cc: New test. * g++.dg/vect/simd-7.cc: New test. * g++.dg/vect/simd-8.cc: New test. * g++.dg/vect/simd-9.cc: New test. * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics. From-SVN: r272544 --- gcc/ChangeLog | 15 + gcc/omp-low.c | 200 ++++++++++-- gcc/testsuite/ChangeLog | 22 ++ gcc/testsuite/c-c++-common/gomp/scan-2.c | 2 +- gcc/testsuite/g++.dg/vect/simd-6.cc | 161 ++++++++++ gcc/testsuite/g++.dg/vect/simd-7.cc | 124 ++++++++ gcc/testsuite/g++.dg/vect/simd-8.cc | 122 +++++++ gcc/testsuite/g++.dg/vect/simd-9.cc | 153 +++++++++ gcc/testsuite/gcc.dg/vect/vect-simd-12.c | 122 +++++++ gcc/testsuite/gcc.dg/vect/vect-simd-13.c | 124 ++++++++ gcc/testsuite/gcc.dg/vect/vect-simd-14.c | 94 ++++++ gcc/testsuite/gcc.dg/vect/vect-simd-15.c | 186 +++++++++++ .../gcc.target/i386/avx2-vect-simd-12.c | 16 + .../gcc.target/i386/avx2-vect-simd-13.c | 16 + .../gcc.target/i386/avx2-vect-simd-14.c | 16 + .../gcc.target/i386/avx2-vect-simd-15.c | 16 + .../gcc.target/i386/avx512bw-vect-simd-15.c | 16 + .../gcc.target/i386/avx512f-vect-simd-12.c | 16 + .../gcc.target/i386/avx512f-vect-simd-13.c | 16 + .../gcc.target/i386/avx512f-vect-simd-14.c | 16 + .../gcc.target/i386/sse2-vect-simd-12.c | 16 + .../gcc.target/i386/sse2-vect-simd-13.c | 16 + .../gcc.target/i386/sse2-vect-simd-14.c | 15 + .../gcc.target/i386/sse2-vect-simd-15.c | 16 + gcc/tree-vect-data-refs.c | 3 +- gcc/tree-vect-stmts.c | 299 +++++++++++++++--- gcc/tree-vectorizer.h | 2 +- 27 files changed, 1757 insertions(+), 63 deletions(-) create mode 100644 gcc/testsuite/g++.dg/vect/simd-6.cc create mode 100644 gcc/testsuite/g++.dg/vect/simd-7.cc create mode 100644 gcc/testsuite/g++.dg/vect/simd-8.cc create mode 100644 gcc/testsuite/g++.dg/vect/simd-9.cc create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-12.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-13.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-14.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-15.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c create mode 100644 gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b585b329b83..3a6b86bd266 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,20 @@ 2019-06-21 Jakub Jelinek + * omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument, + create another "omp scan inscan exclusive" array if + !ctx->scan_inclusive. + (lower_rec_input_clauses): Handle exclusive scan inscan reductions. + (lower_omp_scan): Likewise. + * tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of + 2-bit bitfield for simd_lane_access_p member. + * tree-vect-data-refs.c (vect_analyze_data_refs): Also handle + aux == (void *)-4 as simd lane access. + * tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update + comment with permutations to show the canonical permutation order. + (vectorizable_scan_store): Handle exclusive scan. + (vectorizable_store): Call vectorizable_scan_store even for + STMT_VINFO_SIMD_LANE_ACCESS_P > 3. + * tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle "omp simd array" arrays with one byte elements. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index ba85b1914c0..6b1e6a8a624 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3692,7 +3692,8 @@ struct omplow_simd_context { static bool lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, omplow_simd_context *sctx, tree &ivar, - tree &lvar, tree *rvar = NULL) + tree &lvar, tree *rvar = NULL, + tree *rvar2 = NULL) { if (known_eq (sctx->max_vf, 0U)) { @@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->lastlane, NULL_TREE, NULL_TREE); TREE_THIS_NOTRAP (*rvar) = 1; + + if (!ctx->scan_inclusive) + { + /* And for exclusive scan yet another one, which will + hold the value during the scan phase. */ + tree savar = create_tmp_var_raw (atype); + if (TREE_ADDRESSABLE (new_var)) + TREE_ADDRESSABLE (savar) = 1; + DECL_ATTRIBUTES (savar) + = tree_cons (get_identifier ("omp simd array"), NULL, + tree_cons (get_identifier ("omp simd inscan " + "exclusive"), NULL, + DECL_ATTRIBUTES (savar))); + gimple_add_tmp_var (savar); + ctx->cb.decl_map->put (iavar, savar); + *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar, + sctx->idx, NULL_TREE, NULL_TREE); + TREE_THIS_NOTRAP (*rvar2) = 1; + } } ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx, NULL_TREE, NULL_TREE); @@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, new_vard = TREE_OPERAND (new_var, 0); gcc_assert (DECL_P (new_vard)); } - tree rvar = NULL_TREE, *rvarp = NULL; + tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE; if (is_simd && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION && OMP_CLAUSE_REDUCTION_INSCAN (c)) rvarp = &rvar; if (is_simd && lower_rec_simd_input_clauses (new_var, ctx, &sctx, - ivar, lvar, rvarp)) + ivar, lvar, rvarp, + &rvar2)) { if (new_vard == new_var) { @@ -5220,6 +5241,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, (c, ivar2, build_outer_var_ref (var, ctx)); gimplify_and_add (x, &llist[0]); + if (rvar2) + { + x = lang_hooks.decls.omp_clause_default_ctor + (c, unshare_expr (rvar2), + build_outer_var_ref (var, ctx)); + gimplify_and_add (x, &llist[0]); + } + /* For types that need construction, add another private var which will be default constructed and optionally initialized with @@ -5229,7 +5258,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, iteration. */ tree nv = create_tmp_var_raw (TREE_TYPE (ivar)); gimple_add_tmp_var (nv); - ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0), + ctx->cb.decl_map->put (TREE_OPERAND (rvar2 + ? rvar2 + : ivar, 0), nv); x = lang_hooks.decls.omp_clause_default_ctor (c, nv, build_outer_var_ref (var, ctx)); @@ -5296,6 +5327,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_stmt (&dtor, &tseq); gimple_seq_add_seq (&llist[1], tseq); } + + if (rvar2) + { + x = lang_hooks.decls.omp_clause_dtor (c, rvar2); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (&llist[1], tseq); + } + } break; } if (x) @@ -5390,6 +5433,24 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimple_seq_add_seq (ilist, tseq); } OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL; + if (!ctx->scan_inclusive) + { + tree nv2 + = create_tmp_var_raw (TREE_TYPE (new_var)); + gimple_add_tmp_var (nv2); + ctx->cb.decl_map->put (nv, nv2); + x = lang_hooks.decls.omp_clause_default_ctor + (c, nv2, build_outer_var_ref (var, ctx)); + gimplify_and_add (x, ilist); + x = lang_hooks.decls.omp_clause_dtor (c, nv2); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } + } x = lang_hooks.decls.omp_clause_dtor (c, nv); if (x) { @@ -5399,6 +5460,21 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimple_seq_add_seq (dlist, tseq); } } + else if (!ctx->scan_inclusive + && TREE_ADDRESSABLE (TREE_TYPE (new_var))) + { + tree nv2 = create_tmp_var_raw (TREE_TYPE (new_var)); + gimple_add_tmp_var (nv2); + ctx->cb.decl_map->put (new_vard, nv2); + x = lang_hooks.decls.omp_clause_dtor (c, nv2); + if (x) + { + tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } + } DECL_HAS_VALUE_EXPR_P (placeholder) = 0; goto do_dtor; } @@ -5487,14 +5563,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, new_vard = TREE_OPERAND (new_var, 0); gcc_assert (DECL_P (new_vard)); } - tree rvar = NULL_TREE, *rvarp = NULL; + tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE; if (is_simd && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION && OMP_CLAUSE_REDUCTION_INSCAN (c)) rvarp = &rvar; if (is_simd && lower_rec_simd_input_clauses (new_var, ctx, &sctx, - ivar, lvar, rvarp)) + ivar, lvar, rvarp, + &rvar2)) { if (new_vard != new_var) { @@ -8573,18 +8650,40 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq before = NULL; omp_context *octx = ctx->outer; gcc_assert (octx); + if (!octx->scan_inclusive && !has_clauses) + { + gimple_stmt_iterator gsi2 = *gsi_p; + gsi_next (&gsi2); + gimple *stmt2 = gsi_stmt (gsi2); + /* For exclusive scan, swap GIMPLE_OMP_SCAN without clauses + with following GIMPLE_OMP_SCAN with clauses, so that input_phase, + the one with exclusive clause(s), comes first. */ + if (stmt2 + && gimple_code (stmt2) == GIMPLE_OMP_SCAN + && gimple_omp_scan_clauses (as_a (stmt2)) != NULL) + { + gsi_remove (gsi_p, false); + gsi_insert_after (gsi_p, stmt, GSI_SAME_STMT); + ctx = maybe_lookup_ctx (stmt2); + gcc_assert (ctx); + lower_omp_scan (gsi_p, ctx); + return; + } + } + bool input_phase = has_clauses ^ octx->scan_inclusive; if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD) - && !gimple_omp_for_combined_into_p (octx->stmt) - && octx->scan_inclusive) + && !gimple_omp_for_combined_into_p (octx->stmt)) { if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt), OMP_CLAUSE__SIMDUID_)) { tree uid = OMP_CLAUSE__SIMDUID__DECL (c); lane = create_tmp_var (unsigned_type_node); - tree t = build_int_cst (integer_type_node, 1 + !input_phase); + tree t = build_int_cst (integer_type_node, + input_phase ? 1 + : octx->scan_inclusive ? 2 : 3); gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t); gimple_call_set_lhs (g, lane); @@ -8601,6 +8700,8 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree val = new_var; tree var2 = NULL_TREE; tree var3 = NULL_TREE; + tree var4 = NULL_TREE; + tree lane0 = NULL_TREE; tree new_vard = new_var; if (omp_is_reference (var)) { @@ -8623,16 +8724,26 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) DECL_ATTRIBUTES (v))) { val = unshare_expr (val); + lane0 = TREE_OPERAND (val, 1); TREE_OPERAND (val, 1) = lane; var2 = lookup_decl (v, octx); + if (!octx->scan_inclusive) + var4 = lookup_decl (var2, octx); if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) - var3 = maybe_lookup_decl (var2, octx); + var3 = maybe_lookup_decl (var4 ? var4 : var2, octx); if (!input_phase) { var2 = build4 (ARRAY_REF, TREE_TYPE (val), var2, lane, NULL_TREE, NULL_TREE); TREE_THIS_NOTRAP (var2) = 1; + if (!octx->scan_inclusive) + { + var4 = build4 (ARRAY_REF, TREE_TYPE (val), + var4, lane, NULL_TREE, + NULL_TREE); + TREE_THIS_NOTRAP (var4) = 1; + } } else var2 = val; @@ -8643,12 +8754,28 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) else { var2 = build_outer_var_ref (var, octx); - if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) { var3 = maybe_lookup_decl (new_vard, octx); - if (var3 == new_vard) + if (var3 == new_vard || var3 == NULL_TREE) var3 = NULL_TREE; + else if (!octx->scan_inclusive && !input_phase) + { + var4 = maybe_lookup_decl (var3, octx); + if (var4 == var3 || var4 == NULL_TREE) + { + if (TREE_ADDRESSABLE (TREE_TYPE (new_var))) + { + var4 = var3; + var3 = NULL_TREE; + } + else + var4 = NULL_TREE; + } + } } + if (!octx->scan_inclusive && !input_phase && var4 == NULL_TREE) + var4 = create_tmp_var (TREE_TYPE (val)); } if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) { @@ -8689,9 +8816,17 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else { + tree x; + if (!octx->scan_inclusive) + { + tree v4 = unshare_expr (var4); + tree v2 = unshare_expr (var2); + x = lang_hooks.decls.omp_clause_assign_op (c, v4, v2); + gimplify_and_add (x, &before); + } gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c); - tree x = (DECL_HAS_VALUE_EXPR_P (new_vard) - ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); + x = (DECL_HAS_VALUE_EXPR_P (new_vard) + ? DECL_VALUE_EXPR (new_vard) : NULL_TREE); tree vexpr = val; if (x && omp_is_reference (var)) vexpr = build_fold_addr_expr_loc (clause_loc, val); @@ -8706,8 +8841,18 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) SET_DECL_VALUE_EXPR (new_vard, x); SET_DECL_VALUE_EXPR (placeholder, NULL_TREE); DECL_HAS_VALUE_EXPR_P (placeholder) = 0; - x = lang_hooks.decls.omp_clause_assign_op (c, val, var2); - gimplify_and_add (x, &before); + if (octx->scan_inclusive) + { + x = lang_hooks.decls.omp_clause_assign_op (c, val, + var2); + gimplify_and_add (x, &before); + } + else if (lane0 == NULL_TREE) + { + x = lang_hooks.decls.omp_clause_assign_op (c, val, + var4); + gimplify_and_add (x, &before); + } } } else @@ -8728,10 +8873,29 @@ lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree x = build2 (code, TREE_TYPE (var2), unshare_expr (var2), unshare_expr (val)); - gimplify_assign (unshare_expr (var2), x, &before); - gimplify_assign (val, var2, &before); + if (octx->scan_inclusive) + { + gimplify_assign (unshare_expr (var2), x, &before); + gimplify_assign (val, var2, &before); + } + else + { + gimplify_assign (unshare_expr (var4), + unshare_expr (var2), &before); + gimplify_assign (var2, x, &before); + if (lane0 == NULL_TREE) + gimplify_assign (val, var4, &before); + } } } + if (!octx->scan_inclusive && !input_phase && lane0) + { + tree vexpr = unshare_expr (var4); + TREE_OPERAND (vexpr, 1) = lane0; + if (omp_is_reference (var)) + vexpr = build_fold_addr_expr_loc (clause_loc, vexpr); + SET_DECL_VALUE_EXPR (new_vard, vexpr); + } } } else if (has_clauses) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ab8cc800dd5..c5bb4508539 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,27 @@ 2019-06-21 Jakub Jelinek + * gcc.dg/vect/vect-simd-12.c: New test. + * gcc.dg/vect/vect-simd-13.c: New test. + * gcc.dg/vect/vect-simd-14.c: New test. + * gcc.dg/vect/vect-simd-15.c: New test. + * gcc.target/i386/sse2-vect-simd-12.c: New test. + * gcc.target/i386/sse2-vect-simd-13.c: New test. + * gcc.target/i386/sse2-vect-simd-14.c: New test. + * gcc.target/i386/sse2-vect-simd-15.c: New test. + * gcc.target/i386/avx2-vect-simd-12.c: New test. + * gcc.target/i386/avx2-vect-simd-13.c: New test. + * gcc.target/i386/avx2-vect-simd-14.c: New test. + * gcc.target/i386/avx2-vect-simd-15.c: New test. + * gcc.target/i386/avx512f-vect-simd-12.c: New test. + * gcc.target/i386/avx512f-vect-simd-13.c: New test. + * gcc.target/i386/avx512f-vect-simd-14.c: New test. + * gcc.target/i386/avx512bw-vect-simd-15.c: New test. + * g++.dg/vect/simd-6.cc: New test. + * g++.dg/vect/simd-7.cc: New test. + * g++.dg/vect/simd-8.cc: New test. + * g++.dg/vect/simd-9.cc: New test. + * c-c++-common/gomp/scan-2.c: Don't expect any diagnostics. + PR c++/90950 * g++.dg/gomp/lastprivate-1.C: New test. diff --git a/gcc/testsuite/c-c++-common/gomp/scan-2.c b/gcc/testsuite/c-c++-common/gomp/scan-2.c index 14b74a5b87f..4f322ab65de 100644 --- a/gcc/testsuite/c-c++-common/gomp/scan-2.c +++ b/gcc/testsuite/c-c++-common/gomp/scan-2.c @@ -8,7 +8,7 @@ f1 (int *c, int *d) for (i = 0; i < 64; i++) { d[i] = a; - #pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ + #pragma omp scan exclusive (a) a += c[i]; } } diff --git a/gcc/testsuite/g++.dg/vect/simd-6.cc b/gcc/testsuite/g++.dg/vect/simd-6.cc new file mode 100644 index 00000000000..997f7b1cf92 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/simd-6.cc @@ -0,0 +1,161 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +template +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + T s; +}; + +template +S::S () : s (0) +{ +} + +template +S::~S () +{ +} + +template +S::S (const S &x) +{ + s = x.s; +} + +template +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +template +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +template +__attribute__((noipa)) void +foo (S *a, S *b) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +template +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp simd reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return S (s); +} + +__attribute__((noipa)) void +baz (S *a, S *b) +{ + #pragma omp simd reduction (inscan, +:r) simdlen(1) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp simd if (0) reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return S (s); +} + +int +main () +{ + S s; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + r.s = 0; + baz (a, b); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/g++.dg/vect/simd-7.cc b/gcc/testsuite/g++.dg/vect/simd-7.cc new file mode 100644 index 00000000000..1467849e0c6 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/simd-7.cc @@ -0,0 +1,124 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "../../gcc.dg/vect/tree-vect.h" + +int r, a[1024], b[1024], q; + +template +__attribute__((noipa)) void +foo (T a, T b, U r) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +template +__attribute__((noipa)) T +bar (void) +{ + T &s = q; + q = 0; + #pragma omp simd reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +template +__attribute__((noipa)) void +baz (T *a, T *b, T &r) +{ + #pragma omp simd reduction (inscan, +:r) if (simd: 0) + for (T i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +template +__attribute__((noipa)) int +qux (void) +{ + T s = q; + q = 0; + #pragma omp simd reduction (inscan, +:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/g++.dg/vect/simd-8.cc b/gcc/testsuite/g++.dg/vect/simd-8.cc new file mode 100644 index 00000000000..8e297e246bd --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/simd-8.cc @@ -0,0 +1,122 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +int r, a[1024], b[1024], q; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b, int &r) +{ + #pragma omp simd reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int &s = q; + q = 0; + #pragma omp simd reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, int &r) +{ + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int &s = q; + q = 0; + #pragma omp simd reduction (inscan, foo:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b, r); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/g++.dg/vect/simd-9.cc b/gcc/testsuite/g++.dg/vect/simd-9.cc new file mode 100644 index 00000000000..bfef445d121 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/simd-9.cc @@ -0,0 +1,153 @@ +// { dg-require-effective-target size32plus } +// { dg-additional-options "-fopenmp-simd" } +// { dg-additional-options "-mavx" { target avx_runtime } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } + +#include "../../gcc.dg/vect/tree-vect.h" + +struct S { + inline S (); + inline ~S (); + inline S (const S &); + inline S & operator= (const S &); + int s; +}; + +S::S () : s (0) +{ +} + +S::~S () +{ +} + +S::S (const S &x) +{ + s = x.s; +} + +S & +S::operator= (const S &x) +{ + s = x.s; + return *this; +} + +static inline void +ini (S &x) +{ + x.s = 0; +} + +S r, a[1024], b[1024]; + +#pragma omp declare reduction (+: S: omp_out.s += omp_in.s) +#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv)) + +__attribute__((noipa)) void +foo (S *a, S *b, S &r) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +bar (void) +{ + S s; + #pragma omp simd reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return s; +} + +__attribute__((noipa)) void +baz (S *a, S *b, S &r) +{ + #pragma omp simd reduction (inscan, +:r) simdlen(1) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r.s += a[i].s; + } +} + +__attribute__((noipa)) S +qux (void) +{ + S s; + #pragma omp simd if (0) reduction (inscan, plus:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s.s += 2 * a[i].s; + } + return s; +} + +int +main () +{ + S s; + check_vect (); + for (int i = 0; i < 1024; ++i) + { + a[i].s = i; + b[i].s = -1; + asm ("" : "+g" (i)); + } + foo (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (bar ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + r.s = 0; + baz (a, b, r); + if (r.s != 1024 * 1023 / 2) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + else + b[i].s = 25; + s.s += i; + } + if (qux ().s != 1024 * 1023) + abort (); + s.s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i].s != s.s) + abort (); + s.s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-12.c b/gcc/testsuite/gcc.dg/vect/vect-simd-12.c new file mode 100644 index 00000000000..cdfec81a6e6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-12.c @@ -0,0 +1,122 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +int r, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp simd reduction (inscan, +:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, +:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp simd reduction (inscan, +:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, +:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-13.c b/gcc/testsuite/gcc.dg/vect/vect-simd-13.c new file mode 100644 index 00000000000..aee5244d85e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-13.c @@ -0,0 +1,124 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +int r, a[1024], b[1024]; + +#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0) + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp simd reduction (inscan, foo:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, foo:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp simd reduction (inscan, foo:r) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r += a[i]; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp simd reduction (inscan, foo:s) simdlen (1) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s += 2 * a[i]; + } + return s; +} + +int +main () +{ + int s = 0; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r != 1024 * 1023 / 2) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-14.c b/gcc/testsuite/gcc.dg/vect/vect-simd-14.c new file mode 100644 index 00000000000..43663bbc598 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-14.c @@ -0,0 +1,94 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +float r = 1.0f, a[1024], b[1024]; + +__attribute__((noipa)) void +foo (float *a, float *b) +{ + #pragma omp simd reduction (inscan, *:r) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + r *= a[i]; + } +} + +__attribute__((noipa)) float +bar (void) +{ + float s = -__builtin_inff (); + #pragma omp simd reduction (inscan, max:s) + for (int i = 0; i < 1024; i++) + { + b[i] = s; + #pragma omp scan exclusive(s) + s = s > a[i] ? s : a[i]; + } + return s; +} + +int +main () +{ + float s = 1.0f; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + if (i < 80) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 200) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else if (i < 280) + a[i] = (i & 1) ? 0.25f : 0.5f; + else if (i < 380) + a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f; + else + switch (i % 6) + { + case 0: a[i] = 0.25f; break; + case 1: a[i] = 2.0f; break; + case 2: a[i] = -1.0f; break; + case 3: a[i] = -4.0f; break; + case 4: a[i] = 0.5f; break; + case 5: a[i] = 1.0f; break; + default: a[i] = 0.0f; break; + } + b[i] = -19.0f; + asm ("" : "+g" (i)); + } + foo (a, b); + if (r * 16384.0f != 0.125f) + abort (); + float m = -175.25f; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -231.75f; + s *= a[i]; + a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f); + m += 0.75f; + } + if (bar () != 592.0f) + abort (); + s = -__builtin_inff (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + if (s < a[i]) + s = a[i]; + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-15.c b/gcc/testsuite/gcc.dg/vect/vect-simd-15.c new file mode 100644 index 00000000000..91e34cd6428 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-15.c @@ -0,0 +1,186 @@ +/* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-fopenmp-simd" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ + +#ifndef main +#include "tree-vect.h" +#endif + +int r, a[1024], b[1024]; +unsigned short r2, b2[1024]; +unsigned char r3, b3[1024]; + +__attribute__((noipa)) void +foo (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp simd reduction (inscan, +:r, r2, r3) + for (int i = 0; i < 1024; i++) + { + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + #pragma omp scan exclusive(r, r2, r3) + { r += a[i]; r2 += a[i]; r3 += a[i]; } + } +} + +__attribute__((noipa)) int +bar (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp simd reduction (inscan, +:s, s2, s3) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; b2[i] = s2; b3[i] = s3; } + #pragma omp scan exclusive(s, s2, s3) + { + s += 2 * a[i]; + s2 += 2 * a[i]; + s3 += 2 * a[i]; + } + } + *s2p = s2; + *s3p = s3; + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b, unsigned short *b2, unsigned char *b3) +{ + #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0) + for (int i = 0; i < 1024; i++) + { + { + b[i] = r; + b2[i] = r2; + b3[i] = r3; + } + #pragma omp scan exclusive(r, r2, r3) + { + r += a[i]; + r2 += a[i]; + r3 += a[i]; + } + } +} + +__attribute__((noipa)) int +qux (unsigned short *s2p, unsigned char *s3p) +{ + int s = 0; + unsigned short s2 = 0; + unsigned char s3 = 0; + #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; b2[i] = s2; b3[i] = s3; } + #pragma omp scan exclusive(s, s2, s3) + { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } + } + *s2p = s2; + *s3p = s3; + return s; +} + +int +main () +{ + int s = 0; + unsigned short s2; + unsigned char s3; +#ifndef main + check_vect (); +#endif + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + asm ("" : "+g" (i)); + } + foo (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + s += i; + } + if (bar (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = -1; + b2[i] = -1; + b3[i] = -1; + } + s += 2 * i; + } + r = 0; + r2 = 0; + r3 = 0; + baz (a, b, b2, b3); + if (r != 1024 * 1023 / 2 + || r2 != (unsigned short) r + || r3 != (unsigned char) r) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + else + { + b[i] = 25; + b2[i] = 24; + b3[i] = 26; + } + s += i; + } + s2 = 0; + s3 = 0; + if (qux (&s2, &s3) != 1024 * 1023) + abort (); + if (s2 != (unsigned short) (1024 * 1023) + || s3 != (unsigned char) (1024 * 1023)) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s + || b2[i] != (unsigned short) s + || b3[i] != (unsigned char) s) + abort (); + s += 2 * i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c new file mode 100644 index 00000000000..b170faaf080 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-12.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-12.c" + +static void +avx2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c new file mode 100644 index 00000000000..e9fb68db459 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-13.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-13.c" + +static void +avx2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c new file mode 100644 index 00000000000..3293d14d727 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-14.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-14.c" + +static void +avx2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c new file mode 100644 index 00000000000..0cb5e8663cb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx2-vect-simd-15.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-15.c" + +static void +avx2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c b/gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c new file mode 100644 index 00000000000..8c14e2091e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vect-simd-15.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512bw-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-15.c" + +static void +avx512bw_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c new file mode 100644 index 00000000000..87c46d7018d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-12.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512f-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-12.c" + +static void +avx512f_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c new file mode 100644 index 00000000000..a8a28dc9d18 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-13.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512f-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-13.c" + +static void +avx512f_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c new file mode 100644 index 00000000000..eba9228091f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-simd-14.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "avx512f-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-14.c" + +static void +avx512f_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c new file mode 100644 index 00000000000..e6f15910c55 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-12.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-12.c" + +static void +sse2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c new file mode 100644 index 00000000000..5ad65b1c460 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-13.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-13.c" + +static void +sse2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c new file mode 100644 index 00000000000..8274d42ffaa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-14.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-14.c" + +static void +sse2_test (void) +{ + do_main (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c new file mode 100644 index 00000000000..8098d20b202 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-vect-simd-15.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */ + +#include "sse2-check.h" + +#define main() do_main () + +#include "../../gcc.dg/vect/vect-simd-15.c" + +static void +sse2_test (void) +{ + do_main (); +} diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 35a48d101af..1a72cffc8d4 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) /* See if this was detected as SIMD lane access. */ if (dr->aux == (void *)-1 || dr->aux == (void *)-2 - || dr->aux == (void *)-3) + || dr->aux == (void *)-3 + || dr->aux == (void *)-4) { if (nested_in_vect_loop_p (loop, stmt_info)) return opt_result::failure_at (stmt_info->stmt, diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index e87c32df869..800c000fdc8 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6512,7 +6512,37 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, kinds are there in order to allow optimizing the initializer store and combiner sequence, e.g. if it is originally some C++ish user defined reduction, but allow the vectorizer to pattern recognize it - and turn into the appropriate vectorized scan. */ + and turn into the appropriate vectorized scan. + + For exclusive scan, this is slightly different: + #pragma omp simd reduction(inscan,+:r) + for (...) + { + use (r); + #pragma omp scan exclusive (r) + r += something (); + } + shall have body with: + // Initialization for input phase, store the reduction initializer: + _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); + _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); + D.2042[_21] = 0; + // Actual input phase: + ... + r.0_5 = D.2042[_20]; + _6 = _4 + r.0_5; + D.2042[_20] = _6; + // Initialization for scan phase: + _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); + _26 = D.2043[_25]; + D.2044[_25] = _26; + _27 = D.2042[_25]; + _28 = _26 + _27; + D.2043[_25] = _28; + // Actual scan phase: + ... + r.1_8 = D.2044[_20]; + ... */ if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) { @@ -6553,26 +6583,52 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, if (TREE_CODE (rhs) != SSA_NAME) goto fail; - use_operand_p use_p; - imm_use_iterator iter; gimple *other_store_stmt = NULL; - FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + bool inscan_var_store + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; + + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) { - gimple *use_stmt = USE_STMT (use_p); - if (use_stmt == stmt || is_gimple_debug (use_stmt)) - continue; - if (gimple_bb (use_stmt) != gimple_bb (stmt) - || !gimple_store_p (use_stmt) - || other_store_stmt) - goto fail; - other_store_stmt = use_stmt; + if (!inscan_var_store) + { + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + if (gimple_bb (use_stmt) != gimple_bb (stmt) + || !is_gimple_assign (use_stmt) + || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS + || other_store_stmt + || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) + goto fail; + other_store_stmt = use_stmt; + } + if (other_store_stmt == NULL) + goto fail; + rhs = gimple_assign_lhs (other_store_stmt); + if (!single_imm_use (rhs, &use_p, &other_store_stmt)) + goto fail; + } } - if (other_store_stmt == NULL) - goto fail; - stmt_vec_info other_store_stmt_info - = loop_vinfo->lookup_stmt (other_store_stmt); - if (other_store_stmt_info == NULL - || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3) + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) + { + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + if (other_store_stmt) + goto fail; + other_store_stmt = use_stmt; + } + } + else goto fail; gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); @@ -6599,8 +6655,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, tree rhs1 = gimple_assign_rhs1 (def_stmt); tree rhs2 = gimple_assign_rhs2 (def_stmt); - if (TREE_CODE (rhs1) != SSA_NAME - || TREE_CODE (rhs2) != SSA_NAME) + if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) goto fail; gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); @@ -6615,22 +6670,83 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); if (load1_stmt_info == NULL || load2_stmt_info == NULL - || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3 - || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3) + || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) + || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) goto fail; - if (scan_operand_equal_p (gimple_assign_lhs (stmt), + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) + { + dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); + if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) + goto fail; + tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); + tree lrhs; + if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) + lrhs = rhs1; + else + lrhs = rhs2; + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == def_stmt || is_gimple_debug (use_stmt)) + continue; + if (other_store_stmt) + goto fail; + other_store_stmt = use_stmt; + } + } + + if (other_store_stmt == NULL) + goto fail; + if (gimple_bb (other_store_stmt) != gimple_bb (stmt) + || !gimple_store_p (other_store_stmt)) + goto fail; + + stmt_vec_info other_store_stmt_info + = loop_vinfo->lookup_stmt (other_store_stmt); + if (other_store_stmt_info == NULL + || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) + != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) + goto fail; + + gimple *stmt1 = stmt; + gimple *stmt2 = other_store_stmt; + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) + std::swap (stmt1, stmt2); + if (scan_operand_equal_p (gimple_assign_lhs (stmt1), gimple_assign_rhs1 (load2_stmt))) { std::swap (rhs1, rhs2); std::swap (load1_stmt, load2_stmt); std::swap (load1_stmt_info, load2_stmt_info); } - if (!scan_operand_equal_p (gimple_assign_lhs (stmt), - gimple_assign_rhs1 (load1_stmt)) - || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt), + if (!scan_operand_equal_p (gimple_assign_lhs (stmt1), + gimple_assign_rhs1 (load1_stmt))) + goto fail; + + tree var3 = NULL_TREE; + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 + && !scan_operand_equal_p (gimple_assign_lhs (stmt2), gimple_assign_rhs1 (load2_stmt))) goto fail; + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) + { + dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); + if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR + || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) + goto fail; + var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); + if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3)) + || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3)) + || lookup_attribute ("omp simd inscan exclusive", + DECL_ATTRIBUTES (var3))) + goto fail; + } dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR @@ -6648,6 +6764,14 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1))) std::swap (var1, var2); + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) + { + if (!lookup_attribute ("omp simd inscan exclusive", + DECL_ATTRIBUTES (var1))) + goto fail; + var1 = var3; + } + if (loop_vinfo->scan_map == NULL) goto fail; tree *init = loop_vinfo->scan_map->get (var1); @@ -6655,6 +6779,7 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, goto fail; /* The IL is as expected, now check if we can actually vectorize it. + Inclusive scan: _26 = D.2043[_25]; _27 = D.2042[_25]; _28 = _26 + _27; @@ -6664,21 +6789,49 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, from the D.2042[_21] = 0; store): _30 = MEM [(int *)&D.2043]; _31 = MEM [(int *)&D.2042]; - _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>; + _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; _33 = _31 + _32; // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; - _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>; + _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; _35 = _33 + _34; // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], // _31[1]+.._31[4], ... _31[4]+.._31[7] }; - _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>; + _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>; _37 = _35 + _36; // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], // _31[0]+.._31[4], ... _31[0]+.._31[7] }; _38 = _30 + _37; _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; MEM [(int *)&D.2043] = _39; - MEM [(int *)&D.2042] = _38; */ + MEM [(int *)&D.2042] = _38; + Exclusive scan: + _26 = D.2043[_25]; + D.2044[_25] = _26; + _27 = D.2042[_25]; + _28 = _26 + _27; + D.2043[_25] = _28; + should be vectorized as (where _40 is the vectorized rhs + from the D.2042[_21] = 0; store): + _30 = MEM [(int *)&D.2043]; + _31 = MEM [(int *)&D.2042]; + _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; + _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>; + _34 = _32 + _33; + // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3], + // _31[3]+_31[4], ... _31[5]+.._31[6] }; + _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>; + _36 = _34 + _35; + // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], + // _31[1]+.._31[4], ... _31[3]+.._31[6] }; + _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>; + _38 = _36 + _37; + // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], + // _31[0]+.._31[4], ... _31[0]+.._31[6] }; + _39 = _30 + _38; + _50 = _31 + _39; + _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>; + MEM [(int *)&D.2044] = _39; + MEM [(int *)&D.2042] = _51; */ enum machine_mode vec_mode = TYPE_MODE (vectype); optab optab = optab_for_tree_code (code, vectype, optab_default); if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing) @@ -6715,6 +6868,24 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, tree rhs = gimple_assign_rhs1 (stmt); gcc_assert (TREE_CODE (rhs) == SSA_NAME); + tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); + bool inscan_var_store + = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; + + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) + { + use_operand_p use_p; + imm_use_iterator iter; + FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (use_stmt == stmt || is_gimple_debug (use_stmt)) + continue; + rhs = gimple_assign_lhs (use_stmt); + break; + } + } + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); enum tree_code code = gimple_assign_rhs_code (def_stmt); if (code == POINTER_PLUS_EXPR) @@ -6737,15 +6908,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { std::swap (rhs1, rhs2); std::swap (var1, var2); + std::swap (load1_dr_info, load2_dr_info); } tree *init = loop_vinfo->scan_map->get (var1); gcc_assert (init); - tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); - bool inscan_var_store - = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL; - unsigned HOST_WIDE_INT nunits; if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) gcc_unreachable (); @@ -6789,29 +6957,50 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, tree vec_oprnd1 = NULL_TREE; tree vec_oprnd2 = NULL_TREE; tree vec_oprnd3 = NULL_TREE; - tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr)); + tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); tree dataref_offset = build_int_cst (ref_type, 0); tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS); + tree ldataref_ptr = NULL_TREE; tree orig = NULL_TREE; + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) + ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); for (int j = 0; j < ncopies; j++) { stmt_vec_info new_stmt_info; if (j == 0) { vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info); - vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); + if (ldataref_ptr == NULL) + vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info); vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info); orig = vec_oprnd3; } else { vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1); - vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); + if (ldataref_ptr == NULL) + vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2); vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3); if (!inscan_var_store) dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); } + if (ldataref_ptr) + { + vec_oprnd2 = make_ssa_name (vectype); + tree data_ref = fold_build2 (MEM_REF, vectype, + unshare_expr (ldataref_ptr), + dataref_offset); + vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); + gimple *g = gimple_build_assign (vec_oprnd2, data_ref); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + if (prev_stmt_info == NULL) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + } + tree v = vec_oprnd2; for (int i = 0; i < units_log2; ++i) { @@ -6848,6 +7037,17 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, new_temp = new_temp2; } + /* For exclusive scan, perform the perms[i] permutation once + more. */ + if (i == 0 + && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 + && v == vec_oprnd2) + { + v = new_temp; + --i; + continue; + } + tree new_temp2 = make_ssa_name (vectype); g = gimple_build_assign (new_temp2, code, v, new_temp); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); @@ -6863,16 +7063,30 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; prev_stmt_info = new_stmt_info; + tree last_perm_arg = new_temp; + /* For exclusive scan, new_temp computed above is the exclusive scan + prefix sum. Turn it into inclusive prefix sum for the broadcast + of the last element into orig. */ + if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) + { + last_perm_arg = make_ssa_name (vectype); + g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2); + new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; + prev_stmt_info = new_stmt_info; + } + orig = make_ssa_name (vectype); - g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp, - perms[units_log2]); + g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg, + last_perm_arg, perms[units_log2]); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; prev_stmt_info = new_stmt_info; if (!inscan_var_store) { - tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, + tree data_ref = fold_build2 (MEM_REF, vectype, + unshare_expr (dataref_ptr), dataref_offset); vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); g = gimple_build_assign (data_ref, new_temp); @@ -6888,7 +7102,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (j != 0) dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); - tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, + tree data_ref = fold_build2 (MEM_REF, vectype, + unshare_expr (dataref_ptr), dataref_offset); vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); gimple *g = gimple_build_assign (data_ref, orig); @@ -7325,7 +7540,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, } return true; } - else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) + else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies); if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index df828ff0a40..5fc8f9fc640 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -917,7 +917,7 @@ struct _stmt_vec_info { bool strided_p; /* For both loads and stores. */ - unsigned simd_lane_access_p : 2; + unsigned simd_lane_access_p : 3; /* Classifies how the load or store is going to be implemented for loop vectorization. */ -- 2.30.2