omp-low.c (struct omp_context): Add scan_inclusive field.
authorJakub Jelinek <jakub@redhat.com>
Mon, 17 Jun 2019 21:20:00 +0000 (23:20 +0200)
committerJakub Jelinek <jakub@gcc.gnu.org>
Mon, 17 Jun 2019 21:20:00 +0000 (23:20 +0200)
* omp-low.c (struct omp_context): Add scan_inclusive field.
(scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
if inclusive scan.
(struct omplow_simd_context): Add lastlane member.
(lower_rec_simd_input_clauses): Add rvar argument, handle inscan
reductions.  Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
1 or 2 argument.
(lower_rec_input_clauses): Handle inscan reductions in simd contexts.
(lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
(lower_omp_scan): New function.
(lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
* tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
check 3rd argument if present rather than 2nd.
* tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
(struct _stmt_vec_info): Change simd_lane_access_p from bool into
2-bit bitfield.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
scan_map.  For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
than 2nd.
(_loop_vec_info::~_loop_vec_info): Delete scan_map.
* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
init.
(vect_find_stmt_data_reference): Encode in ->aux the 2nd
IFN_GOMP_SIMD_LANE argument.
(vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
encoded ->aux value.
* tree-vect-stmts.c: Include attribs.h.
(vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
(scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
functions.
(vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
(vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
cp/
* semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
set need_copy_assignment.
testsuite/
* gcc.dg/vect/vect-simd-8.c: New test.
* gcc.dg/vect/vect-simd-9.c: New test.
* g++.dg/vect/simd-2.cc: New test.
* g++.dg/gomp/scan-1.C: New test.

From-SVN: r272399

14 files changed:
gcc/ChangeLog
gcc/cp/ChangeLog
gcc/cp/semantics.c
gcc/omp-low.c
gcc/testsuite/ChangeLog
gcc/testsuite/g++.dg/gomp/scan-1.C [new file with mode: 0644]
gcc/testsuite/g++.dg/vect/simd-2.cc [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-8.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-9.c [new file with mode: 0644]
gcc/tree-ssa-dce.c
gcc/tree-vect-data-refs.c
gcc/tree-vect-loop.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

index 6001d262fb25286e18de4a53afcc1d2915625fd9..c75b08e487b0151b45350d459c3774399262b067 100644 (file)
@@ -1,3 +1,39 @@
+2019-06-17  Jakub Jelinek  <jakub@redhat.com>
+
+       * omp-low.c (struct omp_context): Add scan_inclusive field.
+       (scan_omp_1_stmt) <case GIMPLE_OMP_SCAN>: Set ctx->scan_inclusive
+       if inclusive scan.
+       (struct omplow_simd_context): Add lastlane member.
+       (lower_rec_simd_input_clauses): Add rvar argument, handle inscan
+       reductions.  Build 2 or 3 argument .GOMP_SIMD_LANE calls rather than
+       1 or 2 argument.
+       (lower_rec_input_clauses): Handle inscan reductions in simd contexts.
+       (lower_lastprivate_clauses): Set TREE_THIS_NOTRAP on the ARRAY_REF.
+       (lower_omp_scan): New function.
+       (lower_omp_1) <case GIMPLE_OMP_SCAN>: Use lower_omp_scan.
+       * tree-ssa-dce.c (eliminate_unnecessary_stmts): For IFN_GOMP_SIMD_LANE
+       check 3rd argument if present rather than 2nd.
+       * tree-vectorizer.h (struct _loop_vec_info): Add scan_map member.
+       (struct _stmt_vec_info): Change simd_lane_access_p from bool into
+       2-bit bitfield.
+       * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
+       scan_map.  For IFN_GOMP_SIMD_LANE check 3rd argument if present rather
+       than 2nd.
+       (_loop_vec_info::~_loop_vec_info): Delete scan_map.
+       * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Allow two
+       different STMT_VINFO_SIMD_LANE_ACCESS_P refs if they have the same
+       init.
+       (vect_find_stmt_data_reference): Encode in ->aux the 2nd
+       IFN_GOMP_SIMD_LANE argument.
+       (vect_analyze_data_refs): Set STMT_VINFO_SIMD_LANE_ACCESS_P from the
+       encoded ->aux value.
+       * tree-vect-stmts.c: Include attribs.h.
+       (vectorizable_call): Adjust comment about IFN_GOMP_SIMD_LANE.
+       (scan_operand_equal_p, check_scan_store, vectorizable_scan_store): New
+       functions.
+       (vectorizable_load): For STMT_VINFO_SIMD_LANE_ACCESS_P tests use != 0.
+       (vectorizable_store): Handle STMT_VINFO_SIMD_LANE_ACCESS_P > 1.
+
 2019-06-17  Uroš Bizjak  <ubizjak@gmail.com>
 
        PR target/62055
index 7368dafea852f0eaa535c1d3275bdbf15e4ad471..bab08df1ce5a086d8fe07e45d83db13b599afe4a 100644 (file)
@@ -1,3 +1,8 @@
+2019-06-17  Jakub Jelinek  <jakub@redhat.com>
+
+       * semantics.c (finish_omp_clauses): For OMP_CLAUSE_REDUCTION_INSCAN
+       set need_copy_assignment.
+
 2019-06-17  Marek Polacek  <polacek@redhat.com>
 
        PR c++/83820 - excessive attribute arguments not detected.
index 8e354a2990cb2137b45d15426abdaa334f1d7d4d..6cf3af71c21b90e6e5ca217adf01d920abcd3766 100644 (file)
@@ -7688,6 +7688,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
        case OMP_CLAUSE_REDUCTION:
          if (reduction_seen == -2)
            OMP_CLAUSE_REDUCTION_INSCAN (c) = 0;
+         if (OMP_CLAUSE_REDUCTION_INSCAN (c))
+           need_copy_assignment = true;
          need_implicitly_determined = true;
          break;
        case OMP_CLAUSE_IN_REDUCTION:
index 962eadae02f1d408ee5e5fdd386b8290022dacbc..9df21a4d0466f27771f8222293ff5f665cd88312 100644 (file)
@@ -141,6 +141,9 @@ struct omp_context
   /* True if lower_omp_1 should look up lastprivate conditional in parent
      context.  */
   bool combined_into_simd_safelen0;
+
+  /* True if there is nested scan context with inclusive clause.  */
+  bool scan_inclusive;
 };
 
 static splay_tree all_contexts;
@@ -3329,11 +3332,15 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       scan_omp_single (as_a <gomp_single *> (stmt), ctx);
       break;
 
+    case GIMPLE_OMP_SCAN:
+      if (tree clauses = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
+       if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_INCLUSIVE)
+         ctx->scan_inclusive = true;
+      /* FALLTHRU */
     case GIMPLE_OMP_SECTION:
     case GIMPLE_OMP_MASTER:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
-    case GIMPLE_OMP_SCAN:
     case GIMPLE_OMP_GRID_BODY:
       ctx = new_omp_context (stmt, ctx);
       scan_omp (gimple_omp_body_ptr (stmt), ctx);
@@ -3671,6 +3678,7 @@ struct omplow_simd_context {
   omplow_simd_context () { memset (this, 0, sizeof (*this)); }
   tree idx;
   tree lane;
+  tree lastlane;
   vec<tree, va_heap> simt_eargs;
   gimple_seq simt_dlist;
   poly_uint64_pod max_vf;
@@ -3682,7 +3690,8 @@ struct omplow_simd_context {
 
 static bool
 lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
-                             omplow_simd_context *sctx, tree &ivar, tree &lvar)
+                             omplow_simd_context *sctx, tree &ivar,
+                             tree &lvar, tree *rvar = NULL)
 {
   if (known_eq (sctx->max_vf, 0U))
     {
@@ -3738,7 +3747,27 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
        = tree_cons (get_identifier ("omp simd array"), NULL,
                     DECL_ATTRIBUTES (avar));
       gimple_add_tmp_var (avar);
-      ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+      tree iavar = avar;
+      if (rvar)
+       {
+         /* For inscan reductions, create another array temporary,
+            which will hold the reduced value.  */
+         iavar = create_tmp_var_raw (atype);
+         if (TREE_ADDRESSABLE (new_var))
+           TREE_ADDRESSABLE (iavar) = 1;
+         DECL_ATTRIBUTES (iavar)
+           = tree_cons (get_identifier ("omp simd array"), NULL,
+                        tree_cons (get_identifier ("omp simd inscan"), NULL,
+                                   DECL_ATTRIBUTES (iavar)));
+         gimple_add_tmp_var (iavar);
+         ctx->cb.decl_map->put (avar, iavar);
+         if (sctx->lastlane == NULL_TREE)
+           sctx->lastlane = create_tmp_var (unsigned_type_node);
+         *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
+                         sctx->lastlane, NULL_TREE, NULL_TREE);
+         TREE_THIS_NOTRAP (*rvar) = 1;
+       }
+      ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
                     NULL_TREE, NULL_TREE);
       lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
                     NULL_TREE, NULL_TREE);
@@ -3814,7 +3843,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
   omplow_simd_context sctx = omplow_simd_context ();
   tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
   tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
-  gimple_seq llist[3] = { };
+  gimple_seq llist[4] = { };
   tree nonconst_simd_if = NULL_TREE;
 
   copyin_seq = NULL;
@@ -5155,9 +5184,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                      new_vard = TREE_OPERAND (new_var, 0);
                      gcc_assert (DECL_P (new_vard));
                    }
+                 tree rvar = NULL_TREE, *rvarp = NULL;
+                 if (is_simd
+                     && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+                     && OMP_CLAUSE_REDUCTION_INSCAN (c))
+                   rvarp = &rvar;
                  if (is_simd
                      && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
-                                                      ivar, lvar))
+                                                      ivar, lvar, rvarp))
                    {
                      if (new_vard == new_var)
                        {
@@ -5173,6 +5207,93 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                      x = lang_hooks.decls.omp_clause_default_ctor
                                (c, unshare_expr (ivar),
                                 build_outer_var_ref (var, ctx));
+                     if (rvarp)
+                       {
+                         if (x)
+                           {
+                             gimplify_and_add (x, &llist[0]);
+
+                             tree ivar2 = unshare_expr (lvar);
+                             TREE_OPERAND (ivar2, 1) = sctx.idx;
+                             x = lang_hooks.decls.omp_clause_default_ctor
+                                   (c, ivar2, build_outer_var_ref (var, ctx));
+                             gimplify_and_add (x, &llist[0]);
+
+                             /* For types that need construction, add another
+                                private var which will be default constructed
+                                and optionally initialized with
+                                OMP_CLAUSE_REDUCTION_GIMPLE_INIT, as in the
+                                loop we want to assign this value instead of
+                                constructing and destructing it in each
+                                iteration.  */
+                             tree nv = create_tmp_var_raw (TREE_TYPE (ivar));
+                             gimple_add_tmp_var (nv);
+                             ctx->cb.decl_map->put (TREE_OPERAND (ivar, 0),
+                                                    nv);
+                             x = lang_hooks.decls.omp_clause_default_ctor
+                                   (c, nv, build_outer_var_ref (var, ctx));
+                             gimplify_and_add (x, ilist);
+
+                             if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+                               {
+                                 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+                                 x = DECL_VALUE_EXPR (new_var);
+                                 SET_DECL_VALUE_EXPR (new_var, nv);
+                                 lower_omp (&tseq, ctx);
+                                 SET_DECL_VALUE_EXPR (new_var, x);
+                                 gimple_seq_add_seq (ilist, tseq);
+                                 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+                               }
+
+                             x = lang_hooks.decls.omp_clause_dtor (c, nv);
+                             if (x)
+                               {
+                                 tseq = NULL;
+                                 dtor = x;
+                                 gimplify_stmt (&dtor, &tseq);
+                                 gimple_seq_add_seq (dlist, tseq);
+                               }
+                           }
+
+                         tree ref = build_outer_var_ref (var, ctx);
+                         x = unshare_expr (ivar);
+                         x = lang_hooks.decls.omp_clause_assign_op (c, x,
+                                                                    ref);
+                         gimplify_and_add (x, &llist[0]);
+
+                         ref = build_outer_var_ref (var, ctx);
+                         x = lang_hooks.decls.omp_clause_assign_op (c, ref,
+                                                                    rvar);
+                         gimplify_and_add (x, &llist[3]);
+
+                         DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+                         if (new_vard == new_var)
+                           SET_DECL_VALUE_EXPR (new_var, lvar);
+                         else
+                           SET_DECL_VALUE_EXPR (new_vard,
+                                                build_fold_addr_expr (lvar));
+
+                         x = lang_hooks.decls.omp_clause_dtor (c, ivar);
+                         if (x)
+                           {
+                             tseq = NULL;
+                             dtor = x;
+                             gimplify_stmt (&dtor, &tseq);
+                             gimple_seq_add_seq (&llist[1], tseq);
+                           }
+
+                         tree ivar2 = unshare_expr (lvar);
+                         TREE_OPERAND (ivar2, 1) = sctx.idx;
+                         x = lang_hooks.decls.omp_clause_dtor (c, ivar2);
+                         if (x)
+                           {
+                             tseq = NULL;
+                             dtor = x;
+                             gimplify_stmt (&dtor, &tseq);
+                             gimple_seq_add_seq (&llist[1], tseq);
+                           }
+                         break;
+                       }
                      if (x)
                        gimplify_and_add (x, &llist[0]);
                      if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
@@ -5240,6 +5361,41 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                                 : build_outer_var_ref (var, ctx));
                  if (x)
                    gimplify_and_add (x, ilist);
+
+                 if (rvarp)
+                   {
+                     if (x)
+                       {
+                         tree nv = create_tmp_var_raw (TREE_TYPE (new_vard));
+                         gimple_add_tmp_var (nv);
+                         ctx->cb.decl_map->put (new_var, nv);
+                         x = lang_hooks.decls.omp_clause_default_ctor
+                               (c, nv, build_outer_var_ref (var, ctx));
+                         gimplify_and_add (x, ilist);
+                         if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+                           {
+                             tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+                             SET_DECL_VALUE_EXPR (new_var, nv);
+                             DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+                             lower_omp (&tseq, ctx);
+                             SET_DECL_VALUE_EXPR (new_var, NULL_TREE);
+                             DECL_HAS_VALUE_EXPR_P (new_var) = 0;
+                             gimple_seq_add_seq (ilist, tseq);
+                           }
+                         OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+                         x = lang_hooks.decls.omp_clause_dtor (c, nv);
+                         if (x)
+                           {
+                             tseq = NULL;
+                             dtor = x;
+                             gimplify_stmt (&dtor, &tseq);
+                             gimple_seq_add_seq (dlist, tseq);
+                           }
+                       }
+                     DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+                     goto do_dtor;
+                   }
+
                  if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
                    {
                      tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
@@ -5324,12 +5480,32 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                      new_vard = TREE_OPERAND (new_var, 0);
                      gcc_assert (DECL_P (new_vard));
                    }
+                 tree rvar = NULL_TREE, *rvarp = NULL;
+                 if (is_simd
+                     && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+                     && OMP_CLAUSE_REDUCTION_INSCAN (c))
+                   rvarp = &rvar;
                  if (is_simd
                      && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
-                                                      ivar, lvar))
+                                                      ivar, lvar, rvarp))
                    {
+                     if (new_vard != new_var)
+                       {
+                         SET_DECL_VALUE_EXPR (new_vard,
+                                              build_fold_addr_expr (lvar));
+                         DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+                       }
+
                      tree ref = build_outer_var_ref (var, ctx);
 
+                     if (rvarp)
+                       {
+                         gimplify_assign (ivar, ref, &llist[0]);
+                         ref = build_outer_var_ref (var, ctx);
+                         gimplify_assign (ref, rvar, &llist[3]);
+                         break;
+                       }
+
                      gimplify_assign (unshare_expr (ivar), x, &llist[0]);
 
                      if (sctx.is_simt)
@@ -5346,14 +5522,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                      ref = build_outer_var_ref (var, ctx);
                      gimplify_assign (ref, x, &llist[1]);
 
-                     if (new_vard != new_var)
-                       {
-                         SET_DECL_VALUE_EXPR (new_vard,
-                                              build_fold_addr_expr (lvar));
-                         DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
-                       }
                    }
-                 else
+                 else if (rvarp == NULL)
                    {
                      if (omp_is_reference (var) && is_simd)
                        handle_simd_reference (clause_loc, new_vard, ilist);
@@ -5456,14 +5626,23 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
   if (sctx.lane)
     {
       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMD_LANE,
-                                             1 + (nonconst_simd_if != NULL),
-                                             uid, nonconst_simd_if);
+                                             2 + (nonconst_simd_if != NULL),
+                                             uid, integer_zero_node,
+                                             nonconst_simd_if);
       gimple_call_set_lhs (g, sctx.lane);
       gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
       gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
       g = gimple_build_assign (sctx.lane, INTEGER_CST,
                               build_int_cst (unsigned_type_node, 0));
       gimple_seq_add_stmt (ilist, g);
+      if (sctx.lastlane)
+       {
+         g = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
+                                         2, uid, sctx.lane);
+         gimple_call_set_lhs (g, sctx.lastlane);
+         gimple_seq_add_stmt (dlist, g);
+         gimple_seq_add_seq (dlist, llist[3]);
+       }
       /* Emit reductions across SIMT lanes in log_2(simt_vf) steps.  */
       if (llist[2])
        {
@@ -5865,6 +6044,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
                  new_var = build4 (ARRAY_REF, TREE_TYPE (val),
                                    TREE_OPERAND (val, 0), lastlane,
                                    NULL_TREE, NULL_TREE);
+                 TREE_THIS_NOTRAP (new_var) = 1;
                }
            }
          else if (maybe_simt)
@@ -8371,6 +8551,167 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 }
 
 
+/* Expand code for an OpenMP scan directive and the structured block
+   before the scan directive.  */
+
+static void
+lower_omp_scan (gimple_stmt_iterator *gsi_p, omp_context *ctx)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+  bool has_clauses
+    = gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)) != NULL;
+  tree lane = NULL_TREE;
+  gimple_seq before = NULL;
+  omp_context *octx = ctx->outer;
+  gcc_assert (octx);
+  bool input_phase = has_clauses ^ octx->scan_inclusive;
+  if (gimple_code (octx->stmt) == GIMPLE_OMP_FOR
+      && (gimple_omp_for_kind (octx->stmt) & GF_OMP_FOR_SIMD)
+      && !gimple_omp_for_combined_into_p (octx->stmt)
+      && octx->scan_inclusive)
+    {
+      if (tree c = omp_find_clause (gimple_omp_for_clauses (octx->stmt),
+                                   OMP_CLAUSE__SIMDUID_))
+       {
+         tree uid = OMP_CLAUSE__SIMDUID__DECL (c);
+         lane = create_tmp_var (unsigned_type_node);
+         tree t = build_int_cst (integer_type_node, 1 + !input_phase);
+         gimple *g
+           = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 2, uid, t);
+         gimple_call_set_lhs (g, lane);
+         gimple_seq_add_stmt (&before, g);
+       }
+      for (tree c = gimple_omp_for_clauses (octx->stmt);
+          c; c = OMP_CLAUSE_CHAIN (c))
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+           && OMP_CLAUSE_REDUCTION_INSCAN (c))
+         {
+           tree var = OMP_CLAUSE_DECL (c);
+           tree new_var = lookup_decl (var, octx);
+           tree val = new_var;
+           tree var2 = NULL_TREE;
+           tree var3 = NULL_TREE;
+           if (DECL_HAS_VALUE_EXPR_P (new_var))
+             {
+               val = DECL_VALUE_EXPR (new_var);
+               if (TREE_CODE (val) == ARRAY_REF
+                   && VAR_P (TREE_OPERAND (val, 0)))
+                 {
+                   tree v = TREE_OPERAND (val, 0);
+                   if (lookup_attribute ("omp simd array",
+                                         DECL_ATTRIBUTES (v)))
+                     {
+                       val = unshare_expr (val);
+                       TREE_OPERAND (val, 1) = lane;
+                       var2 = lookup_decl (v, octx);
+                       if (input_phase
+                           && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+                         var3 = maybe_lookup_decl (var2, octx);
+                       if (!input_phase)
+                         {
+                           var2 = build4 (ARRAY_REF, TREE_TYPE (val),
+                                          var2, lane, NULL_TREE, NULL_TREE);
+                           TREE_THIS_NOTRAP (var2) = 1;
+                         }
+                       else
+                         var2 = val;
+                     }
+                 }
+             }
+           else
+             {
+               var2 = build_outer_var_ref (var, octx);
+               if (input_phase && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+                 {
+                   var3 = maybe_lookup_decl (new_var, octx);
+                   if (var3 == new_var)
+                     var3 = NULL_TREE;
+                 }
+             }
+           if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+             {
+               tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
+               if (input_phase)
+                 {
+                   if (var3)
+                     {
+                       /* If we've added a separate identity element
+                          variable, copy it over into val.  */
+                       tree x = lang_hooks.decls.omp_clause_assign_op (c, val,
+                                                                       var3);
+                       gimplify_and_add (x, &before);
+                     }
+                   else if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+                     {
+                       /* Otherwise, assign to it the identity element.  */
+                       gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+                       tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
+                                 ? DECL_VALUE_EXPR (new_var) : NULL_TREE);
+                       tree ref = build_outer_var_ref (var, octx);
+                       SET_DECL_VALUE_EXPR (new_var, val);
+                       SET_DECL_VALUE_EXPR (placeholder, ref);
+                       DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
+                       lower_omp (&tseq, octx);
+                       SET_DECL_VALUE_EXPR (new_var, x);
+                       SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
+                       DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+                       if (x == NULL_TREE)
+                         DECL_HAS_VALUE_EXPR_P (new_var) = 0;
+                       gimple_seq_add_seq (&before, tseq);
+                       OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+                     }
+                 }
+               else
+                 {
+                   gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+                   tree x = (DECL_HAS_VALUE_EXPR_P (new_var)
+                             ? DECL_VALUE_EXPR (new_var) : NULL_TREE);
+                   SET_DECL_VALUE_EXPR (new_var, val);
+                   SET_DECL_VALUE_EXPR (placeholder, var2);
+                   DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
+                   lower_omp (&tseq, octx);
+                   gimple_seq_add_seq (&before, tseq);
+                   OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
+                   SET_DECL_VALUE_EXPR (new_var, x);
+                   SET_DECL_VALUE_EXPR (placeholder, NULL_TREE);
+                   DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+                   x = lang_hooks.decls.omp_clause_assign_op (c, val, var2);
+                   gimplify_and_add (x, &before);
+                 }
+             }
+           else
+             {
+               if (input_phase)
+                 {
+                   /* input phase.  Set val to initializer before
+                      the body.  */
+                   tree x = omp_reduction_init (c, TREE_TYPE (new_var));
+                   gimplify_assign (val, x, &before);
+                 }
+               else
+                 {
+                   /* scan phase.  */
+                   enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+                   if (code == MINUS_EXPR)
+                     code = PLUS_EXPR;
+
+                   tree x = build2 (code, TREE_TYPE (var2),
+                                    unshare_expr (var2), unshare_expr (val));
+                   gimplify_assign (unshare_expr (var2), x, &before);
+                   gimplify_assign (val, var2, &before);
+                 }
+             }
+         }
+    }
+  else if (has_clauses)
+    sorry_at (gimple_location (stmt),
+             "%<#pragma omp scan%> not supported yet");
+  gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
+  gsi_insert_seq_after (gsi_p, before, GSI_SAME_STMT);
+  gsi_replace (gsi_p, gimple_build_nop (), true);
+}
+
+
 /* Gimplify a GIMPLE_OMP_CRITICAL statement.  This is a relatively simple
    substitution of a couple of function calls.  But in the NAMED case,
    requires that languages coordinate a symbol name.  It is therefore
@@ -10843,11 +11184,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
     case GIMPLE_OMP_SCAN:
       ctx = maybe_lookup_ctx (stmt);
       gcc_assert (ctx);
-      gsi_insert_seq_after (gsi_p, gimple_omp_body (stmt), GSI_SAME_STMT);
-      if (gimple_omp_scan_clauses (as_a <gomp_scan *> (stmt)))
-       sorry_at (gimple_location (stmt),
-                 "%<#pragma omp scan%> not supported yet");
-      gsi_replace (gsi_p, gimple_build_nop (), true);
+      lower_omp_scan (gsi_p, ctx);
       break;
     case GIMPLE_OMP_CRITICAL:
       ctx = maybe_lookup_ctx (stmt);
index d2ea2f7a0937fbc26ddbef1c01b1c3d5114ed2cf..27a522e0140a478cb2d025270b2a4ce9c1940551 100644 (file)
@@ -1,3 +1,10 @@
+2019-06-17  Jakub Jelinek  <jakub@redhat.com>
+
+       * gcc.dg/vect/vect-simd-8.c: New test.
+       * gcc.dg/vect/vect-simd-9.c: New test.
+       * g++.dg/vect/simd-2.cc: New test.
+       * g++.dg/gomp/scan-1.C: New test.
+
 2019-06-17  Uroš Bizjak  <ubizjak@gmail.com>
 
        PR target/62055
diff --git a/gcc/testsuite/g++.dg/gomp/scan-1.C b/gcc/testsuite/g++.dg/gomp/scan-1.C
new file mode 100644 (file)
index 0000000..6fd3320
--- /dev/null
@@ -0,0 +1,26 @@
+// { dg-do compile { target c++11 } }
+
+struct S { S (); ~S (); S &operator = (const S &) = delete; int s; };  // { dg-message "declared here" }
+#pragma omp declare reduction (+ : S : omp_out.s += omp_in.s)
+
+S s;
+
+void
+foo (void)
+{
+  #pragma omp simd reduction (+: s)
+  for (int i = 0; i < 64; ++i)
+    s.s += i;
+}
+
+void
+bar (int *x)
+{
+  #pragma omp simd reduction (inscan, +: s)    // { dg-error "use of deleted function" }
+  for (int i = 0; i < 64; ++i)
+    {
+      s.s += i;
+      #pragma omp scan inclusive (s)           // { dg-error "" }
+      x[i] = s.s;
+    }
+}
diff --git a/gcc/testsuite/g++.dg/vect/simd-2.cc b/gcc/testsuite/g++.dg/vect/simd-2.cc
new file mode 100644 (file)
index 0000000..3160495
--- /dev/null
@@ -0,0 +1,153 @@
+// { dg-require-effective-target size32plus }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
+
+#include "../../gcc.dg/vect/tree-vect.h"
+
+struct S {
+  inline S ();
+  inline ~S ();
+  inline S (const S &);
+  inline S & operator= (const S &);
+  int s;
+};
+
+S::S () : s (0)
+{
+}
+
+S::~S ()
+{
+}
+
+S::S (const S &x)
+{
+  s = x.s;
+}
+
+S &
+S::operator= (const S &x)
+{
+  s = x.s;
+  return *this;
+}
+
+static inline void
+ini (S &x)
+{
+  x.s = 0;
+}
+
+S r, a[1024], b[1024];
+
+#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
+#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
+
+__attribute__((noipa)) void
+foo (S *a, S *b)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      r.s += a[i].s;
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) S
+bar (void)
+{
+  S s;
+  #pragma omp simd reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s.s += 2 * a[i].s;
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return S (s);
+}
+
+__attribute__((noipa)) void
+baz (S *a, S *b)
+{
+  #pragma omp simd reduction (inscan, +:r) simdlen(1)
+  for (int i = 0; i < 1024; i++)
+    {
+      r.s += a[i].s;
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) S
+qux (void)
+{
+  S s;
+  #pragma omp simd if (0) reduction (inscan, plus:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s.s += 2 * a[i].s;
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return S (s);
+}
+
+int
+main ()
+{
+  S s;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i].s = i;
+      b[i].s = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += i;
+      if (b[i].s != s.s)
+       abort ();
+      else
+       b[i].s = 25;
+    }
+  if (bar ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += 2 * i;
+      if (b[i].s != s.s)
+       abort ();
+    }
+  r.s = 0;
+  baz (a, b);
+  if (r.s != 1024 * 1023 / 2)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += i;
+      if (b[i].s != s.s)
+       abort ();
+      else
+       b[i].s = 25;
+    }
+  if (qux ().s != 1024 * 1023)
+    abort ();
+  s.s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s.s += 2 * i;
+      if (b[i].s != s.s)
+       abort ();
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-8.c b/gcc/testsuite/gcc.dg/vect/vect-simd-8.c
new file mode 100644 (file)
index 0000000..123a201
--- /dev/null
@@ -0,0 +1,118 @@
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+int r, a[1024], b[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, +:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, +:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, +:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, +:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = 25;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -1;
+    }
+  r = 0;
+  baz (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -25;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+       abort ();
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-9.c b/gcc/testsuite/gcc.dg/vect/vect-simd-9.c
new file mode 100644 (file)
index 0000000..ce5ae57
--- /dev/null
@@ -0,0 +1,120 @@
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+int r, a[1024], b[1024];
+
+#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
+
+__attribute__((noipa)) void
+foo (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, foo:r)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, foo:s)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b)
+{
+  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive(r)
+      b[i] = r;
+    }
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  int s = 0;
+  #pragma omp simd reduction (inscan, foo:s) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+    {
+      s += 2 * a[i];
+      #pragma omp scan inclusive(s)
+      b[i] = s;
+    }
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  check_vect ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      a[i] = i;
+      b[i] = -1;
+      asm ("" : "+g" (i));
+    }
+  foo (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = 25;
+    }
+  if (bar () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -1;
+    }
+  r = 0;
+  baz (a, b);
+  if (r != 1024 * 1023 / 2)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += i;
+      if (b[i] != s)
+       abort ();
+      else
+       b[i] = -25;
+    }
+  if (qux () != 1024 * 1023)
+    abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+    {
+      s += 2 * i;
+      if (b[i] != s)
+       abort ();
+    }
+  return 0;
+}
index a38899edd6cfd62b18faee8f96183b1283c3b0f2..c73fbabfe29107703f8cb1fcd8b49f242db9c474 100644 (file)
@@ -1339,14 +1339,14 @@ eliminate_unnecessary_stmts (void)
                  update_stmt (stmt);
                  release_ssa_name (name);
 
-                 /* GOMP_SIMD_LANE (unless two argument) or ASAN_POISON
+                 /* GOMP_SIMD_LANE (unless three argument) or ASAN_POISON
                     without lhs is not needed.  */
                  if (gimple_call_internal_p (stmt))
                    switch (gimple_call_internal_fn (stmt))
                      {
                      case IFN_GOMP_SIMD_LANE:
-                       if (gimple_call_num_args (stmt) >= 2
-                           && !integer_nonzerop (gimple_call_arg (stmt, 1)))
+                       if (gimple_call_num_args (stmt) >= 3
+                           && !integer_nonzerop (gimple_call_arg (stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IFN_ASAN_POISON:
index 55d87f8f59fd6b292ff15c2466c1dcd292623240..f2f0d2317119dc1ba0ad51220e69dca14ffebb2d 100644 (file)
@@ -3003,6 +3003,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
              || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
            break;
 
+         /* Different .GOMP_SIMD_LANE calls still give the same lane,
+            just hold extra information.  */
+         if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
+             && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
+             && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
+           break;
+
          /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
          HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
          HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
@@ -4101,7 +4108,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
                          DR_STEP_ALIGNMENT (newdr)
                            = highest_pow2_factor (step);
                          /* Mark as simd-lane access.  */
-                         newdr->aux = (void *)-1;
+                         tree arg2 = gimple_call_arg (def, 1);
+                         newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
                          free_data_ref (dr);
                          datarefs->safe_push (newdr);
                          return opt_result::success ();
@@ -4210,14 +4218,17 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
         }
 
       /* See if this was detected as SIMD lane access.  */
-      if (dr->aux == (void *)-1)
+      if (dr->aux == (void *)-1
+         || dr->aux == (void *)-2
+         || dr->aux == (void *)-3)
        {
          if (nested_in_vect_loop_p (loop, stmt_info))
            return opt_result::failure_at (stmt_info->stmt,
                                           "not vectorized:"
                                           " data ref analysis failed: %G",
                                           stmt_info->stmt);
-         STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
+         STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
+           = -(uintptr_t) dr->aux;
        }
 
       tree base = get_base_address (DR_REF (dr));
index 671ef2f7658e9c20b990306c943745c8a98ee218..92a7c29d077c078ea0b6f61f7cd1108f897598d8 100644 (file)
@@ -824,6 +824,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
     peeling_for_alignment (0),
     ptr_mask (0),
     ivexpr_map (NULL),
+    scan_map (NULL),
     slp_unrolling_factor (1),
     single_scalar_iteration_cost (0),
     vectorizable (false),
@@ -863,8 +864,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
          gimple *stmt = gsi_stmt (si);
          gimple_set_uid (stmt, 0);
          add_stmt (stmt);
-         /* If .GOMP_SIMD_LANE call for the current loop has 2 arguments, the
-            second argument is the #pragma omp simd if (x) condition, when 0,
+         /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
+            third argument is the #pragma omp simd if (x) condition, when 0,
             loop shouldn't be vectorized, when non-zero constant, it should
             be vectorized normally, otherwise versioned with vectorized loop
             done if the condition is non-zero at runtime.  */
@@ -872,12 +873,12 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
              && is_gimple_call (stmt)
              && gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
-             && gimple_call_num_args (stmt) >= 2
+             && gimple_call_num_args (stmt) >= 3
              && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
              && (loop_in->simduid
                  == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
            {
-             tree arg = gimple_call_arg (stmt, 1);
+             tree arg = gimple_call_arg (stmt, 2);
              if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
                simd_if_cond = arg;
              else
@@ -959,6 +960,7 @@ _loop_vec_info::~_loop_vec_info ()
 
   release_vec_loop_masks (&masks);
   delete ivexpr_map;
+  delete scan_map;
 
   loop->aux = NULL;
 }
index 21046931243d4cc01fd8d98d9af3f734079c14d3..790b49bcb4be1ea324415d27a12e323466653775 100644 (file)
@@ -54,6 +54,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop-niter.h"
 #include "gimple-fold.h"
 #include "regs.h"
+#include "attribs.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -3257,7 +3258,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (nargs == 0 || nargs > 4)
     return false;
 
-  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
+  /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
   combined_fn cfn = gimple_call_combined_fn (stmt);
   if (cfn == CFN_GOMP_SIMD_LANE)
     {
@@ -6320,6 +6321,489 @@ get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
 }
 
 
+/* Function scan_operand_equal_p.
+
+   Helper function for check_scan_store.  Compare two references
+   with .GOMP_SIMD_LANE bases.  */
+
+static bool
+scan_operand_equal_p (tree ref1, tree ref2)
+{
+  machine_mode mode1, mode2;
+  poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
+  tree offset1, offset2;
+  int unsignedp1, unsignedp2, reversep1, reversep2;
+  int volatilep1 = 0, volatilep2 = 0;
+  tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
+                                   &mode1, &unsignedp1, &reversep1,
+                                   &volatilep1);
+  tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
+                                   &mode2, &unsignedp2, &reversep2,
+                                   &volatilep2);
+  if (reversep1 || reversep2 || volatilep1 || volatilep2)
+    return false;
+  if (!operand_equal_p (base1, base2, 0))
+    return false;
+  if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
+    return false;
+  if (maybe_ne (bitsize1, bitsize2))
+    return false;
+  if (!operand_equal_p (offset1, offset2, 0))
+    return false;
+  return true;
+}
+
+
+/* Function check_scan_store.
+
+   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
+
+static bool
+check_scan_store (stmt_vec_info stmt_info, tree vectype,
+                 enum vect_def_type rhs_dt, bool slp, tree mask,
+                 vect_memory_access_type memory_access_type)
+{
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+  tree ref_type;
+
+  gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
+  if (slp
+      || mask
+      || memory_access_type != VMAT_CONTIGUOUS
+      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
+      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
+      || loop_vinfo == NULL
+      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
+      || !integer_zerop (DR_OFFSET (dr_info->dr))
+      || !integer_zerop (DR_INIT (dr_info->dr))
+      || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
+      || !alias_sets_conflict_p (get_alias_set (vectype),
+                                get_alias_set (TREE_TYPE (ref_type))))
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "unsupported OpenMP scan store.\n");
+      return false;
+    }
+
+  /* We need to pattern match code built by OpenMP lowering and simplified
+     by following optimizations into something we can handle.
+     #pragma omp simd reduction(inscan,+:r)
+     for (...)
+       {
+        r += something ();
+        #pragma omp scan inclusive (r)
+        use (r);
+       }
+     shall have body with:
+       // Initialization for input phase, store the reduction initializer:
+       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
+       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
+       D.2042[_21] = 0;
+       // Actual input phase:
+       ...
+       r.0_5 = D.2042[_20];
+       _6 = _4 + r.0_5;
+       D.2042[_20] = _6;
+       // Initialization for scan phase:
+       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
+       _26 = D.2043[_25];
+       _27 = D.2042[_25];
+       _28 = _26 + _27;
+       D.2043[_25] = _28;
+       D.2042[_25] = _28;
+       // Actual scan phase:
+       ...
+       r.1_8 = D.2042[_20];
+       ...
+     The "omp simd array" variable D.2042 holds the privatized copy used
+     inside of the loop and D.2043 is another one that holds copies of
+     the current original list item.  The separate GOMP_SIMD_LANE ifn
+     kinds are there in order to allow optimizing the initializer store
+     and combiner sequence, e.g. if it is originally some C++ish user
+     defined reduction, but allow the vectorizer to pattern recognize it
+     and turn into the appropriate vectorized scan.  */
+
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
+    {
+      /* Match the D.2042[_21] = 0; store above.  Just require that
+        it is a constant or external definition store.  */
+      if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
+       {
+        fail_init:
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "unsupported OpenMP scan initializer store.\n");
+         return false;
+       }
+
+      if (! loop_vinfo->scan_map)
+       loop_vinfo->scan_map = new hash_map<tree, tree>;
+      tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+      tree &cached = loop_vinfo->scan_map->get_or_insert (var);
+      if (cached)
+       goto fail_init;
+      cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
+
+      /* These stores can be vectorized normally.  */
+      return true;
+    }
+
+  if (rhs_dt != vect_internal_def)
+    {
+     fail:
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "unsupported OpenMP scan combiner pattern.\n");
+      return false;
+    }
+
+  gimple *stmt = STMT_VINFO_STMT (stmt_info);
+  tree rhs = gimple_assign_rhs1 (stmt);
+  if (TREE_CODE (rhs) != SSA_NAME)
+    goto fail;
+
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  gimple *other_store_stmt = NULL;
+  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
+    {
+      gimple *use_stmt = USE_STMT (use_p);
+      if (use_stmt == stmt || is_gimple_debug (use_stmt))
+       continue;
+      if (gimple_bb (use_stmt) != gimple_bb (stmt)
+         || !gimple_store_p (use_stmt)
+         || other_store_stmt)
+       goto fail;
+      other_store_stmt = use_stmt;
+    }
+  if (other_store_stmt == NULL)
+    goto fail;
+  stmt_vec_info other_store_stmt_info
+    = loop_vinfo->lookup_stmt (other_store_stmt);
+  if (other_store_stmt_info == NULL
+      || STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) != 3)
+    goto fail;
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+  if (gimple_bb (def_stmt) != gimple_bb (stmt)
+      || !is_gimple_assign (def_stmt)
+      || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
+    goto fail;
+
+  enum tree_code code = gimple_assign_rhs_code (def_stmt);
+  /* For pointer addition, we should use the normal plus for the vector
+     operation.  */
+  switch (code)
+    {
+    case POINTER_PLUS_EXPR:
+      code = PLUS_EXPR;
+      break;
+    case MULT_HIGHPART_EXPR:
+      goto fail;
+    default:
+      break;
+    }
+  if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
+    goto fail;
+
+  tree rhs1 = gimple_assign_rhs1 (def_stmt);
+  tree rhs2 = gimple_assign_rhs2 (def_stmt);
+  if (TREE_CODE (rhs1) != SSA_NAME
+      || TREE_CODE (rhs2) != SSA_NAME)
+    goto fail;
+
+  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
+  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
+  if (gimple_bb (load1_stmt) != gimple_bb (stmt)
+      || !gimple_assign_load_p (load1_stmt)
+      || gimple_bb (load2_stmt) != gimple_bb (stmt)
+      || !gimple_assign_load_p (load2_stmt))
+    goto fail;
+
+  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
+  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
+  if (load1_stmt_info == NULL
+      || load2_stmt_info == NULL
+      || STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) != 3
+      || STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) != 3)
+    goto fail;
+
+  if (scan_operand_equal_p (gimple_assign_lhs (stmt),
+                           gimple_assign_rhs1 (load2_stmt)))
+    {
+      std::swap (rhs1, rhs2);
+      std::swap (load1_stmt, load2_stmt);
+      std::swap (load1_stmt_info, load2_stmt_info);
+    }
+  if (!scan_operand_equal_p (gimple_assign_lhs (stmt),
+                            gimple_assign_rhs1 (load1_stmt))
+      || !scan_operand_equal_p (gimple_assign_lhs (other_store_stmt),
+                               gimple_assign_rhs1 (load2_stmt)))
+    goto fail;
+
+  dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
+  if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
+      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
+    goto fail;
+
+  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
+  if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
+      || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
+      || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+        == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
+    goto fail;
+
+  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+    std::swap (var1, var2);
+
+  if (loop_vinfo->scan_map == NULL)
+    goto fail;
+  tree *init = loop_vinfo->scan_map->get (var1);
+  if (init == NULL)
+    goto fail;
+
+  /* The IL is as expected, now check if we can actually vectorize it.
+       _26 = D.2043[_25];
+       _27 = D.2042[_25];
+       _28 = _26 + _27;
+       D.2043[_25] = _28;
+       D.2042[_25] = _28;
+     should be vectorized as (where _40 is the vectorized rhs
+     from the D.2042[_21] = 0; store):
+       _30 = MEM <vector(8) int> [(int *)&D.2043];
+       _31 = MEM <vector(8) int> [(int *)&D.2042];
+       _32 = VEC_PERM_EXPR <_31, _40, { 8, 0, 1, 2, 3, 4, 5, 6 }>;
+       _33 = _31 + _32;
+       // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
+       _34 = VEC_PERM_EXPR <_33, _40, { 8, 9, 0, 1, 2, 3, 4, 5 }>;
+       _35 = _33 + _34;
+       // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+       //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
+       _36 = VEC_PERM_EXPR <_35, _40, { 8, 9, 10, 11, 0, 1, 2, 3 }>;
+       _37 = _35 + _36;
+       // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
+       //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
+       _38 = _30 + _37;
+       _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
+       MEM <vector(8) int> [(int *)&D.2043] = _39;
+       MEM <vector(8) int> [(int *)&D.2042] = _38;  */
+  enum machine_mode vec_mode = TYPE_MODE (vectype);
+  optab optab = optab_for_tree_code (code, vectype, optab_default);
+  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+    goto fail;
+
+  unsigned HOST_WIDE_INT nunits;
+  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
+    goto fail;
+  int units_log2 = exact_log2 (nunits);
+  if (units_log2 <= 0)
+    goto fail;
+
+  for (int i = 0; i <= units_log2; ++i)
+    {
+      unsigned HOST_WIDE_INT j, k;
+      vec_perm_builder sel (nunits, nunits, 1);
+      sel.quick_grow (nunits);
+      if (i == units_log2)
+       {
+         for (j = 0; j < nunits; ++j)
+           sel[j] = nunits - 1;
+       }
+      else
+       {
+         for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
+           sel[j] = nunits + j;
+         for (k = 0; j < nunits; ++j, ++k)
+           sel[j] = k;
+       }
+      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
+      if (!can_vec_perm_const_p (vec_mode, indices))
+       goto fail;
+    }
+
+  return true;
+}
+
+
+/* Function vectorizable_scan_store.
+
+   Helper of vectorizable_score, arguments like on vectorizable_store.
+   Handle only the transformation, checking is done in check_scan_store.  */
+
+static bool
+vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+                        stmt_vec_info *vec_stmt, int ncopies)
+{
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
+  vec_info *vinfo = stmt_info->vinfo;
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "transform scan store. ncopies = %d\n", ncopies);
+
+  gimple *stmt = STMT_VINFO_STMT (stmt_info);
+  tree rhs = gimple_assign_rhs1 (stmt);
+  gcc_assert (TREE_CODE (rhs) == SSA_NAME);
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
+  enum tree_code code = gimple_assign_rhs_code (def_stmt);
+  if (code == POINTER_PLUS_EXPR)
+    code = PLUS_EXPR;
+  gcc_assert (TREE_CODE_LENGTH (code) == binary_op
+             && commutative_tree_code (code));
+  tree rhs1 = gimple_assign_rhs1 (def_stmt);
+  tree rhs2 = gimple_assign_rhs2 (def_stmt);
+  gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
+  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
+  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
+  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
+  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
+  dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
+  dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
+  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
+  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
+
+  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
+    {
+      std::swap (rhs1, rhs2);
+      std::swap (var1, var2);
+    }
+
+  tree *init = loop_vinfo->scan_map->get (var1);
+  gcc_assert (init);
+
+  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
+  bool inscan_var_store
+    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
+
+  unsigned HOST_WIDE_INT nunits;
+  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
+    gcc_unreachable ();
+  int units_log2 = exact_log2 (nunits);
+  gcc_assert (units_log2 > 0);
+  auto_vec<tree, 16> perms;
+  perms.quick_grow (units_log2 + 1);
+  for (int i = 0; i <= units_log2; ++i)
+    {
+      unsigned HOST_WIDE_INT j, k;
+      vec_perm_builder sel (nunits, nunits, 1);
+      sel.quick_grow (nunits);
+      if (i == units_log2)
+       {
+         for (j = 0; j < nunits; ++j)
+           sel[j] = nunits - 1;
+       }
+      else
+       {
+         for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
+           sel[j] = nunits + j;
+         for (k = 0; j < nunits; ++j, ++k)
+           sel[j] = k;
+       }
+      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
+      perms[i] = vect_gen_perm_mask_checked (vectype, indices);
+    }
+
+  stmt_vec_info prev_stmt_info = NULL;
+  tree vec_oprnd1 = NULL_TREE;
+  tree vec_oprnd2 = NULL_TREE;
+  tree vec_oprnd3 = NULL_TREE;
+  tree dataref_ptr = unshare_expr (DR_BASE_ADDRESS (dr_info->dr));
+  tree dataref_offset = build_int_cst (ref_type, 0);
+  tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
+  tree orig = NULL_TREE;
+  for (int j = 0; j < ncopies; j++)
+    {
+      stmt_vec_info new_stmt_info;
+      if (j == 0)
+       {
+         vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
+         vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
+         vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
+         orig = vec_oprnd3;
+       }
+      else
+       {
+         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
+         vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
+         vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
+         if (!inscan_var_store)
+           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+       }
+
+      tree v = vec_oprnd2;
+      for (int i = 0; i < units_log2; ++i)
+       {
+         tree new_temp = make_ssa_name (vectype);
+         gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, v,
+                                          vec_oprnd1, perms[i]);
+         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+         if (prev_stmt_info == NULL)
+           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
+         else
+           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+         prev_stmt_info = new_stmt_info;
+
+         tree new_temp2 = make_ssa_name (vectype);
+         g = gimple_build_assign (new_temp2, code, v, new_temp);
+         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+         prev_stmt_info = new_stmt_info;
+
+         v = new_temp2;
+       }
+
+      tree new_temp = make_ssa_name (vectype);
+      gimple *g = gimple_build_assign (new_temp, code, orig, v);
+      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+      prev_stmt_info = new_stmt_info;
+
+      orig = make_ssa_name (vectype);
+      g = gimple_build_assign (orig, VEC_PERM_EXPR, new_temp, new_temp,
+                              perms[units_log2]);
+      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+      prev_stmt_info = new_stmt_info;
+
+      if (!inscan_var_store)
+       {
+         tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+                                      dataref_offset);
+         vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
+         g = gimple_build_assign (data_ref, new_temp);
+         new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
+         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+         prev_stmt_info = new_stmt_info;
+       }
+    }
+
+  if (inscan_var_store)
+    for (int j = 0; j < ncopies; j++)
+      {
+       if (j != 0)
+         dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+
+       tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
+                                    dataref_offset);
+       vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
+       gimple *g = gimple_build_assign (data_ref, orig);
+       stmt_vec_info new_stmt_info
+         = vect_finish_stmt_generation (stmt_info, g, gsi);
+       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
+       prev_stmt_info = new_stmt_info;
+      }
+  return true;
+}
+
+
 /* Function vectorizable_store.
 
    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
@@ -6514,6 +6998,13 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       group_size = vec_num = 1;
     }
 
+  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
+    {
+      if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
+                            memory_access_type))
+       return false;
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
@@ -6737,6 +7228,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        }
       return true;
     }
+  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
+    return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
 
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
@@ -7162,7 +7655,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
+           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
          if (simd_lane_access_p
              && !loop_masks
              && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
@@ -8347,7 +8840,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       if (j == 0)
        {
          bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
+           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
index 19a8af090a64f0f9936e19f3b17203366f42f989..df828ff0a40fbf7db2fbb4f02fd984f1a1ea4183 100644 (file)
@@ -491,6 +491,10 @@ typedef struct _loop_vec_info : public vec_info {
   /* Map of IV base/step expressions to inserted name in the preheader.  */
   hash_map<tree_operand_hash, tree> *ivexpr_map;
 
+  /* Map of OpenMP "omp simd array" scan variables to corresponding
+     rhs of the store of the initializer.  */
+  hash_map<tree, tree> *scan_map;
+
   /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
      applied to the loop, i.e., no unrolling is needed, this is 1.  */
   poly_uint64 slp_unrolling_factor;
@@ -913,7 +917,7 @@ struct _stmt_vec_info {
   bool strided_p;
 
   /* For both loads and stores.  */
-  bool simd_lane_access_p;
+  unsigned simd_lane_access_p : 2;
 
   /* Classifies how the load or store is going to be implemented
      for loop vectorization.  */