diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 4d3b03239d4e03cae7a1698ba6cac2582a019c0f..11bb2d35a151aa0b563254d2148d00fd163f466a 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3,8 +3,7 @@
    marshalling to implement data sharing and copying clauses.
    Contributed by Diego Novillo <dnovillo@redhat.com>
 
-   Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-   Free Software Foundation, Inc.
+   Copyright (C) 2005-2014 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -27,22 +26,51 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
 #include "rtl.h"
+#include "pointer-set.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "gimple-expr.h"
+#include "is-a.h"
 #include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-walk.h"
 #include "tree-iterator.h"
 #include "tree-inline.h"
 #include "langhooks.h"
 #include "diagnostic-core.h"
-#include "tree-flow.h"
+#include "gimple-ssa.h"
+#include "cgraph.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "tree-ssanames.h"
+#include "tree-into-ssa.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "tree-ssa.h"
 #include "flags.h"
 #include "function.h"
 #include "expr.h"
 #include "tree-pass.h"
-#include "ggc.h"
 #include "except.h"
 #include "splay-tree.h"
 #include "optabs.h"
 #include "cfgloop.h"
+#include "target.h"
+#include "omp-low.h"
+#include "gimple-low.h"
+#include "tree-cfgcleanup.h"
+#include "pretty-print.h"
+#include "ipa-prop.h"
+#include "tree-nested.h"
+#include "tree-eh.h"
 
 
 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
@@ -56,6 +84,45 @@ along with GCC; see the file COPYING3.  If not see
    scanned for parallel regions which are then moved to a new
    function, to be invoked by the thread library.  */
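
   As a concrete sketch of the two phases (illustrative only; the
   outlined-function and record names follow GCC's internal naming
   convention, and the runtime entry point assumed is the GCC 4.9-era
   libgomp ABI):

   /* Source:

        #pragma omp parallel shared(x)
          x++;

      is outlined into a child function that receives the shared state
      through a record:

        struct .omp_data_s { int *x; };

        static void
        main._omp_fn.0 (struct .omp_data_s *.omp_data_i)
        {
          (*.omp_data_i->x)++;
        }

      and the directive itself is replaced by a call into the thread
      library, along the lines of:

        GOMP_parallel (main._omp_fn.0, &.omp_data_o, 0, 0);  */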
 
+/* Parallel region information.  Every parallel and workshare
+   directive is enclosed between two markers, the OMP_* directive
+   and a corresponding OMP_RETURN statement.  */
+
+struct omp_region
+{
+  /* The enclosing region.  */
+  struct omp_region *outer;
+
+  /* First child region.  */
+  struct omp_region *inner;
+
+  /* Next peer region.  */
+  struct omp_region *next;
+
+  /* Block containing the omp directive as its last stmt.  */
+  basic_block entry;
+
+  /* Block containing the OMP_RETURN as its last stmt.  */
+  basic_block exit;
+
+  /* Block containing the OMP_CONTINUE as its last stmt.  */
+  basic_block cont;
+
+  /* If this is a combined parallel+workshare region, this is a list
+     of additional arguments needed by the combined parallel+workshare
+     library call.  */
+  vec<tree, va_gc> *ws_args;
+
+  /* The code for the omp directive of this region.  */
+  enum gimple_code type;
+
+  /* Schedule kind, only used for OMP_FOR type regions.  */
+  enum omp_clause_schedule_kind sched_kind;
+
+  /* True if this is a combined parallel+workshare region.  */
+  bool is_combined_parallel;
+};
+
 /* Context structure.  Used to store information about each parallel
    directive in the code.  */
 
@@ -90,6 +157,10 @@ typedef struct omp_context
      construct.  In the case of a parallel, this is in the child function.  */
   tree block_vars;
 
+  /* Label to which GOMP_cancel{,lation_point} and explicit and implicit
+     barriers should jump during the omplower pass.  */
+  tree cancel_label;
+
   /* What to do with variables with implicitly determined sharing
      attributes.  */
   enum omp_clause_default_kind default_kind;
@@ -101,6 +172,9 @@ typedef struct omp_context
 
   /* True if this parallel directive is nested within another.  */
   bool is_nested;
+
+  /* True if this construct can be cancelled.  */
+  bool cancellable;
 } omp_context;
 
 
@@ -127,7 +201,8 @@ struct omp_for_data
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
-struct omp_region *root_omp_region;
+static int target_nesting_level;
+static struct omp_region *root_omp_region;
 static bitmap task_shared_vars;
 
 static void scan_omp (gimple_seq *, omp_context *);
@@ -223,6 +298,9 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
   int i;
   struct omp_for_data_loop dummy_loop;
   location_t loc = gimple_location (for_stmt);
+  bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_KIND_SIMD;
+  bool distribute = gimple_omp_for_kind (for_stmt)
+                   == GF_OMP_FOR_KIND_DISTRIBUTE;
 
   fd->for_stmt = for_stmt;
   fd->pre = NULL;
@@ -232,7 +310,8 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
   else
     fd->loops = &fd->loop;
 
-  fd->have_nowait = fd->have_ordered = false;
+  fd->have_nowait = distribute || simd;
+  fd->have_ordered = false;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
   collapse_iter = NULL;
@@ -248,9 +327,14 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
        fd->have_ordered = true;
        break;
       case OMP_CLAUSE_SCHEDULE:
+       gcc_assert (!distribute);
        fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
        fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
        break;
+      case OMP_CLAUSE_DIST_SCHEDULE:
+       gcc_assert (distribute);
+       fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
+       break;
       case OMP_CLAUSE_COLLAPSE:
        if (fd->collapse > 1)
          {
@@ -278,8 +362,7 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
       /* We only need to compute a default chunk size for ordered
         static loops and dynamic loops.  */
       if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
-         || fd->have_ordered
-         || fd->collapse > 1)
+         || fd->have_ordered)
        fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
                         ? integer_zero_node : integer_one_node;
     }
@@ -293,7 +376,6 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
       else
        loop = &dummy_loop;
 
-
       loop->v = gimple_omp_for_index (for_stmt, i);
       gcc_assert (SSA_VAR_P (loop->v));
       gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
@@ -308,6 +390,10 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
        case LT_EXPR:
        case GT_EXPR:
          break;
+       case NE_EXPR:
+         gcc_assert (gimple_omp_for_kind (for_stmt)
+                     == GF_OMP_FOR_KIND_CILKSIMD);
+         break;
        case LE_EXPR:
          if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
            loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
@@ -350,7 +436,20 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
          gcc_unreachable ();
        }
 
-      if (iter_type != long_long_unsigned_type_node)
+      if (simd
+         || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
+             && !fd->have_ordered))
+       {
+         if (fd->collapse == 1)
+           iter_type = TREE_TYPE (loop->v);
+         else if (i == 0
+                  || TYPE_PRECISION (iter_type)
+                     < TYPE_PRECISION (TREE_TYPE (loop->v)))
+           iter_type
+             = build_nonstandard_integer_type
+                 (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
+       }
+      else if (iter_type != long_long_unsigned_type_node)
        {
          if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
            iter_type = long_long_unsigned_type_node;
@@ -399,11 +498,16 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
 
       if (collapse_count && *collapse_count == NULL)
        {
-         if ((i == 0 || count != NULL_TREE)
-             && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
-             && TREE_CONSTANT (loop->n1)
-             && TREE_CONSTANT (loop->n2)
-             && TREE_CODE (loop->step) == INTEGER_CST)
+         t = fold_binary (loop->cond_code, boolean_type_node,
+                          fold_convert (TREE_TYPE (loop->v), loop->n1),
+                          fold_convert (TREE_TYPE (loop->v), loop->n2));
+         if (t && integer_zerop (t))
+           count = build_zero_cst (long_long_unsigned_type_node);
+         else if ((i == 0 || count != NULL_TREE)
+                  && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
+                  && TREE_CONSTANT (loop->n1)
+                  && TREE_CONSTANT (loop->n2)
+                  && TREE_CODE (loop->step) == INTEGER_CST)
            {
              tree itype = TREE_TYPE (loop->v);
 
@@ -436,12 +540,15 @@ extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
              if (TREE_CODE (count) != INTEGER_CST)
                count = NULL_TREE;
            }
-         else
+         else if (count && !integer_zerop (count))
            count = NULL_TREE;
        }
     }
 
-  if (count)
+  if (count
+      && !simd
+      && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
+         || fd->have_ordered))
     {
       if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
        iter_type = long_long_unsigned_type_node;
@@ -551,34 +658,50 @@ workshare_safe_to_combine_p (basic_block ws_entry_bb)
    parallel+workshare call.  WS_STMT is the workshare directive being
    expanded.  */
 
-static VEC(tree,gc) *
-get_ws_args_for (gimple ws_stmt)
+static vec<tree, va_gc> *
+get_ws_args_for (gimple par_stmt, gimple ws_stmt)
 {
   tree t;
   location_t loc = gimple_location (ws_stmt);
-  VEC(tree,gc) *ws_args;
+  vec<tree, va_gc> *ws_args;
 
   if (gimple_code (ws_stmt) == GIMPLE_OMP_FOR)
     {
       struct omp_for_data fd;
+      tree n1, n2;
 
       extract_omp_for_data (ws_stmt, &fd, NULL);
+      n1 = fd.loop.n1;
+      n2 = fd.loop.n2;
+
+      if (gimple_omp_for_combined_into_p (ws_stmt))
+       {
+         tree innerc
+           = find_omp_clause (gimple_omp_parallel_clauses (par_stmt),
+                              OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         n1 = OMP_CLAUSE_DECL (innerc);
+         innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                   OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         n2 = OMP_CLAUSE_DECL (innerc);
+       }
 
-      ws_args = VEC_alloc (tree, gc, 3 + (fd.chunk_size != 0));
+      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
 
-      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.n1);
-      VEC_quick_push (tree, ws_args, t);
+      t = fold_convert_loc (loc, long_integer_type_node, n1);
+      ws_args->quick_push (t);
 
-      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.n2);
-      VEC_quick_push (tree, ws_args, t);
+      t = fold_convert_loc (loc, long_integer_type_node, n2);
+      ws_args->quick_push (t);
 
       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
-      VEC_quick_push (tree, ws_args, t);
+      ws_args->quick_push (t);
 
       if (fd.chunk_size)
        {
          t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
-         VEC_quick_push (tree, ws_args, t);
+         ws_args->quick_push (t);
        }
 
       return ws_args;
@@ -590,8 +713,8 @@ get_ws_args_for (gimple ws_stmt)
         the exit of the sections region.  */
       basic_block bb = single_succ (gimple_bb (ws_stmt));
       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
-      ws_args = VEC_alloc (tree, gc, 1);
-      VEC_quick_push (tree, ws_args, t);
+      vec_alloc (ws_args, 1);
+      ws_args->quick_push (t);
       return ws_args;
     }
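
   As a hedged illustration of how ws_args is consumed (assuming the
   GOMP_parallel_loop_* entry points of this libgomp generation):

   /* For example, "#pragma omp parallel for schedule(static)" expands
      roughly to

        GOMP_parallel_loop_static (fn, &data, num_threads,
                                   n1, n2, step, chunk, flags);

      where n1, n2, step and the optional chunk are exactly the values
      pushed into ws_args above.  */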
 
@@ -632,6 +755,7 @@ determine_parallel_type (struct omp_region *region)
          || (last_and_only_stmt (ws_entry_bb)
              && last_and_only_stmt (par_exit_bb))))
     {
+      gimple par_stmt = last_stmt (par_entry_bb);
       gimple ws_stmt = last_stmt (ws_entry_bb);
 
       if (region->inner->type == GIMPLE_OMP_FOR)
@@ -659,7 +783,7 @@ determine_parallel_type (struct omp_region *region)
 
       region->is_combined_parallel = true;
       region->inner->is_combined_parallel = true;
-      region->ws_args = get_ws_args_for (ws_stmt);
+      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
     }
 }
 
@@ -758,12 +882,20 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx)
       if (TREE_ADDRESSABLE (decl))
        return true;
 
+      /* lower_send_shared_vars uses copy-in, but not copy-out,
+        for these.  */
+      if (TREE_READONLY (decl)
+         || ((TREE_CODE (decl) == RESULT_DECL
+              || TREE_CODE (decl) == PARM_DECL)
+             && DECL_BY_REFERENCE (decl)))
+       return false;
+
       /* Disallow copy-in/out in nested parallel if
         decl is shared in outer parallel, otherwise
         each thread could store the shared variable
         in its own copy-in location, making the
         variable no longer really shared.  */
-      if (!TREE_READONLY (decl) && shared_ctx->is_nested)
+      if (shared_ctx->is_nested)
        {
          omp_context *up;
 
@@ -786,11 +918,10 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx)
            }
        }
 
-      /* For tasks avoid using copy-in/out, unless they are readonly
-        (in which case just copy-in is used).  As tasks can be
+      /* For tasks avoid using copy-in/out.  As tasks can be
         deferred or executed in a different thread, when GOMP_task
         returns, the task hasn't necessarily terminated.  */
-      if (!TREE_READONLY (decl) && is_task_ctx (shared_ctx))
+      if (is_task_ctx (shared_ctx))
        {
          tree outer;
        maybe_mark_addressable_and_ret:
@@ -812,25 +943,6 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx)
   return false;
 }
 
-/* Create a new VAR_DECL and copy information from VAR to it.  */
-
-tree
-copy_var_decl (tree var, tree name, tree type)
-{
-  tree copy = build_decl (DECL_SOURCE_LOCATION (var), VAR_DECL, name, type);
-
-  TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
-  TREE_THIS_VOLATILE (copy) = TREE_THIS_VOLATILE (var);
-  DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (var);
-  DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
-  DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
-  DECL_CONTEXT (copy) = DECL_CONTEXT (var);
-  TREE_USED (copy) = 1;
-  DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;
-
-  return copy;
-}
-
 /* Construct a new automatic decl similar to VAR.  */
 
 static tree
@@ -907,6 +1019,19 @@ build_outer_var_ref (tree var, omp_context *ctx)
       bool by_ref = use_pointer_for_field (var, NULL);
       x = build_receiver_ref (var, by_ref, ctx);
     }
+  else if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
+          && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_SIMD)
+    {
+      /* #pragma omp simd isn't a worksharing construct, and can reference even
+        private vars in its linear etc. clauses.  */
+      x = NULL_TREE;
+      if (ctx->outer && is_taskreg_ctx (ctx))
+       x = lookup_decl (var, ctx->outer);
+      else if (ctx->outer)
+       x = maybe_lookup_decl_in_outer_ctx (var, ctx);
+      if (x == NULL_TREE)
+       x = var;
+    }
   else if (ctx->outer)
     x = lookup_decl (var, ctx->outer);
   else if (is_reference (var))
@@ -944,7 +1069,12 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
              || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var));
 
   type = TREE_TYPE (var);
-  if (by_ref)
+  if (mask & 4)
+    {
+      gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+      type = build_pointer_type (build_pointer_type (type));
+    }
+  else if (by_ref)
     type = build_pointer_type (type);
   else if ((mask & 3) == 1 && is_reference (var))
     type = TREE_TYPE (type);
@@ -1090,8 +1220,6 @@ omp_copy_decl (tree var, copy_body_data *cb)
 }
 
 
-/* Return the parallel region associated with STMT.  */
-
 /* Debugging dumps for parallel regions.  */
 void dump_omp_region (FILE *, struct omp_region *, int);
 void debug_omp_region (struct omp_region *);
@@ -1139,7 +1267,7 @@ debug_all_omp_regions (void)
 
 /* Create a new parallel region starting at STMT inside region PARENT.  */
 
-struct omp_region *
+static struct omp_region *
 new_omp_region (basic_block bb, enum gimple_code type,
                struct omp_region *parent)
 {
@@ -1243,7 +1371,7 @@ static void
 finalize_task_copyfn (gimple task_stmt)
 {
   struct function *child_cfun;
-  tree child_fn, old_fn;
+  tree child_fn;
   gimple_seq seq = NULL, new_seq;
   gimple bind;
 
@@ -1252,14 +1380,9 @@ finalize_task_copyfn (gimple task_stmt)
     return;
 
   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
+  DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
 
-  /* Inform the callgraph about the new function.  */
-  DECL_STRUCT_FUNCTION (child_fn)->curr_properties
-    = cfun->curr_properties & ~PROP_loops;
-
-  old_fn = current_function_decl;
   push_cfun (child_cfun);
-  current_function_decl = child_fn;
   bind = gimplify_body (child_fn, false);
   gimple_seq_add_stmt (&seq, bind);
   new_seq = maybe_catch_exception (seq);
@@ -1271,8 +1394,8 @@ finalize_task_copyfn (gimple task_stmt)
     }
   gimple_set_body (child_fn, seq);
   pop_cfun ();
-  current_function_decl = old_fn;
 
+  /* Inform the callgraph about the new function.  */
   cgraph_add_new_function (child_fn, false);
 }
 
@@ -1386,6 +1509,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
          break;
 
        case OMP_CLAUSE_SHARED:
+         /* Ignore shared directives in teams construct.  */
+         if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
+           break;
          gcc_assert (is_taskreg_ctx (ctx));
          decl = OMP_CLAUSE_DECL (c);
          gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl))
@@ -1417,6 +1543,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 
        case OMP_CLAUSE_FIRSTPRIVATE:
        case OMP_CLAUSE_REDUCTION:
+       case OMP_CLAUSE_LINEAR:
          decl = OMP_CLAUSE_DECL (c);
        do_private:
          if (is_variable_sized (decl))
@@ -1444,6 +1571,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
          install_var_local (decl, ctx);
          break;
 
+       case OMP_CLAUSE__LOOPTEMP_:
+         gcc_assert (is_parallel_ctx (ctx));
+         decl = OMP_CLAUSE_DECL (c);
+         install_var_field (decl, false, 3, ctx);
+         install_var_local (decl, ctx);
+         break;
+
        case OMP_CLAUSE_COPYPRIVATE:
        case OMP_CLAUSE_COPYIN:
          decl = OMP_CLAUSE_DECL (c);
@@ -1458,16 +1592,111 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
        case OMP_CLAUSE_FINAL:
        case OMP_CLAUSE_IF:
        case OMP_CLAUSE_NUM_THREADS:
+       case OMP_CLAUSE_NUM_TEAMS:
+       case OMP_CLAUSE_THREAD_LIMIT:
+       case OMP_CLAUSE_DEVICE:
        case OMP_CLAUSE_SCHEDULE:
+       case OMP_CLAUSE_DIST_SCHEDULE:
+       case OMP_CLAUSE_DEPEND:
          if (ctx->outer)
            scan_omp_op (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
          break;
 
+       case OMP_CLAUSE_TO:
+       case OMP_CLAUSE_FROM:
+       case OMP_CLAUSE_MAP:
+         if (ctx->outer)
+           scan_omp_op (&OMP_CLAUSE_SIZE (c), ctx->outer);
+         decl = OMP_CLAUSE_DECL (c);
+         /* Global variables with the "omp declare target" attribute
+            don't need to be copied; the receiver side will use them
+            directly.  */
+         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+             && DECL_P (decl)
+             && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
+             && lookup_attribute ("omp declare target",
+                                  DECL_ATTRIBUTES (decl)))
+           break;
+         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+             && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER)
+           {
+             /* Ignore the OMP_CLAUSE_MAP_POINTER kind for arrays in
+                #pragma omp target data; there is nothing to map for
+                those.  */
+             if (gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_DATA
+                 && !POINTER_TYPE_P (TREE_TYPE (decl)))
+               break;
+           }
+         if (DECL_P (decl))
+           {
+             if (DECL_SIZE (decl)
+                 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+               {
+                 tree decl2 = DECL_VALUE_EXPR (decl);
+                 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
+                 decl2 = TREE_OPERAND (decl2, 0);
+                 gcc_assert (DECL_P (decl2));
+                 install_var_field (decl2, true, 3, ctx);
+                 install_var_local (decl2, ctx);
+                 install_var_local (decl, ctx);
+               }
+             else
+               {
+                 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+                     && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER
+                     && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
+                     && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
+                   install_var_field (decl, true, 7, ctx);
+                 else
+                   install_var_field (decl, true, 3, ctx);
+                 if (gimple_omp_target_kind (ctx->stmt)
+                     == GF_OMP_TARGET_KIND_REGION)
+                   install_var_local (decl, ctx);
+               }
+           }
+         else
+           {
+             tree base = get_base_address (decl);
+             tree nc = OMP_CLAUSE_CHAIN (c);
+             if (DECL_P (base)
+                 && nc != NULL_TREE
+                 && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
+                 && OMP_CLAUSE_DECL (nc) == base
+                 && OMP_CLAUSE_MAP_KIND (nc) == OMP_CLAUSE_MAP_POINTER
+                 && integer_zerop (OMP_CLAUSE_SIZE (nc)))
+               {
+                 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) = 1;
+                 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (nc) = 1;
+               }
+             else
+               {
+                 gcc_assert (!splay_tree_lookup (ctx->field_map,
+                                                 (splay_tree_key) decl));
+                 tree field
+                   = build_decl (OMP_CLAUSE_LOCATION (c),
+                                 FIELD_DECL, NULL_TREE, ptr_type_node);
+                 DECL_ALIGN (field) = TYPE_ALIGN (ptr_type_node);
+                 insert_field_into_struct (ctx->record_type, field);
+                 splay_tree_insert (ctx->field_map, (splay_tree_key) decl,
+                                    (splay_tree_value) field);
+               }
+           }
+         break;
+
        case OMP_CLAUSE_NOWAIT:
        case OMP_CLAUSE_ORDERED:
        case OMP_CLAUSE_COLLAPSE:
        case OMP_CLAUSE_UNTIED:
        case OMP_CLAUSE_MERGEABLE:
+       case OMP_CLAUSE_PROC_BIND:
+       case OMP_CLAUSE_SAFELEN:
+         break;
+
+       case OMP_CLAUSE_ALIGNED:
+         decl = OMP_CLAUSE_DECL (c);
+         if (is_global_var (decl)
+             && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
+           install_var_local (decl, ctx);
          break;
 
        default:
@@ -1491,6 +1720,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
        case OMP_CLAUSE_PRIVATE:
        case OMP_CLAUSE_FIRSTPRIVATE:
        case OMP_CLAUSE_REDUCTION:
+       case OMP_CLAUSE_LINEAR:
          decl = OMP_CLAUSE_DECL (c);
          if (is_variable_sized (decl))
            install_var_local (decl, ctx);
@@ -1503,23 +1733,71 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
          break;
 
        case OMP_CLAUSE_SHARED:
+         /* Ignore shared directives in teams construct.  */
+         if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
+           break;
          decl = OMP_CLAUSE_DECL (c);
          if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
            fixup_remapped_decl (decl, ctx, false);
          break;
 
+       case OMP_CLAUSE_MAP:
+         if (gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_DATA)
+           break;
+         decl = OMP_CLAUSE_DECL (c);
+         if (DECL_P (decl)
+             && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
+             && lookup_attribute ("omp declare target",
+                                  DECL_ATTRIBUTES (decl)))
+           break;
+         if (DECL_P (decl))
+           {
+             if (OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER
+                 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
+                 && !COMPLETE_TYPE_P (TREE_TYPE (decl)))
+               {
+                 tree new_decl = lookup_decl (decl, ctx);
+                 TREE_TYPE (new_decl)
+                   = remap_type (TREE_TYPE (decl), &ctx->cb);
+               }
+             else if (DECL_SIZE (decl)
+                      && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+               {
+                 tree decl2 = DECL_VALUE_EXPR (decl);
+                 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
+                 decl2 = TREE_OPERAND (decl2, 0);
+                 gcc_assert (DECL_P (decl2));
+                 fixup_remapped_decl (decl2, ctx, false);
+                 fixup_remapped_decl (decl, ctx, true);
+               }
+             else
+               fixup_remapped_decl (decl, ctx, false);
+           }
+         break;
+
        case OMP_CLAUSE_COPYPRIVATE:
        case OMP_CLAUSE_COPYIN:
        case OMP_CLAUSE_DEFAULT:
        case OMP_CLAUSE_IF:
        case OMP_CLAUSE_NUM_THREADS:
+       case OMP_CLAUSE_NUM_TEAMS:
+       case OMP_CLAUSE_THREAD_LIMIT:
+       case OMP_CLAUSE_DEVICE:
        case OMP_CLAUSE_SCHEDULE:
+       case OMP_CLAUSE_DIST_SCHEDULE:
        case OMP_CLAUSE_NOWAIT:
        case OMP_CLAUSE_ORDERED:
        case OMP_CLAUSE_COLLAPSE:
        case OMP_CLAUSE_UNTIED:
        case OMP_CLAUSE_FINAL:
        case OMP_CLAUSE_MERGEABLE:
+       case OMP_CLAUSE_PROC_BIND:
+       case OMP_CLAUSE_SAFELEN:
+       case OMP_CLAUSE_ALIGNED:
+       case OMP_CLAUSE_DEPEND:
+       case OMP_CLAUSE__LOOPTEMP_:
+       case OMP_CLAUSE_TO:
+       case OMP_CLAUSE_FROM:
          break;
 
        default:
@@ -1542,8 +1820,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 
 /* Create a new name for omp child function.  Returns an identifier.  */
 
-static GTY(()) unsigned int tmp_ompfn_id_num;
-
 static tree
 create_omp_child_function_name (bool task_copy)
 {
@@ -1584,6 +1860,26 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
   DECL_EXTERNAL (decl) = 0;
   DECL_CONTEXT (decl) = NULL_TREE;
   DECL_INITIAL (decl) = make_node (BLOCK);
+  bool target_p = false;
+  if (lookup_attribute ("omp declare target",
+                       DECL_ATTRIBUTES (current_function_decl)))
+    target_p = true;
+  else
+    {
+      omp_context *octx;
+      for (octx = ctx; octx; octx = octx->outer)
+       if (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET
+           && gimple_omp_target_kind (octx->stmt)
+              == GF_OMP_TARGET_KIND_REGION)
+         {
+           target_p = true;
+           break;
+         }
+    }
+  if (target_p)
+    DECL_ATTRIBUTES (decl)
+      = tree_cons (get_identifier ("omp declare target"),
+                  NULL_TREE, DECL_ATTRIBUTES (decl));
 
   t = build_decl (DECL_SOURCE_LOCATION (decl),
                  RESULT_DECL, NULL_TREE, void_type_node);
@@ -1625,6 +1921,34 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
   pop_cfun ();
 }
 
+/* Callback for walk_gimple_seq.  Check if a combined parallel
+   contains a gimple_omp_for_combined_into_p OMP_FOR.  */
+
+static tree
+find_combined_for (gimple_stmt_iterator *gsi_p,
+                  bool *handled_ops_p,
+                  struct walk_stmt_info *wi)
+{
+  gimple stmt = gsi_stmt (*gsi_p);
+
+  *handled_ops_p = true;
+  switch (gimple_code (stmt))
+    {
+    WALK_SUBSTMTS;
+
+    case GIMPLE_OMP_FOR:
+      if (gimple_omp_for_combined_into_p (stmt)
+         && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
+       {
+         wi->info = stmt;
+         return integer_zero_node;
+       }
+      break;
+    default:
+      break;
+    }
+  return NULL;
+}
 
 /* Scan an OpenMP parallel directive.  */
 
@@ -1646,6 +1970,40 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
       return;
     }
 
+  if (gimple_omp_parallel_combined_p (stmt))
+    {
+      gimple for_stmt;
+      struct walk_stmt_info wi;
+
+      memset (&wi, 0, sizeof (wi));
+      wi.val_only = true;
+      walk_gimple_seq (gimple_omp_body (stmt),
+                      find_combined_for, NULL, &wi);
+      for_stmt = (gimple) wi.info;
+      if (for_stmt)
+       {
+         struct omp_for_data fd;
+         extract_omp_for_data (for_stmt, &fd, NULL);
+         /* We need two temporaries with fd.loop.v type (istart/iend)
+            and then (fd.collapse - 1) temporaries with the same
+            type for count2 ... countN-1 vars if not constant.  */
+         size_t count = 2, i;
+         tree type = fd.iter_type;
+         if (fd.collapse > 1
+             && TREE_CODE (fd.loop.n2) != INTEGER_CST)
+           count += fd.collapse - 1;
+         for (i = 0; i < count; i++)
+           {
+             tree temp = create_tmp_var (type, NULL);
+             tree c = build_omp_clause (UNKNOWN_LOCATION,
+                                        OMP_CLAUSE__LOOPTEMP_);
+             OMP_CLAUSE_DECL (c) = temp;
+             OMP_CLAUSE_CHAIN (c) = gimple_omp_parallel_clauses (stmt);
+             gimple_omp_parallel_set_clauses (stmt, c);
+           }
+       }
+    }
+
   ctx = new_omp_context (stmt, outer_ctx);
   if (taskreg_nesting_level > 1)
     ctx->is_nested = true;
@@ -1820,17 +2178,220 @@ scan_omp_single (gimple stmt, omp_context *outer_ctx)
     layout_type (ctx->record_type);
 }
 
+/* Scan an OpenMP target{, data, update} directive.  */
+
+static void
+scan_omp_target (gimple stmt, omp_context *outer_ctx)
+{
+  omp_context *ctx;
+  tree name;
+  int kind = gimple_omp_target_kind (stmt);
+
+  ctx = new_omp_context (stmt, outer_ctx);
+  ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
+  ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
+  ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
+  name = create_tmp_var_name (".omp_data_t");
+  name = build_decl (gimple_location (stmt),
+                    TYPE_DECL, name, ctx->record_type);
+  DECL_ARTIFICIAL (name) = 1;
+  DECL_NAMELESS (name) = 1;
+  TYPE_NAME (ctx->record_type) = name;
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      create_omp_child_function (ctx, false);
+      gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
+    }
+
+  scan_sharing_clauses (gimple_omp_target_clauses (stmt), ctx);
+  scan_omp (gimple_omp_body_ptr (stmt), ctx);
+
+  if (TYPE_FIELDS (ctx->record_type) == NULL)
+    ctx->record_type = ctx->receiver_decl = NULL;
+  else
+    {
+      TYPE_FIELDS (ctx->record_type)
+       = nreverse (TYPE_FIELDS (ctx->record_type));
+#ifdef ENABLE_CHECKING
+      tree field;
+      unsigned int align = DECL_ALIGN (TYPE_FIELDS (ctx->record_type));
+      for (field = TYPE_FIELDS (ctx->record_type);
+          field;
+          field = DECL_CHAIN (field))
+       gcc_assert (DECL_ALIGN (field) == align);
+#endif
+      layout_type (ctx->record_type);
+      if (kind == GF_OMP_TARGET_KIND_REGION)
+       fixup_child_record_type (ctx);
+    }
+}
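
   A simplified sketch of what this scan sets up (field layout and
   naming are illustrative):

   /* For a construct such as

        #pragma omp target map(tofrom: a[0:n])
        for (i = 0; i < n; i++)
          a[i] *= 2;

      each mapped object gets a FIELD_DECL in the .omp_data_t record
      built above, and for GF_OMP_TARGET_KIND_REGION the body is
      additionally outlined into a child function, analogous to the
      parallel case.  */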
+
+/* Scan an OpenMP teams directive.  */
+
+static void
+scan_omp_teams (gimple stmt, omp_context *outer_ctx)
+{
+  omp_context *ctx = new_omp_context (stmt, outer_ctx);
+  scan_sharing_clauses (gimple_omp_teams_clauses (stmt), ctx);
+  scan_omp (gimple_omp_body_ptr (stmt), ctx);
+}
 
 /* Check OpenMP nesting restrictions.  */
 static bool
 check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
 {
+  if (ctx != NULL)
+    {
+      if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
+         && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_SIMD)
+       {
+         error_at (gimple_location (stmt),
+                   "OpenMP constructs may not be nested inside simd region");
+         return false;
+       }
+      else if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
+       {
+         if ((gimple_code (stmt) != GIMPLE_OMP_FOR
+              || (gimple_omp_for_kind (stmt)
+                  != GF_OMP_FOR_KIND_DISTRIBUTE))
+             && gimple_code (stmt) != GIMPLE_OMP_PARALLEL)
+           {
+             error_at (gimple_location (stmt),
+                       "only distribute or parallel constructs are allowed to "
+                       "be closely nested inside teams construct");
+             return false;
+           }
+       }
+    }
   switch (gimple_code (stmt))
     {
     case GIMPLE_OMP_FOR:
+      if (gimple_omp_for_kind (stmt) & GF_OMP_FOR_KIND_SIMD)
+       return true;
+      if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
+       {
+         if (ctx != NULL && gimple_code (ctx->stmt) != GIMPLE_OMP_TEAMS)
+           {
+             error_at (gimple_location (stmt),
+                       "distribute construct must be closely nested inside "
+                       "teams construct");
+             return false;
+           }
+         return true;
+       }
+      /* FALLTHRU */
+    case GIMPLE_CALL:
+      if (is_gimple_call (stmt)
+         && (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+             == BUILT_IN_GOMP_CANCEL
+             || DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                == BUILT_IN_GOMP_CANCELLATION_POINT))
+       {
+         const char *bad = NULL;
+         const char *kind = NULL;
+         if (ctx == NULL)
+           {
+             error_at (gimple_location (stmt), "orphaned %qs construct",
+                       DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                       == BUILT_IN_GOMP_CANCEL
+                       ? "#pragma omp cancel"
+                       : "#pragma omp cancellation point");
+             return false;
+           }
+         switch (tree_fits_shwi_p (gimple_call_arg (stmt, 0))
+                 ? tree_to_shwi (gimple_call_arg (stmt, 0))
+                 : 0)
+           {
+           case 1:
+             if (gimple_code (ctx->stmt) != GIMPLE_OMP_PARALLEL)
+               bad = "#pragma omp parallel";
+             else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                      == BUILT_IN_GOMP_CANCEL
+                      && !integer_zerop (gimple_call_arg (stmt, 1)))
+               ctx->cancellable = true;
+             kind = "parallel";
+             break;
+           case 2:
+             if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
+                 || gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR)
+               bad = "#pragma omp for";
+             else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                      == BUILT_IN_GOMP_CANCEL
+                      && !integer_zerop (gimple_call_arg (stmt, 1)))
+               {
+                 ctx->cancellable = true;
+                 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
+                                      OMP_CLAUSE_NOWAIT))
+                   warning_at (gimple_location (stmt), 0,
+                               "%<#pragma omp cancel for%> inside "
+                               "%<nowait%> for construct");
+                 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
+                                      OMP_CLAUSE_ORDERED))
+                   warning_at (gimple_location (stmt), 0,
+                               "%<#pragma omp cancel for%> inside "
+                               "%<ordered%> for construct");
+               }
+             kind = "for";
+             break;
+           case 4:
+             if (gimple_code (ctx->stmt) != GIMPLE_OMP_SECTIONS
+                 && gimple_code (ctx->stmt) != GIMPLE_OMP_SECTION)
+               bad = "#pragma omp sections";
+             else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                      == BUILT_IN_GOMP_CANCEL
+                      && !integer_zerop (gimple_call_arg (stmt, 1)))
+               {
+                 if (gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS)
+                   {
+                     ctx->cancellable = true;
+                     if (find_omp_clause (gimple_omp_sections_clauses
+                                                               (ctx->stmt),
+                                          OMP_CLAUSE_NOWAIT))
+                       warning_at (gimple_location (stmt), 0,
+                                   "%<#pragma omp cancel sections%> inside "
+                                   "%<nowait%> sections construct");
+                   }
+                 else
+                   {
+                     gcc_assert (ctx->outer
+                                 && gimple_code (ctx->outer->stmt)
+                                    == GIMPLE_OMP_SECTIONS);
+                     ctx->outer->cancellable = true;
+                     if (find_omp_clause (gimple_omp_sections_clauses
+                                                       (ctx->outer->stmt),
+                                          OMP_CLAUSE_NOWAIT))
+                       warning_at (gimple_location (stmt), 0,
+                                   "%<#pragma omp cancel sections%> inside "
+                                   "%<nowait%> sections construct");
+                   }
+               }
+             kind = "sections";
+             break;
+           case 8:
+             if (gimple_code (ctx->stmt) != GIMPLE_OMP_TASK)
+               bad = "#pragma omp task";
+             else
+               ctx->cancellable = true;
+             kind = "taskgroup";
+             break;
+           default:
+             error_at (gimple_location (stmt), "invalid arguments");
+             return false;
+           }
+         if (bad)
+           {
+             error_at (gimple_location (stmt),
+                       "%<%s %s%> construct not closely nested inside of %qs",
+                       DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                       == BUILT_IN_GOMP_CANCEL
+                       ? "#pragma omp cancel"
+                       : "#pragma omp cancellation point", kind, bad);
+             return false;
+           }
+       }
+      /* FALLTHRU */
     case GIMPLE_OMP_SECTIONS:
     case GIMPLE_OMP_SINGLE:
-    case GIMPLE_CALL:
       for (; ctx != NULL; ctx = ctx->outer)
        switch (gimple_code (ctx->stmt))
          {
@@ -1840,8 +2401,12 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
          case GIMPLE_OMP_ORDERED:
          case GIMPLE_OMP_MASTER:
          case GIMPLE_OMP_TASK:
+         case GIMPLE_OMP_CRITICAL:
            if (is_gimple_call (stmt))
              {
+               if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
+                   != BUILT_IN_GOMP_BARRIER)
+                 return true;
                error_at (gimple_location (stmt),
                          "barrier region may not be closely nested inside "
                          "of work-sharing, critical, ordered, master or "
@@ -1898,7 +2463,10 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
              }
            return true;
          case GIMPLE_OMP_PARALLEL:
-           return true;
+           error_at (gimple_location (stmt),
+                     "ordered region must be closely nested inside "
+                     "a loop region with an ordered clause");
+           return false;
          default:
            break;
          }
@@ -1915,6 +2483,17 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
            return false;
          }
       break;
+    case GIMPLE_OMP_TEAMS:
+      if (ctx == NULL
+         || gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET
+         || gimple_omp_target_kind (ctx->stmt) != GF_OMP_TARGET_KIND_REGION)
+       {
+         error_at (gimple_location (stmt),
+                   "teams construct not closely nested inside of target "
+                   "region");
+         return false;
+       }
+      break;
     default:
       break;
     }
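
   For reference, the case values 1, 2, 4 and 8 above mirror the
   bitmask the front ends pass as the first argument of GOMP_cancel
   and GOMP_cancellation_point; a sketch of the mapping (internal
   GCC/libgomp ABI, shown here only for illustration):

   /* #pragma omp cancel parallel   ->  GOMP_cancel (1, if-clause-value)
      #pragma omp cancel for        ->  GOMP_cancel (2, if-clause-value)
      #pragma omp cancel sections   ->  GOMP_cancel (4, if-clause-value)
      #pragma omp cancel taskgroup  ->  GOMP_cancel (8, if-clause-value)

      The second argument is the evaluated if() clause (nonzero when
      the clause is absent), which is why a literal-zero second
      argument does not mark the enclosing context as cancellable.  */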
@@ -1970,6 +2549,23 @@ scan_omp_1_op (tree *tp, int *walk_subtrees, void *data)
   return NULL_TREE;
 }
 
+/* Return true if FNDECL is a setjmp or a longjmp.  */
+
+static bool
+setjmp_or_longjmp_p (const_tree fndecl)
+{
+  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
+      && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SETJMP
+         || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LONGJMP))
+    return true;
+
+  tree declname = DECL_NAME (fndecl);
+  if (!declname)
+    return false;
+  const char *name = IDENTIFIER_POINTER (declname);
+  return !strcmp (name, "setjmp") || !strcmp (name, "longjmp");
+}
+
 
 /* Helper function for scan_omp.
 
@@ -1987,24 +2583,45 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
     input_location = gimple_location (stmt);
 
   /* Check the OpenMP nesting restrictions.  */
-  if (ctx != NULL)
+  bool remove = false;
+  if (is_gimple_omp (stmt))
+    remove = !check_omp_nesting_restrictions (stmt, ctx);
+  else if (is_gimple_call (stmt))
     {
-      bool remove = false;
-      if (is_gimple_omp (stmt))
-       remove = !check_omp_nesting_restrictions (stmt, ctx);
-      else if (is_gimple_call (stmt))
-       {
-         tree fndecl = gimple_call_fndecl (stmt);
-         if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
-             && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER)
-           remove = !check_omp_nesting_restrictions (stmt, ctx);
-       }
-      if (remove)
+      tree fndecl = gimple_call_fndecl (stmt);
+      if (fndecl)
        {
-         stmt = gimple_build_nop ();
-         gsi_replace (gsi, stmt, false);
+         if (setjmp_or_longjmp_p (fndecl)
+             && ctx
+             && gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
+             && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_SIMD)
+           {
+             remove = true;
+             error_at (gimple_location (stmt),
+                       "setjmp/longjmp inside simd construct");
+           }
+         else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+           switch (DECL_FUNCTION_CODE (fndecl))
+             {
+             case BUILT_IN_GOMP_BARRIER:
+             case BUILT_IN_GOMP_CANCEL:
+             case BUILT_IN_GOMP_CANCELLATION_POINT:
+             case BUILT_IN_GOMP_TASKYIELD:
+             case BUILT_IN_GOMP_TASKWAIT:
+             case BUILT_IN_GOMP_TASKGROUP_START:
+             case BUILT_IN_GOMP_TASKGROUP_END:
+               remove = !check_omp_nesting_restrictions (stmt, ctx);
+               break;
+             default:
+               break;
+             }
        }
     }
+  if (remove)
+    {
+      stmt = gimple_build_nop ();
+      gsi_replace (gsi, stmt, false);
+    }
 
   *handled_ops_p = true;
 
@@ -2036,12 +2653,21 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 
     case GIMPLE_OMP_SECTION:
     case GIMPLE_OMP_MASTER:
+    case GIMPLE_OMP_TASKGROUP:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
       ctx = new_omp_context (stmt, ctx);
       scan_omp (gimple_omp_body_ptr (stmt), ctx);
       break;
 
+    case GIMPLE_OMP_TARGET:
+      scan_omp_target (stmt, ctx);
+      break;
+
+    case GIMPLE_OMP_TEAMS:
+      scan_omp_teams (stmt, ctx);
+      break;
+
     case GIMPLE_BIND:
       {
        tree var;
@@ -2084,10 +2710,15 @@ scan_omp (gimple_seq *body_p, omp_context *ctx)
 
 /* Build a call to GOMP_barrier.  */
 
-static tree
-build_omp_barrier (void)
+static gimple
+build_omp_barrier (tree lhs)
 {
-  return build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_BARRIER), 0);
+  tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
+                                          : BUILT_IN_GOMP_BARRIER);
+  gimple g = gimple_build_call (fndecl, 0);
+  if (lhs)
+    gimple_call_set_lhs (g, lhs);
+  return g;
 }
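
   The LHS argument selects the cancellable variant; a sketch of how
   its result is consumed (cf. the cancel_label field added to
   omp_context above):

   /* Inside a cancellable region the barrier is emitted roughly as

        cancelled = GOMP_barrier_cancel ();
        if (cancelled != 0) goto cancel_label;

      while a plain GOMP_barrier () call has no result to test.  */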
 
 /* If a context was created for STMT when it was scanned, return it.  */
@@ -2248,25 +2879,167 @@ omp_reduction_init (tree clause, tree type)
     }
 }
 
-/* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
-   from the receiver (aka child) side and initializers for REFERENCE_TYPE
-   private variables.  Initialization statements go in ILIST, while calls
-   to destructors go in DLIST.  */
+/* Return alignment to be assumed for var in CLAUSE, which should be
+   OMP_CLAUSE_ALIGNED.  */
 
-static void
-lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
-                        omp_context *ctx)
+static tree
+omp_clause_aligned_alignment (tree clause)
 {
-  tree c, dtor, copyin_seq, x, ptr;
-  bool copyin_by_ref = false;
-  bool lastprivate_firstprivate = false;
-  int pass;
+  if (OMP_CLAUSE_ALIGNED_ALIGNMENT (clause))
+    return OMP_CLAUSE_ALIGNED_ALIGNMENT (clause);
+
+  /* Otherwise return implementation defined alignment.  */
+  unsigned int al = 1;
+  enum machine_mode mode, vmode;
+  int vs = targetm.vectorize.autovectorize_vector_sizes ();
+  if (vs)
+    vs = 1 << floor_log2 (vs);
+  static enum mode_class classes[]
+    = { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT };
+  for (int i = 0; i < 4; i += 2)
+    for (mode = GET_CLASS_NARROWEST_MODE (classes[i]);
+        mode != VOIDmode;
+        mode = GET_MODE_WIDER_MODE (mode))
+      {
+       vmode = targetm.vectorize.preferred_simd_mode (mode);
+       if (GET_MODE_CLASS (vmode) != classes[i + 1])
+         continue;
+       while (vs
+              && GET_MODE_SIZE (vmode) < vs
+              && GET_MODE_2XWIDER_MODE (vmode) != VOIDmode)
+         vmode = GET_MODE_2XWIDER_MODE (vmode);
+       
+       tree type = lang_hooks.types.type_for_mode (mode, 1);
+       if (type == NULL_TREE || TYPE_MODE (type) != mode)
+         continue;
+       type = build_vector_type (type, GET_MODE_SIZE (vmode)
+                                       / GET_MODE_SIZE (mode));
+       if (TYPE_MODE (type) != vmode)
+         continue;
+       if (TYPE_ALIGN_UNIT (type) > al)
+         al = TYPE_ALIGN_UNIT (type);
+      }
+  return build_int_cst (integer_type_node, al);
+}
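
   The alignment computed here ultimately becomes the second argument
   of __builtin_assume_aligned when the clause is lowered (see the
   BUILT_IN_ASSUME_ALIGNED calls in lower_rec_input_clauses below).
   For example:

   /* #pragma omp simd aligned(p : 32)
      lowers the pointer as

        p = __builtin_assume_aligned (p, 32);

      and when no explicit alignment is given, the implementation
      defined value computed above (the widest SIMD alignment useful
      for the target) is used instead.  */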
 
-  copyin_seq = NULL;
+/* Return maximum possible vectorization factor for the target.  */
 
-  /* Do all the fixed sized types in the first pass, and the variable sized
-     types in the second pass.  This makes sure that the scalar arguments to
-     the variable sized types are processed before we use them in the
+static int
+omp_max_vf (void)
+{
+  if (!optimize
+      || optimize_debug
+      || !flag_tree_loop_optimize
+      || (!flag_tree_loop_vectorize
+         && (global_options_set.x_flag_tree_loop_vectorize
+              || global_options_set.x_flag_tree_vectorize)))
+    return 1;
+
+  int vs = targetm.vectorize.autovectorize_vector_sizes ();
+  if (vs)
+    {
+      vs = 1 << floor_log2 (vs);
+      return vs;
+    }
+  enum machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
+  if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
+    return GET_MODE_NUNITS (vqimode);
+  return 1;
+}
+
+/* Helper function of lower_rec_input_clauses, used for #pragma omp simd
+   privatization.  */
+
+static bool
+lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf,
+                             tree &idx, tree &lane, tree &ivar, tree &lvar)
+{
+  if (max_vf == 0)
+    {
+      max_vf = omp_max_vf ();
+      if (max_vf > 1)
+       {
+         tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
+                                   OMP_CLAUSE_SAFELEN);
+         if (c
+             && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c), max_vf) == -1)
+           max_vf = tree_to_shwi (OMP_CLAUSE_SAFELEN_EXPR (c));
+       }
+      if (max_vf > 1)
+       {
+         idx = create_tmp_var (unsigned_type_node, NULL);
+         lane = create_tmp_var (unsigned_type_node, NULL);
+       }
+    }
+  if (max_vf == 1)
+    return false;
+
+  tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
+  tree avar = create_tmp_var_raw (atype, NULL);
+  if (TREE_ADDRESSABLE (new_var))
+    TREE_ADDRESSABLE (avar) = 1;
+  DECL_ATTRIBUTES (avar)
+    = tree_cons (get_identifier ("omp simd array"), NULL,
+                DECL_ATTRIBUTES (avar));
+  gimple_add_tmp_var (avar);
+  ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, idx,
+                NULL_TREE, NULL_TREE);
+  lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, lane,
+                NULL_TREE, NULL_TREE);
+  if (DECL_P (new_var))
+    {
+      SET_DECL_VALUE_EXPR (new_var, lvar);
+      DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+    }
+  return true;
+}
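
   A sketch of the privatization this helper arranges (the array name
   is illustrative):

   /* A scalar privatized in a simd loop is widened into a per-lane
      "omp simd array":

        int x;    becomes    int D.x[max_vf];

      and uses of x in the loop body are redirected through
      DECL_VALUE_EXPR to D.x[lane], so that each SIMD lane gets its
      own copy which the vectorizer can keep in a vector register.  */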
+
+/* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
+   from the receiver (aka child) side and initializers for REFERENCE_TYPE
+   private variables.  Initialization statements go in ILIST, while calls
+   to destructors go in DLIST.  */
+
+static void
+lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
+                        omp_context *ctx, struct omp_for_data *fd)
+{
+  tree c, dtor, copyin_seq, x, ptr;
+  bool copyin_by_ref = false;
+  bool lastprivate_firstprivate = false;
+  bool reduction_omp_orig_ref = false;
+  int pass;
+  bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
+                 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_SIMD);
+  int max_vf = 0;
+  tree lane = NULL_TREE, idx = NULL_TREE;
+  tree ivar = NULL_TREE, lvar = NULL_TREE;
+  gimple_seq llist[2] = { NULL, NULL };
+
+  copyin_seq = NULL;
+
+  /* Set max_vf=1 (which will later enforce safelen=1) in simd loops
+     with data sharing clauses referencing variable sized vars.  That
+     is unnecessarily hard to support and very unlikely to result in
+     vectorized code anyway.  */
+  if (is_simd)
+    for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+      switch (OMP_CLAUSE_CODE (c))
+       {
+       case OMP_CLAUSE_REDUCTION:
+       case OMP_CLAUSE_PRIVATE:
+       case OMP_CLAUSE_FIRSTPRIVATE:
+       case OMP_CLAUSE_LASTPRIVATE:
+       case OMP_CLAUSE_LINEAR:
+         if (is_variable_sized (OMP_CLAUSE_DECL (c)))
+           max_vf = 1;
+         break;
+       default:
+         continue;
+       }
+
+  /* Do all the fixed sized types in the first pass, and the variable sized
+     types in the second pass.  This makes sure that the scalar arguments to
+     the variable sized types are processed before we use them in the
      variable sized operations.  */
   for (pass = 0; pass < 2; ++pass)
     {
@@ -2284,6 +3057,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                continue;
              break;
            case OMP_CLAUSE_SHARED:
+             /* Ignore shared directives in teams construct.  */
+             if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
+               continue;
              if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
                {
                  gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
@@ -2291,7 +3067,16 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                }
            case OMP_CLAUSE_FIRSTPRIVATE:
            case OMP_CLAUSE_COPYIN:
+           case OMP_CLAUSE_LINEAR:
+             break;
            case OMP_CLAUSE_REDUCTION:
+             if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
+               reduction_omp_orig_ref = true;
+             break;
+           case OMP_CLAUSE__LOOPTEMP_:
+             /* Handle _looptemp_ clauses only on parallel.  */
+             if (fd)
+               continue;
              break;
            case OMP_CLAUSE_LASTPRIVATE:
              if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
@@ -2301,6 +3086,42 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                    continue;
                }
              break;
+           case OMP_CLAUSE_ALIGNED:
+             if (pass == 0)
+               continue;
+             var = OMP_CLAUSE_DECL (c);
+             if (TREE_CODE (TREE_TYPE (var)) == POINTER_TYPE
+                 && !is_global_var (var))
+               {
+                 new_var = maybe_lookup_decl (var, ctx);
+                 if (new_var == NULL_TREE)
+                   new_var = maybe_lookup_decl_in_outer_ctx (var, ctx);
+                 x = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
+                 x = build_call_expr_loc (clause_loc, x, 2, new_var,
+                                          omp_clause_aligned_alignment (c));
+                 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
+                 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
+                 gimplify_and_add (x, ilist);
+               }
+             else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
+                      && is_global_var (var))
+               {
+                 tree ptype = build_pointer_type (TREE_TYPE (var)), t, t2;
+                 new_var = lookup_decl (var, ctx);
+                 t = maybe_lookup_decl_in_outer_ctx (var, ctx);
+                 t = build_fold_addr_expr_loc (clause_loc, t);
+                 t2 = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
+                 t = build_call_expr_loc (clause_loc, t2, 2, t,
+                                          omp_clause_aligned_alignment (c));
+                 t = fold_convert_loc (clause_loc, ptype, t);
+                 x = create_tmp_var (ptype, NULL);
+                 t = build2 (MODIFY_EXPR, ptype, x, t);
+                 gimplify_and_add (t, ilist);
+                 t = build_simple_mem_ref_loc (clause_loc, x);
+                 SET_DECL_VALUE_EXPR (new_var, t);
+                 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+               }
+             continue;
            default:
              continue;
            }
@@ -2352,10 +3173,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                 allocate new backing storage for the new pointer
                 variable.  This allows us to avoid changing all the
                 code that expects a pointer to something that expects
-                a direct variable.  Note that this doesn't apply to
-                C++, since reference types are disallowed in data
-                sharing clauses there, except for NRV optimized
-                return values.  */
+                a direct variable.  */
              if (pass == 0)
                continue;
 
@@ -2367,15 +3185,26 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                }
              else if (TREE_CONSTANT (x))
                {
-                 const char *name = NULL;
-                 if (DECL_NAME (var))
-                   name = IDENTIFIER_POINTER (DECL_NAME (new_var));
-
-                 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
-                                         name);
-                 gimple_add_tmp_var (x);
-                 TREE_ADDRESSABLE (x) = 1;
-                 x = build_fold_addr_expr_loc (clause_loc, x);
+                 /* For a reduction with a placeholder in a SIMD loop,
+                    defer adding the initialization of the reference,
+                    because if we decide to use a SIMD array for it,
+                    the initialization could cause an expansion ICE.  */
+                 if (c_kind == OMP_CLAUSE_REDUCTION
+                     && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
+                     && is_simd)
+                   x = NULL_TREE;
+                 else
+                   {
+                     const char *name = NULL;
+                     if (DECL_NAME (var))
+                       name = IDENTIFIER_POINTER (DECL_NAME (new_var));
+
+                     x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
+                                             name);
+                     gimple_add_tmp_var (x);
+                     TREE_ADDRESSABLE (x) = 1;
+                     x = build_fold_addr_expr_loc (clause_loc, x);
+                   }
                }
              else
                {
@@ -2383,8 +3212,11 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                  x = build_call_expr_loc (clause_loc, atmp, 1, x);
                }
 
-             x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
-             gimplify_assign (new_var, x, ilist);
+             if (x)
+               {
+                 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
+                 gimplify_assign (new_var, x, ilist);
+               }
 
              new_var = build_simple_mem_ref_loc (clause_loc, new_var);
            }
@@ -2400,6 +3232,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
          switch (OMP_CLAUSE_CODE (c))
            {
            case OMP_CLAUSE_SHARED:
+             /* Ignore shared directives in teams construct.  */
+             if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
+               continue;
              /* Shared global vars are just accessed directly.  */
              if (is_global_var (new_var))
                break;
@@ -2437,9 +3272,39 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                }
              else
                x = NULL;
-             x = lang_hooks.decls.omp_clause_default_ctor (c, new_var, x);
-             if (x)
-               gimplify_and_add (x, ilist);
+           do_private:
+             tree nx;
+             nx = lang_hooks.decls.omp_clause_default_ctor (c, new_var, x);
+             if (is_simd)
+               {
+                 tree y = lang_hooks.decls.omp_clause_dtor (c, new_var);
+                 if ((TREE_ADDRESSABLE (new_var) || nx || y
+                      || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
+                     && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
+                                                      idx, lane, ivar, lvar))
+                   {
+                     if (nx)
+                       x = lang_hooks.decls.omp_clause_default_ctor
+                                               (c, unshare_expr (ivar), x);
+                     if (nx && x)
+                       gimplify_and_add (x, &llist[0]);
+                     if (y)
+                       {
+                         y = lang_hooks.decls.omp_clause_dtor (c, ivar);
+                         if (y)
+                           {
+                             gimple_seq tseq = NULL;
+
+                             dtor = y;
+                             gimplify_stmt (&dtor, &tseq);
+                             gimple_seq_add_seq (&llist[1], tseq);
+                           }
+                       }
+                     break;
+                   }
+               }
+             if (nx)
+               gimplify_and_add (nx, ilist);
              /* FALLTHRU */
 
            do_dtor:
@@ -2454,6 +3319,15 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                }
              break;
 
+           case OMP_CLAUSE_LINEAR:
+             if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+               goto do_firstprivate;
+             if (OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
+               x = NULL;
+             else
+               x = build_outer_var_ref (var, ctx);
+             goto do_private;
+
            case OMP_CLAUSE_FIRSTPRIVATE:
              if (is_task_ctx (ctx))
                {
@@ -2469,10 +3343,100 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                      goto do_dtor;
                    }
                }
+           do_firstprivate:
              x = build_outer_var_ref (var, ctx);
+             if (is_simd)
+               {
+                 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
+                     && gimple_omp_for_combined_into_p (ctx->stmt))
+                   {
+                     tree stept = POINTER_TYPE_P (TREE_TYPE (x))
+                                  ? sizetype : TREE_TYPE (x);
+                     tree t = fold_convert (stept,
+                                            OMP_CLAUSE_LINEAR_STEP (c));
+                     tree c = find_omp_clause (clauses,
+                                               OMP_CLAUSE__LOOPTEMP_);
+                     gcc_assert (c);
+                     tree l = OMP_CLAUSE_DECL (c);
+                     if (fd->collapse == 1)
+                       {
+                         tree n1 = fd->loop.n1;
+                         tree step = fd->loop.step;
+                         tree itype = TREE_TYPE (l);
+                         if (POINTER_TYPE_P (itype))
+                           itype = signed_type_for (itype);
+                         l = fold_build2 (MINUS_EXPR, itype, l, n1);
+                         if (TYPE_UNSIGNED (itype)
+                             && fd->loop.cond_code == GT_EXPR)
+                           l = fold_build2 (TRUNC_DIV_EXPR, itype,
+                                            fold_build1 (NEGATE_EXPR,
+                                                         itype, l),
+                                            fold_build1 (NEGATE_EXPR,
+                                                         itype, step));
+                         else
+                           l = fold_build2 (TRUNC_DIV_EXPR, itype, l, step);
+                       }
+                     t = fold_build2 (MULT_EXPR, stept,
+                                      fold_convert (stept, l), t);
+                     if (POINTER_TYPE_P (TREE_TYPE (x)))
+                       x = fold_build2 (POINTER_PLUS_EXPR,
+                                        TREE_TYPE (x), x, t);
+                     else
+                       x = fold_build2 (PLUS_EXPR, TREE_TYPE (x), x, t);
+                   }
+
+                 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LINEAR
+                      || TREE_ADDRESSABLE (new_var))
+                     && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
+                                                      idx, lane, ivar, lvar))
+                   {
+                     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
+                       {
+                         tree iv = create_tmp_var (TREE_TYPE (new_var), NULL);
+                         x = lang_hooks.decls.omp_clause_copy_ctor (c, iv, x);
+                         gimplify_and_add (x, ilist);
+                         gimple_stmt_iterator gsi
+                           = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
+                         gimple g
+                           = gimple_build_assign (unshare_expr (lvar), iv);
+                         gsi_insert_before_without_update (&gsi, g,
+                                                           GSI_SAME_STMT);
+                         tree stept = POINTER_TYPE_P (TREE_TYPE (x))
+                                      ? sizetype : TREE_TYPE (x);
+                         tree t = fold_convert (stept,
+                                                OMP_CLAUSE_LINEAR_STEP (c));
+                         enum tree_code code = PLUS_EXPR;
+                         if (POINTER_TYPE_P (TREE_TYPE (new_var)))
+                           code = POINTER_PLUS_EXPR;
+                         g = gimple_build_assign_with_ops (code, iv, iv, t);
+                         gsi_insert_before_without_update (&gsi, g,
+                                                           GSI_SAME_STMT);
+                         break;
+                       }
+                     x = lang_hooks.decls.omp_clause_copy_ctor
+                                               (c, unshare_expr (ivar), x);
+                     gimplify_and_add (x, &llist[0]);
+                     x = lang_hooks.decls.omp_clause_dtor (c, ivar);
+                     if (x)
+                       {
+                         gimple_seq tseq = NULL;
+
+                         dtor = x;
+                         gimplify_stmt (&dtor, &tseq);
+                         gimple_seq_add_seq (&llist[1], tseq);
+                       }
+                     break;
+                   }
+               }
              x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
              gimplify_and_add (x, ilist);
              goto do_dtor;
+
+           case OMP_CLAUSE__LOOPTEMP_:
+             gcc_assert (is_parallel_ctx (ctx));
+             x = build_outer_var_ref (var, ctx);
+             x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
+             gimplify_and_add (x, ilist);
              break;
 
            case OMP_CLAUSE_COPYIN:
@@ -2487,23 +3451,148 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
              if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
                {
                  tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
+                 gimple tseq;
                  x = build_outer_var_ref (var, ctx);
 
-                 if (is_reference (var))
+                 if (is_reference (var)
+                     && !useless_type_conversion_p (TREE_TYPE (placeholder),
+                                                    TREE_TYPE (x)))
                    x = build_fold_addr_expr_loc (clause_loc, x);
                  SET_DECL_VALUE_EXPR (placeholder, x);
                  DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
-                 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c), ctx);
-                 gimple_seq_add_seq (ilist,
-                                     OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c));
+                 tree new_vard = new_var;
+                 if (is_reference (var))
+                   {
+                     gcc_assert (TREE_CODE (new_var) == MEM_REF);
+                     new_vard = TREE_OPERAND (new_var, 0);
+                     gcc_assert (DECL_P (new_vard));
+                   }
+                 if (is_simd
+                     && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
+                                                      idx, lane, ivar, lvar))
+                   {
+                     if (new_vard == new_var)
+                       {
+                         gcc_assert (DECL_VALUE_EXPR (new_var) == lvar);
+                         SET_DECL_VALUE_EXPR (new_var, ivar);
+                       }
+                     else
+                       {
+                         SET_DECL_VALUE_EXPR (new_vard,
+                                              build_fold_addr_expr (ivar));
+                         DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
+                       }
+                     x = lang_hooks.decls.omp_clause_default_ctor
+                               (c, unshare_expr (ivar),
+                                build_outer_var_ref (var, ctx));
+                     if (x)
+                       gimplify_and_add (x, &llist[0]);
+                     if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+                       {
+                         tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+                         lower_omp (&tseq, ctx);
+                         gimple_seq_add_seq (&llist[0], tseq);
+                       }
+                     OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+                     tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+                     lower_omp (&tseq, ctx);
+                     gimple_seq_add_seq (&llist[1], tseq);
+                     OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
+                     DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+                     if (new_vard == new_var)
+                       SET_DECL_VALUE_EXPR (new_var, lvar);
+                     else
+                       SET_DECL_VALUE_EXPR (new_vard,
+                                            build_fold_addr_expr (lvar));
+                     x = lang_hooks.decls.omp_clause_dtor (c, ivar);
+                     if (x)
+                       {
+                         tseq = NULL;
+                         dtor = x;
+                         gimplify_stmt (&dtor, &tseq);
+                         gimple_seq_add_seq (&llist[1], tseq);
+                       }
+                     break;
+                   }
+                 /* If this is a reference to a constant-size reduction
+                    var with a placeholder, we haven't emitted the
+                    initializer for it because that is undesirable if
+                    SIMD arrays are used.  But if they aren't used, we
+                    need to emit the deferred initialization now.  */
+                 else if (is_reference (var) && is_simd)
+                   {
+                     tree z
+                       = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_vard)));
+                     if (TREE_CONSTANT (z))
+                       {
+                         const char *name = NULL;
+                         if (DECL_NAME (var))
+                           name = IDENTIFIER_POINTER (DECL_NAME (new_vard));
+
+                         z = create_tmp_var_raw
+                               (TREE_TYPE (TREE_TYPE (new_vard)), name);
+                         gimple_add_tmp_var (z);
+                         TREE_ADDRESSABLE (z) = 1;
+                         z = build_fold_addr_expr_loc (clause_loc, z);
+                         gimplify_assign (new_vard, z, ilist);
+                       }
+                   }
+                 x = lang_hooks.decls.omp_clause_default_ctor
+                               (c, new_var, unshare_expr (x));
+                 if (x)
+                   gimplify_and_add (x, ilist);
+                 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
+                   {
+                     tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
+                     lower_omp (&tseq, ctx);
+                     gimple_seq_add_seq (ilist, tseq);
+                   }
                  OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
+                 if (is_simd)
+                   {
+                     tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
+                     lower_omp (&tseq, ctx);
+                     gimple_seq_add_seq (dlist, tseq);
+                     OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
+                   }
                  DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
+                 goto do_dtor;
                }
              else
                {
                  x = omp_reduction_init (c, TREE_TYPE (new_var));
                  gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
-                 gimplify_assign (new_var, x, ilist);
+                 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+
+                 /* reduction(-:var) sums up the partial results, so it
+                    acts identically to reduction(+:var).  */
+                 if (code == MINUS_EXPR)
+                   code = PLUS_EXPR;
+
+                 if (is_simd
+                     && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
+                                                      idx, lane, ivar, lvar))
+                   {
+                     tree ref = build_outer_var_ref (var, ctx);
+
+                     gimplify_assign (unshare_expr (ivar), x, &llist[0]);
+
+                     x = build2 (code, TREE_TYPE (ref), ref, ivar);
+                     ref = build_outer_var_ref (var, ctx);
+                     gimplify_assign (ref, x, &llist[1]);
+                   }
+                 else
+                   {
+                     gimplify_assign (new_var, x, ilist);
+                     if (is_simd)
+                       {
+                         tree ref = build_outer_var_ref (var, ctx);
+
+                         x = build2 (code, TREE_TYPE (ref), ref, new_var);
+                         ref = build_outer_var_ref (var, ctx);
+                         gimplify_assign (ref, x, dlist);
+                       }
+                   }
                }
              break;
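As the comment above says, reduction(-:var) is lowered exactly like reduction(+:var): each private copy starts at the identity 0 and the partial results are added back. A minimal user-level illustration (a standalone sketch, not part of this patch; compile with -fopenmp):

```c
#include <stdio.h>

int
main (void)
{
  int s = 100;
  /* Each thread's private s starts at 0; the partials are *added* back
     into the original, so the result is 100 - (0 + 1 + ... + 9) on any
     number of threads.  */
  #pragma omp parallel for reduction(-:s)
  for (int i = 0; i < 10; i++)
    s -= i;
  printf ("%d\n", s);   /* prints 55 */
  return 0;
}
```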
 
@@ -2513,6 +3602,52 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
        }
     }
 
+  if (lane)
+    {
+      tree uid = create_tmp_var (ptr_type_node, "simduid");
+      /* Don't want uninit warnings on simduid; it is always uninitialized,
+        but we use it not for the value, only for the DECL_UID.  */
+      TREE_NO_WARNING (uid) = 1;
+      gimple g
+       = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
+      gimple_call_set_lhs (g, lane);
+      gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
+      gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
+      OMP_CLAUSE__SIMDUID__DECL (c) = uid;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
+      gimple_omp_for_set_clauses (ctx->stmt, c);
+      g = gimple_build_assign_with_ops (INTEGER_CST, lane,
+                                       build_int_cst (unsigned_type_node, 0),
+                                       NULL_TREE);
+      gimple_seq_add_stmt (ilist, g);
+      for (int i = 0; i < 2; i++)
+       if (llist[i])
+         {
+           tree vf = create_tmp_var (unsigned_type_node, NULL);
+           g = gimple_build_call_internal (IFN_GOMP_SIMD_VF, 1, uid);
+           gimple_call_set_lhs (g, vf);
+           gimple_seq *seq = i == 0 ? ilist : dlist;
+           gimple_seq_add_stmt (seq, g);
+           tree t = build_int_cst (unsigned_type_node, 0);
+           g = gimple_build_assign_with_ops (INTEGER_CST, idx, t, NULL_TREE);
+           gimple_seq_add_stmt (seq, g);
+           tree body = create_artificial_label (UNKNOWN_LOCATION);
+           tree header = create_artificial_label (UNKNOWN_LOCATION);
+           tree end = create_artificial_label (UNKNOWN_LOCATION);
+           gimple_seq_add_stmt (seq, gimple_build_goto (header));
+           gimple_seq_add_stmt (seq, gimple_build_label (body));
+           gimple_seq_add_seq (seq, llist[i]);
+           t = build_int_cst (unsigned_type_node, 1);
+           g = gimple_build_assign_with_ops (PLUS_EXPR, idx, idx, t);
+           gimple_seq_add_stmt (seq, g);
+           gimple_seq_add_stmt (seq, gimple_build_label (header));
+           g = gimple_build_cond (LT_EXPR, idx, vf, body, end);
+           gimple_seq_add_stmt (seq, g);
+           gimple_seq_add_stmt (seq, gimple_build_label (end));
+         }
+    }
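The block above wraps each deferred per-lane init/fini sequence (llist[0]/llist[1]) in a loop over all SIMD lanes, bounded by the IFN_GOMP_SIMD_VF result. A plain-C analogue of the emitted control flow (a sketch; `vf`, `buf` and the label names are illustrative stand-ins, not from the patch):

```c
#include <stdio.h>

int
main (void)
{
  unsigned vf = 8, idx = 0;   /* vf stands in for the GOMP_SIMD_VF result */
  int buf[8];                 /* stands in for a per-lane "omp simd array" */

  goto header;                /* mirrors gimple_build_goto (header) */
 body:
  buf[idx] = (int) idx;       /* stands in for the llist[i] sequence */
  idx = idx + 1;              /* the PLUS_EXPR on idx */
 header:
  if (idx < vf) goto body; else goto end;
 end:
  printf ("%u lanes, buf[0] = %d\n", vf, buf[0]);
  return 0;
}
```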
+
   /* The copyin sequence is not to be executed by the main thread, since
      that would result in self-copies.  Perhaps not visible to scalars,
      but it certainly is to C++ operator=.  */
@@ -2530,9 +3665,34 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
      master thread doesn't modify it before it is copied over in all
      threads.  Similarly for variables in both firstprivate and
      lastprivate clauses we need to ensure the lastprivate copying
-     happens after firstprivate copying in all threads.  */
-  if (copyin_by_ref || lastprivate_firstprivate)
-    gimplify_and_add (build_omp_barrier (), ilist);
+     happens after firstprivate copying in all threads.  And similarly
+     for UDRs if the initializer expression refers to omp_orig.  */
+  if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
+    {
+      /* Don't add any barrier for #pragma omp simd or
+        #pragma omp distribute.  */
+      if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
+         || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR)
+       gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE));
+    }
+
+  /* If max_vf is non-zero, then we can use only a vectorization factor
+     up to the max_vf we chose.  So stick it into the safelen clause.  */
+  if (max_vf)
+    {
+      tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
+                               OMP_CLAUSE_SAFELEN);
+      if (c == NULL_TREE
+         || compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
+                              max_vf) == 1)
+       {
+         c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
+         OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
+                                                      max_vf);
+         OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
+         gimple_omp_for_set_clauses (ctx->stmt, c);
+       }
+    }
 }
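The synthesized OMP_CLAUSE_SAFELEN above has the same effect as a user-written safelen clause. For reference, a standalone sketch assuming the chosen max_vf was 4 (names are illustrative):

```c
/* safelen(4): no two iterations more than 3 apart may run concurrently,
   so the effective vectorization factor is capped at 4.  */
void
add_arrays (int n, float *a, const float *b, const float *c)
{
  #pragma omp simd safelen(4)
  for (int i = 0; i < n; i++)
    a[i] = b[i] + c[i];
}
```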
 
 
@@ -2542,13 +3702,18 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 
 static void
 lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
-                           omp_context *ctx)
+                          omp_context *ctx)
 {
-  tree x, c, label = NULL;
+  tree x, c, label = NULL, orig_clauses = clauses;
   bool par_clauses = false;
+  tree simduid = NULL, lastlane = NULL;
 
-  /* Early exit if there are no lastprivate clauses.  */
-  clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
+  /* Early exit if there are no lastprivate or linear clauses.  */
+  for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
+    if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LASTPRIVATE
+       || (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LINEAR
+           && !OMP_CLAUSE_LINEAR_NO_COPYOUT (clauses)))
+      break;
   if (clauses == NULL)
     {
       /* If this was a workshare clause, see if it had been combined
@@ -2585,23 +3750,59 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
       gimple_seq_add_stmt (stmt_list, gimple_build_label (label_true));
     }
 
+  if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
+      && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_SIMD)
+    {
+      simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_);
+      if (simduid)
+       simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
+    }
+
   for (c = clauses; c ;)
     {
       tree var, new_var;
       location_t clause_loc = OMP_CLAUSE_LOCATION (c);
 
-      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+         || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
+             && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
        {
          var = OMP_CLAUSE_DECL (c);
          new_var = lookup_decl (var, ctx);
 
-         if (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
+         if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
+           {
+             tree val = DECL_VALUE_EXPR (new_var);
+             if (TREE_CODE (val) == ARRAY_REF
+                 && VAR_P (TREE_OPERAND (val, 0))
+                 && lookup_attribute ("omp simd array",
+                                      DECL_ATTRIBUTES (TREE_OPERAND (val,
+                                                                     0))))
+               {
+                 if (lastlane == NULL)
+                   {
+                     lastlane = create_tmp_var (unsigned_type_node, NULL);
+                     gimple g
+                       = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
+                                                     2, simduid,
+                                                     TREE_OPERAND (val, 1));
+                     gimple_call_set_lhs (g, lastlane);
+                     gimple_seq_add_stmt (stmt_list, g);
+                   }
+                 new_var = build4 (ARRAY_REF, TREE_TYPE (val),
+                                   TREE_OPERAND (val, 0), lastlane,
+                                   NULL_TREE, NULL_TREE);
+               }
+           }
+
+         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+             && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
            {
              lower_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
              gimple_seq_add_seq (stmt_list,
                                  OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
+             OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c) = NULL;
            }
-         OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c) = NULL;
 
          x = build_outer_var_ref (var, ctx);
          if (is_reference (var))
@@ -2643,6 +3844,11 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
   tree x, c;
   int count = 0;
 
+  /* SIMD reductions are handled in lower_rec_input_clauses.  */
+  if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
+      && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_SIMD)
+    return;
+
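A user-level example of the case this early return covers (illustrative only): for a loop like the one below, the per-lane private copies and the final combination are both emitted by lower_rec_input_clauses, so lower_reduction_clauses has nothing left to do.

```c
float
dot (int n, const float *a, const float *b)
{
  float s = 0.0f;
  /* Each SIMD lane accumulates into its own element of a compiler-generated
     "omp simd array"; the elements are summed after the loop.  */
  #pragma omp simd reduction(+:s)
  for (int i = 0; i < n; i++)
    s += a[i] * b[i];
  return s;
}
```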
   /* First see if there is exactly one reduction clause.  Use OMP_ATOMIC
      update in that case, otherwise use a lock.  */
   for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
@@ -2650,7 +3856,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
       {
        if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
          {
-           /* Never use OMP_ATOMIC for array reductions.  */
+           /* Never use OMP_ATOMIC for array reductions or UDRs.  */
            count = -1;
            break;
          }
@@ -2697,7 +3903,9 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
        {
          tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
 
-         if (is_reference (var))
+         if (is_reference (var)
+             && !useless_type_conversion_p (TREE_TYPE (placeholder),
+                                            TREE_TYPE (ref)))
            ref = build_fold_addr_expr_loc (clause_loc, ref);
          SET_DECL_VALUE_EXPR (placeholder, ref);
          DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
@@ -2800,6 +4008,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
        case OMP_CLAUSE_COPYIN:
        case OMP_CLAUSE_LASTPRIVATE:
        case OMP_CLAUSE_REDUCTION:
+       case OMP_CLAUSE__LOOPTEMP_:
          break;
        default:
          continue;
@@ -2820,6 +4029,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
        case OMP_CLAUSE_PRIVATE:
        case OMP_CLAUSE_FIRSTPRIVATE:
        case OMP_CLAUSE_COPYIN:
+       case OMP_CLAUSE__LOOPTEMP_:
          do_in = true;
          break;
 
@@ -2940,35 +4150,35 @@ gimple_build_cond_empty (tree cond)
 
 static void
 expand_parallel_call (struct omp_region *region, basic_block bb,
-                     gimple entry_stmt, VEC(tree,gc) *ws_args)
+                     gimple entry_stmt, vec<tree, va_gc> *ws_args)
 {
-  tree t, t1, t2, val, cond, c, clauses;
+  tree t, t1, t2, val, cond, c, clauses, flags;
   gimple_stmt_iterator gsi;
   gimple stmt;
   enum built_in_function start_ix;
   int start_ix2;
   location_t clause_loc;
-  VEC(tree,gc) *args;
+  vec<tree, va_gc> *args;
 
   clauses = gimple_omp_parallel_clauses (entry_stmt);
 
-  /* Determine what flavor of GOMP_parallel_start we will be
+  /* Determine what flavor of GOMP_parallel we will be
      emitting.  */
-  start_ix = BUILT_IN_GOMP_PARALLEL_START;
+  start_ix = BUILT_IN_GOMP_PARALLEL;
   if (is_combined_parallel (region))
     {
       switch (region->inner->type)
        {
        case GIMPLE_OMP_FOR:
          gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
-         start_ix2 = ((int)BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
+         start_ix2 = ((int)BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
                       + (region->inner->sched_kind
                          == OMP_CLAUSE_SCHEDULE_RUNTIME
                          ? 3 : region->inner->sched_kind));
          start_ix = (enum built_in_function)start_ix2;
          break;
        case GIMPLE_OMP_SECTIONS:
-         start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
+         start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
          break;
        default:
          gcc_unreachable ();
@@ -2979,6 +4189,7 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
      and there is no conditional.  */
   cond = NULL_TREE;
   val = build_int_cst (unsigned_type_node, 0);
+  flags = build_int_cst (unsigned_type_node, 0);
 
   c = find_omp_clause (clauses, OMP_CLAUSE_IF);
   if (c)
@@ -2993,6 +4204,10 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
   else
     clause_loc = gimple_location (entry_stmt);
 
+  c = find_omp_clause (clauses, OMP_CLAUSE_PROC_BIND);
+  if (c)
+    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
+
   /* Ensure 'val' is of the correct type.  */
   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
 
@@ -3053,13 +4268,17 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
 
          make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
          make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
+         if (current_loops)
+           {
+             add_bb_to_loop (then_bb, cond_bb->loop_father);
+             add_bb_to_loop (else_bb, cond_bb->loop_father);
+           }
          e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
          e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
 
          if (gimple_in_ssa_p (cfun))
            {
              gimple phi = create_phi_node (tmp_join, bb);
-             SSA_NAME_DEF_STMT (tmp_join) = phi;
              add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
              add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
            }
@@ -3080,33 +4299,19 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
     t1 = build_fold_addr_expr (t);
   t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
 
-  args = VEC_alloc (tree, gc, 3 + VEC_length (tree, ws_args));
-  VEC_quick_push (tree, args, t2);
-  VEC_quick_push (tree, args, t1);
-  VEC_quick_push (tree, args, val);
-  VEC_splice (tree, args, ws_args);
+  vec_alloc (args, 4 + vec_safe_length (ws_args));
+  args->quick_push (t2);
+  args->quick_push (t1);
+  args->quick_push (val);
+  if (ws_args)
+    args->splice (*ws_args);
+  args->quick_push (flags);
 
   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (start_ix), args);
 
   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
-
-  t = gimple_omp_parallel_data_arg (entry_stmt);
-  if (t == NULL)
-    t = null_pointer_node;
-  else
-    t = build_fold_addr_expr (t);
-  t = build_call_expr_loc (gimple_location (entry_stmt),
-                          gimple_omp_parallel_child_fn (entry_stmt), 1, t);
-  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-                           false, GSI_CONTINUE_LINKING);
-
-  t = build_call_expr_loc (gimple_location (entry_stmt),
-                          builtin_decl_explicit (BUILT_IN_GOMP_PARALLEL_END),
-                          0);
-  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-                           false, GSI_CONTINUE_LINKING);
 }
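Net effect of this hunk: the old three-step expansion (GOMP_parallel_start, a direct call of the child function, GOMP_parallel_end) collapses into a single GOMP_parallel call with a trailing flags argument (carrying e.g. the proc_bind kind). A rough sketch of what the expanded code amounts to, assuming the GCC 4.9 libgomp prototype; link with -fopenmp:

```c
#include <stdio.h>

/* Prototype as in GCC 4.9's libgomp; flags == 0 means no proc_bind.  */
extern void GOMP_parallel (void (*fn) (void *), void *data,
                           unsigned num_threads, unsigned flags);

static void
child_fn (void *data)
{
  printf ("thread sees arg = %d\n", *(int *) data);
}

int
main (void)
{
  int arg = 42;
  /* One combined call: starts the team, runs child_fn in every thread
     (including the caller), and joins -- no separate _start/_end pair.  */
  GOMP_parallel (child_fn, &arg, 0 /* runtime picks thread count */, 0);
  return 0;
}
```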
 
 
@@ -3116,7 +4321,7 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
 static void
 expand_task_call (basic_block bb, gimple entry_stmt)
 {
-  tree t, t1, t2, t3, flags, cond, c, c2, clauses;
+  tree t, t1, t2, t3, flags, cond, c, c2, clauses, depend;
   gimple_stmt_iterator gsi;
   location_t loc = gimple_location (entry_stmt);
 
@@ -3130,8 +4335,9 @@ expand_task_call (basic_block bb, gimple entry_stmt)
 
   c = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
   c2 = find_omp_clause (clauses, OMP_CLAUSE_MERGEABLE);
+  depend = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
   flags = build_int_cst (unsigned_type_node,
-                        (c ? 1 : 0) + (c2 ? 4 : 0));
+                        (c ? 1 : 0) + (c2 ? 4 : 0) + (depend ? 8 : 0));
 
   c = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
   if (c)
@@ -3142,6 +4348,10 @@ expand_task_call (basic_block bb, gimple entry_stmt)
                           build_int_cst (unsigned_type_node, 0));
       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, c);
     }
+  if (depend)
+    depend = OMP_CLAUSE_DECL (depend);
+  else
+    depend = build_int_cst (ptr_type_node, 0);
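At the user level, the depend clause located above comes from code like the following sketch (illustrative example); the dependence vector is forwarded to the runtime as the new eighth GOMP_task argument:

```c
#include <stdio.h>

void
pipeline (int *x)
{
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp task depend(out: x[0])
    x[0] = 42;

    #pragma omp task depend(in: x[0])
    printf ("%d\n", x[0]);      /* ordered after the producer task */
  }
}
```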
 
   gsi = gsi_last_bb (bb);
   t = gimple_omp_task_data_arg (entry_stmt);
@@ -3157,9 +4367,10 @@ expand_task_call (basic_block bb, gimple entry_stmt)
     t3 = build_fold_addr_expr_loc (loc, t);
 
   t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
-                      7, t1, t2, t3,
+                      8, t1, t2, t3,
                       gimple_omp_task_arg_size (entry_stmt),
-                      gimple_omp_task_arg_align (entry_stmt), cond, flags);
+                      gimple_omp_task_arg_align (entry_stmt), cond, flags,
+                      depend);
 
   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
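For reference, the FLAGS word assembled above packs the clauses into bits. A sketch with hypothetical enumerator names (the source passes bare constants; the final-clause value 2 comes from the surrounding GCC 4.9 code, not this hunk):

```c
/* Hypothetical names for the bits of GOMP_task's FLAGS argument.  */
enum gomp_task_flag_bits
{
  TASK_FLAG_UNTIED    = 1,  /* untied clause present */
  TASK_FLAG_FINAL     = 2,  /* final(expr) evaluated to true */
  TASK_FLAG_MERGEABLE = 4,  /* mergeable clause present */
  TASK_FLAG_DEPEND    = 8   /* depend clauses present */
};
```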
@@ -3194,12 +4405,12 @@ maybe_catch_exception (gimple_seq body)
 /* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
 
 static tree
-vec2chain (VEC(tree,gc) *v)
+vec2chain (vec<tree, va_gc> *v)
 {
   tree chain = NULL_TREE, t;
   unsigned ix;
 
-  FOR_EACH_VEC_ELT_REVERSE (tree, v, ix, t)
+  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
     {
       DECL_CHAIN (t) = chain;
       chain = t;
@@ -3337,7 +4548,7 @@ optimize_omp_library_calls (gimple entry_stmt)
                      && find_omp_clause (gimple_omp_task_clauses (entry_stmt),
                                          OMP_CLAUSE_UNTIED) != NULL);
 
-  FOR_EACH_BB (bb)
+  FOR_EACH_BB_FN (bb, cfun)
     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
       {
        gimple call = gsi_stmt (gsi);
@@ -3381,6 +4592,43 @@ optimize_omp_library_calls (gimple entry_stmt)
       }
 }
 
+/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
+   regimplified.  */
+
+static tree
+expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
+{
+  tree t = *tp;
+
+  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
+  if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t))
+    return t;
+
+  if (TREE_CODE (t) == ADDR_EXPR)
+    recompute_tree_invariant_for_addr_expr (t);
+
+  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
+  return NULL_TREE;
+}
+
+/* Prepend TO = FROM assignment before *GSI_P.  */
+
+static void
+expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from)
+{
+  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
+  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
+                                  true, GSI_SAME_STMT);
+  gimple stmt = gimple_build_assign (to, from);
+  gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
+  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
+      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
+    {
+      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+      gimple_regimplify_operands (stmt, &gsi);
+    }
+}
+
 /* Expand the OpenMP parallel or task directive starting at REGION.  */
 
 static void
@@ -3389,11 +4637,10 @@ expand_omp_taskreg (struct omp_region *region)
   basic_block entry_bb, exit_bb, new_bb;
   struct function *child_cfun;
   tree child_fn, block, t;
-  tree save_current;
   gimple_stmt_iterator gsi;
   gimple entry_stmt, stmt;
   edge e;
-  VEC(tree,gc) *ws_args;
+  vec<tree, va_gc> *ws_args;
 
   entry_stmt = last_stmt (region->entry);
   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
@@ -3499,9 +4746,9 @@ expand_omp_taskreg (struct omp_region *region)
              /* If we are in ssa form, we must load the value from the default
                 definition of the argument.  That should not be defined now,
                 since the argument is not used uninitialized.  */
-             gcc_assert (gimple_default_def (cfun, arg) == NULL);
+             gcc_assert (ssa_default_def (cfun, arg) == NULL);
              narg = make_ssa_name (arg, gimple_build_nop ());
-             set_default_def (arg, narg);
+             set_ssa_default_def (cfun, arg, narg);
              /* ?? Is setting the subcode really necessary ??  */
              gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (narg));
              gimple_assign_set_rhs1 (parcopy_stmt, narg);
@@ -3566,31 +4813,32 @@ expand_omp_taskreg (struct omp_region *region)
       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
       if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
+      /* When the OMP expansion process cannot guarantee an up-to-date
+         loop tree, arrange for the child function to fix up its loops.  */
+      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+       child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
 
       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
-      num = VEC_length (tree, child_cfun->local_decls);
+      num = vec_safe_length (child_cfun->local_decls);
       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
-         t = VEC_index (tree, child_cfun->local_decls, srcidx);
+         t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
-           VEC_replace (tree, child_cfun->local_decls, dstidx, t);
+           (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
       if (dstidx != num)
-       VEC_truncate (tree, child_cfun->local_decls, dstidx);
+       vec_safe_truncate (child_cfun->local_decls, dstidx);
 
       /* Inform the callgraph about the new function.  */
-      DECL_STRUCT_FUNCTION (child_fn)->curr_properties
-       = cfun->curr_properties & ~PROP_loops;
+      DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
       cgraph_add_new_function (child_fn, true);
 
       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
       push_cfun (child_cfun);
-      save_current = current_function_decl;
-      current_function_decl = child_fn;
       if (optimize)
        optimize_omp_library_calls (entry_stmt);
       rebuild_cgraph_edges ();
@@ -3604,14 +4852,13 @@ expand_omp_taskreg (struct omp_region *region)
          basic_block bb;
          bool changed = false;
 
-         FOR_EACH_BB (bb)
+         FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
       if (gimple_in_ssa_p (cfun))
        update_ssa (TODO_update_ssa);
-      current_function_decl = save_current;
       pop_cfun ();
     }
 
@@ -3625,118 +4872,490 @@ expand_omp_taskreg (struct omp_region *region)
 }
 
 
-/* A subroutine of expand_omp_for.  Generate code for a parallel
-   loop with any schedule.  Given parameters:
-
-       for (V = N1; V cond N2; V += STEP) BODY;
-
-   where COND is "<" or ">", we generate pseudocode
-
-       more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
-       if (more) goto L0; else goto L3;
-    L0:
-       V = istart0;
-       iend = iend0;
-    L1:
-       BODY;
-       V += STEP;
-       if (V cond iend) goto L1; else goto L2;
-    L2:
-       if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
-    L3:
-
-    If this is a combined omp parallel loop, instead of the call to
-    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
-
-    For collapsed loops, given parameters:
-      collapse(3)
-      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
-       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
-         for (V3 = N31; V3 cond3 N32; V3 += STEP3)
-           BODY;
-
-    we generate pseudocode
-
+/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
+   of the combined collapse > 1 loop constructs, generate code like:
+       if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
        if (cond3 is <)
          adj = STEP3 - 1;
        else
          adj = STEP3 + 1;
        count3 = (adj + N32 - N31) / STEP3;
+       if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
        if (cond2 is <)
          adj = STEP2 - 1;
        else
          adj = STEP2 + 1;
        count2 = (adj + N22 - N21) / STEP2;
+       if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
        if (cond1 is <)
          adj = STEP1 - 1;
        else
          adj = STEP1 + 1;
        count1 = (adj + N12 - N11) / STEP1;
        count = count1 * count2 * count3;
-       more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
-       if (more) goto L0; else goto L3;
-    L0:
-       V = istart0;
-       T = V;
-       V3 = N31 + (T % count3) * STEP3;
-       T = T / count3;
-       V2 = N21 + (T % count2) * STEP2;
-       T = T / count2;
-       V1 = N11 + T * STEP1;
-       iend = iend0;
-    L1:
-       BODY;
-       V += 1;
-       if (V < iend) goto L10; else goto L2;
-    L10:
-       V3 += STEP3;
-       if (V3 cond3 N32) goto L1; else goto L11;
-    L11:
-       V3 = N31;
-       V2 += STEP2;
-       if (V2 cond2 N22) goto L1; else goto L12;
-    L12:
-       V2 = N21;
-       V1 += STEP1;
-       goto L1;
-    L2:
-       if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
-    L3:
-
-      */
+   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
+       count = 0;
+   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
+   of the combined loop constructs, just initialize the COUNTS array
+   from the _looptemp_ clauses.  */
+
+/* NOTE: It *could* be better to moosh all of the BBs together,
+   creating one larger BB with all the computation and the unexpected
+   jump at the end.  I.e.
+
+   bool zero3, zero2, zero1, zero;
+
+   zero3 = N32 c3 N31;
+   count3 = (N32 - N31) /[cl] STEP3;
+   zero2 = N22 c2 N21;
+   count2 = (N22 - N21) /[cl] STEP2;
+   zero1 = N12 c1 N11;
+   count1 = (N12 - N11) /[cl] STEP1;
+   zero = zero3 || zero2 || zero1;
+   count = count1 * count2 * count3;
+   if (__builtin_expect(zero, false)) goto zero_iter_bb;
+
+   After all, we expect the zero=false, and thus we expect to have to
+   evaluate all of the comparison expressions, so short-circuiting
+   oughtn't be a win.  Since the condition isn't protecting a
+   denominator, we're not concerned about divide-by-zero, so we can
+   fully evaluate count even if a numerator turned out to be wrong.
+
+   It seems like putting this all together would create much better
+   scheduling opportunities, and less pressure on the chip's branch
+   predictor.  */
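A standalone C sketch of the per-level trip-count formula from the pseudocode above (illustrative only; uses signed arithmetic and truncating division, as the generated code does):

```c
/* count = (adj + N2 - N1) / STEP with adj = STEP - 1 (cond "<") or
   STEP + 1 (cond ">"), or 0 when the __builtin_expect guard fires.  */
static unsigned long
trip_count (long n1, long n2, long step)
{
  long adj = step > 0 ? step - 1 : step + 1;
  if (step > 0 ? n2 < n1 : n2 > n1)   /* N2 cond N1: zero iterations */
    return 0;
  return (unsigned long) ((adj + n2 - n1) / step);
}

/* trip_count (0, 10, 3) == 4   (i = 0, 3, 6, 9)
   trip_count (10, 0, -3) == 4  (i = 10, 7, 4, 1)  */
```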
 
 static void
-expand_omp_for_generic (struct omp_region *region,
-                       struct omp_for_data *fd,
-                       enum built_in_function start_fn,
-                       enum built_in_function next_fn)
+expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
+                           basic_block &entry_bb, tree *counts,
+                           basic_block &zero_iter_bb, int &first_zero_iter,
+                           basic_block &l2_dom_bb)
 {
-  tree type, istart0, iend0, iend;
-  tree t, vmain, vback, bias = NULL_TREE;
-  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
-  basic_block l2_bb = NULL, l3_bb = NULL;
-  gimple_stmt_iterator gsi;
+  tree t, type = TREE_TYPE (fd->loop.v);
   gimple stmt;
-  bool in_combined_parallel = is_combined_parallel (region);
-  bool broken_loop = region->cont == NULL;
   edge e, ne;
-  tree *counts = NULL;
   int i;
 
-  gcc_assert (!broken_loop || !in_combined_parallel);
-  gcc_assert (fd->iter_type == long_integer_type_node
-             || !in_combined_parallel);
+  /* Collapsed loops need work for expansion into SSA form.  */
+  gcc_assert (!gimple_in_ssa_p (cfun));
+
+  if (gimple_omp_for_combined_into_p (fd->for_stmt)
+      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
+    {
+      /* The first two _looptemp_ clauses are for istart/iend; counts[0]
+        isn't supposed to be handled, as the inner loop doesn't
+        use it.  */
+      tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                    OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      for (i = 0; i < fd->collapse; i++)
+       {
+         innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                   OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         if (i)
+           counts[i] = OMP_CLAUSE_DECL (innerc);
+         else
+           counts[0] = NULL_TREE;
+       }
+      return;
+    }
+
+  for (i = 0; i < fd->collapse; i++)
+    {
+      tree itype = TREE_TYPE (fd->loops[i].v);
+
+      if (SSA_VAR_P (fd->loop.n2)
+         && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
+                               fold_convert (itype, fd->loops[i].n1),
+                               fold_convert (itype, fd->loops[i].n2)))
+             == NULL_TREE || !integer_onep (t)))
+       {
+         tree n1, n2;
+         n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+         n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
+                                        true, GSI_SAME_STMT);
+         n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+         n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
+                                        true, GSI_SAME_STMT);
+         stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
+                                   NULL_TREE, NULL_TREE);
+         gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+         if (walk_tree (gimple_cond_lhs_ptr (stmt),
+                        expand_omp_regimplify_p, NULL, NULL)
+             || walk_tree (gimple_cond_rhs_ptr (stmt),
+                           expand_omp_regimplify_p, NULL, NULL))
+           {
+             *gsi = gsi_for_stmt (stmt);
+             gimple_regimplify_operands (stmt, gsi);
+           }
+         e = split_block (entry_bb, stmt);
+         if (zero_iter_bb == NULL)
+           {
+             first_zero_iter = i;
+             zero_iter_bb = create_empty_bb (entry_bb);
+             if (current_loops)
+               add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
+             *gsi = gsi_after_labels (zero_iter_bb);
+             stmt = gimple_build_assign (fd->loop.n2,
+                                         build_zero_cst (type));
+             gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+             set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
+                                      entry_bb);
+           }
+         ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
+         ne->probability = REG_BR_PROB_BASE / 2000 - 1;
+         e->flags = EDGE_TRUE_VALUE;
+         e->probability = REG_BR_PROB_BASE - ne->probability;
+         if (l2_dom_bb == NULL)
+           l2_dom_bb = entry_bb;
+         entry_bb = e->dest;
+         *gsi = gsi_last_bb (entry_bb);
+       }
+
+      if (POINTER_TYPE_P (itype))
+       itype = signed_type_for (itype);
+      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
+                                ? -1 : 1));
+      t = fold_build2 (PLUS_EXPR, itype,
+                      fold_convert (itype, fd->loops[i].step), t);
+      t = fold_build2 (PLUS_EXPR, itype, t,
+                      fold_convert (itype, fd->loops[i].n2));
+      t = fold_build2 (MINUS_EXPR, itype, t,
+                      fold_convert (itype, fd->loops[i].n1));
+      /* ?? We could probably use CEIL_DIV_EXPR instead of
+        TRUNC_DIV_EXPR plus adjusting by hand.  Unless we can't
+        generate the same code in the end, because generically we
+        don't know that the values involved must be negative for
+        GT??  */
+      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
+       t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                        fold_build1 (NEGATE_EXPR, itype, t),
+                        fold_build1 (NEGATE_EXPR, itype,
+                                     fold_convert (itype,
+                                                   fd->loops[i].step)));
+      else
+       t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+                        fold_convert (itype, fd->loops[i].step));
+      t = fold_convert (type, t);
+      if (TREE_CODE (t) == INTEGER_CST)
+       counts[i] = t;
+      else
+       {
+         counts[i] = create_tmp_reg (type, ".count");
+         expand_omp_build_assign (gsi, counts[i], t);
+       }
+      if (SSA_VAR_P (fd->loop.n2))
+       {
+         if (i == 0)
+           t = counts[0];
+         else
+           t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
+         expand_omp_build_assign (gsi, fd->loop.n2, t);
+       }
+    }
+}
+
+
+/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
+       T = V;
+       V3 = N31 + (T % count3) * STEP3;
+       T = T / count3;
+       V2 = N21 + (T % count2) * STEP2;
+       T = T / count2;
+       V1 = N11 + T * STEP1;
+   if this loop doesn't have an inner loop construct combined with it.
+   If it does have an inner loop construct combined with it and the
+   iteration count isn't known constant, store values from counts array
+   into its _looptemp_ temporaries instead.  */
+
+static void
+expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
+                         tree *counts, gimple inner_stmt, tree startvar)
+{
+  int i;
+  if (gimple_omp_for_combined_p (fd->for_stmt))
+    {
+      /* If fd->loop.n2 is constant, then no propagation of the counts
+        is needed, they are constant.  */
+      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
+       return;
+
+      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
+                    ? gimple_omp_parallel_clauses (inner_stmt)
+                    : gimple_omp_for_clauses (inner_stmt);
+      /* The first two _looptemp_ clauses are for istart/iend; counts[0]
+        isn't supposed to be handled, as the inner loop doesn't
+        use it.  */
+      tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      for (i = 0; i < fd->collapse; i++)
+       {
+         innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                   OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         if (i)
+           {
+             tree tem = OMP_CLAUSE_DECL (innerc);
+             tree t = fold_convert (TREE_TYPE (tem), counts[i]);
+             t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
+                                           false, GSI_CONTINUE_LINKING);
+             gimple stmt = gimple_build_assign (tem, t);
+             gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+           }
+       }
+      return;
+    }
+
+  tree type = TREE_TYPE (fd->loop.v);
+  tree tem = create_tmp_reg (type, ".tem");
+  gimple stmt = gimple_build_assign (tem, startvar);
+  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+
+  for (i = fd->collapse - 1; i >= 0; i--)
+    {
+      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
+      itype = vtype;
+      if (POINTER_TYPE_P (vtype))
+       itype = signed_type_for (vtype);
+      if (i != 0)
+       t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
+      else
+       t = tem;
+      t = fold_convert (itype, t);
+      t = fold_build2 (MULT_EXPR, itype, t,
+                      fold_convert (itype, fd->loops[i].step));
+      if (POINTER_TYPE_P (vtype))
+       t = fold_build_pointer_plus (fd->loops[i].n1, t);
+      else
+       t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+      t = force_gimple_operand_gsi (gsi, t,
+                                   DECL_P (fd->loops[i].v)
+                                   && TREE_ADDRESSABLE (fd->loops[i].v),
+                                   NULL_TREE, false,
+                                   GSI_CONTINUE_LINKING);
+      stmt = gimple_build_assign (fd->loops[i].v, t);
+      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+      if (i != 0)
+       {
+         t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
+         t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
+                                       false, GSI_CONTINUE_LINKING);
+         stmt = gimple_build_assign (tem, t);
+         gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+       }
+    }
+}
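For the non-combined path, a standalone sketch of the div/mod decomposition described in the comment above, for collapse == 3 (names are illustrative):

```c
/* Recover V1..V3 from the flattened iteration number T.  */
static void
init_vars (unsigned long t, const long n1[3], const long step[3],
           const unsigned long count[3], long v[3])
{
  v[2] = n1[2] + (long) (t % count[2]) * step[2]; /* V3 = N31 + (T % count3) * STEP3 */
  t = t / count[2];                               /* T = T / count3 */
  v[1] = n1[1] + (long) (t % count[1]) * step[1]; /* V2 = N21 + (T % count2) * STEP2 */
  t = t / count[1];                               /* T = T / count2 */
  v[0] = n1[0] + (long) t * step[0];              /* V1 = N11 + T * STEP1 */
}
```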
+
+
+/* Helper function for expand_omp_for_*.  Generate code like:
+    L10:
+       V3 += STEP3;
+       if (V3 cond3 N32) goto BODY_BB; else goto L11;
+    L11:
+       V3 = N31;
+       V2 += STEP2;
+       if (V2 cond2 N22) goto BODY_BB; else goto L12;
+    L12:
+       V2 = N21;
+       V1 += STEP1;
+       goto BODY_BB;  */
+
+static basic_block
+extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
+                            basic_block body_bb)
+{
+  basic_block last_bb, bb, collapse_bb = NULL;
+  int i;
+  gimple_stmt_iterator gsi;
+  edge e;
+  tree t;
+  gimple stmt;
+
+  last_bb = cont_bb;
+  for (i = fd->collapse - 1; i >= 0; i--)
+    {
+      tree vtype = TREE_TYPE (fd->loops[i].v);
+
+      bb = create_empty_bb (last_bb);
+      if (current_loops)
+       add_bb_to_loop (bb, last_bb->loop_father);
+      gsi = gsi_start_bb (bb);
+
+      if (i < fd->collapse - 1)
+       {
+         e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
+         e->probability = REG_BR_PROB_BASE / 8;
+
+         t = fd->loops[i + 1].n1;
+         t = force_gimple_operand_gsi (&gsi, t,
+                                       DECL_P (fd->loops[i + 1].v)
+                                       && TREE_ADDRESSABLE (fd->loops[i
+                                                                      + 1].v),
+                                       NULL_TREE, false,
+                                       GSI_CONTINUE_LINKING);
+         stmt = gimple_build_assign (fd->loops[i + 1].v, t);
+         gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+       }
+      else
+       collapse_bb = bb;
+
+      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
+
+      if (POINTER_TYPE_P (vtype))
+       t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
+      else
+       t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
+      t = force_gimple_operand_gsi (&gsi, t,
+                                   DECL_P (fd->loops[i].v)
+                                   && TREE_ADDRESSABLE (fd->loops[i].v),
+                                   NULL_TREE, false, GSI_CONTINUE_LINKING);
+      stmt = gimple_build_assign (fd->loops[i].v, t);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      if (i > 0)
+       {
+         t = fd->loops[i].n2;
+         t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+                                       false, GSI_CONTINUE_LINKING);
+         tree v = fd->loops[i].v;
+         if (DECL_P (v) && TREE_ADDRESSABLE (v))
+           v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+                                         false, GSI_CONTINUE_LINKING);
+         t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
+         stmt = gimple_build_cond_empty (t);
+         gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+         e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
+         e->probability = REG_BR_PROB_BASE * 7 / 8;
+       }
+      else
+       make_edge (bb, body_bb, EDGE_FALLTHRU);
+      last_bb = bb;
+    }
+
+  return collapse_bb;
+}
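A standalone analogue of the generated update cascade for collapse == 2 (illustrative; both conds assumed "<"). The outermost bump is deliberately unconditional: overall termination is driven by the flattened iteration count, not by this code:

```c
/* Advance the loop variables after one body execution; returning here
   corresponds to the goto BODY_BB edges above.  */
static void
update_vars (long v[2], const long n1[2], const long n2[2],
             const long step[2])
{
  v[1] += step[1];          /* V2 += STEP2 */
  if (v[1] < n2[1])         /* if (V2 cond2 N22) goto BODY_BB */
    return;
  v[1] = n1[1];             /* V2 = N21 */
  v[0] += step[0];          /* V1 += STEP1; goto BODY_BB */
}
```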
+
+
+/* A subroutine of expand_omp_for.  Generate code for a parallel
+   loop with any schedule.  Given parameters:
+
+       for (V = N1; V cond N2; V += STEP) BODY;
+
+   where COND is "<" or ">", we generate pseudocode
+
+       more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
+       if (more) goto L0; else goto L3;
+    L0:
+       V = istart0;
+       iend = iend0;
+    L1:
+       BODY;
+       V += STEP;
+       if (V cond iend) goto L1; else goto L2;
+    L2:
+       if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
+    L3:
+
+    If this is a combined omp parallel loop, instead of the call to
+    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
+    If this is gimple_omp_for_combined_p loop, then instead of assigning
+    V and iend in L0 we assign the first two _looptemp_ clause decls of the
+    inner GIMPLE_OMP_FOR and V += STEP; and
+    if (V cond iend) goto L1; else goto L2; are removed.
+
+    For collapsed loops, given parameters:
+      collapse(3)
+      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
+       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
+         for (V3 = N31; V3 cond3 N32; V3 += STEP3)
+           BODY;
+
+    we generate pseudocode
+
+       if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
+       if (cond3 is <)
+         adj = STEP3 - 1;
+       else
+         adj = STEP3 + 1;
+       count3 = (adj + N32 - N31) / STEP3;
+       if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
+       if (cond2 is <)
+         adj = STEP2 - 1;
+       else
+         adj = STEP2 + 1;
+       count2 = (adj + N22 - N21) / STEP2;
+       if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
+       if (cond1 is <)
+         adj = STEP1 - 1;
+       else
+         adj = STEP1 + 1;
+       count1 = (adj + N12 - N11) / STEP1;
+       count = count1 * count2 * count3;
+       goto Z1;
+    Z0:
+       count = 0;
+    Z1:
+       more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
+       if (more) goto L0; else goto L3;
+    L0:
+       V = istart0;
+       T = V;
+       V3 = N31 + (T % count3) * STEP3;
+       T = T / count3;
+       V2 = N21 + (T % count2) * STEP2;
+       T = T / count2;
+       V1 = N11 + T * STEP1;
+       iend = iend0;
+    L1:
+       BODY;
+       V += 1;
+       if (V < iend) goto L10; else goto L2;
+    L10:
+       V3 += STEP3;
+       if (V3 cond3 N32) goto L1; else goto L11;
+    L11:
+       V3 = N31;
+       V2 += STEP2;
+       if (V2 cond2 N22) goto L1; else goto L12;
+    L12:
+       V2 = N21;
+       V1 += STEP1;
+       goto L1;
+    L2:
+       if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
+    L3:
+
+      */
+
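+/* Editor's note: a minimal illustrative mapping of the scheme above onto
+   the libgomp entry points, assuming a dynamic schedule (the "foo" in the
+   pseudocode stands for the schedule kind).  For
+
+       #pragma omp parallel for schedule(dynamic, 4)
+       for (i = 0; i < n; i++)
+         body (i);
+
+   each thread effectively executes
+
+       if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
+         do
+           for (i = istart0; i < iend0; i++)
+             body (i);
+         while (GOMP_loop_dynamic_next (&istart0, &iend0));
+       GOMP_loop_end ();
+
+   which corresponds to the L0/L1/L2/L3 control flow built below.  */
+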
+static void
+expand_omp_for_generic (struct omp_region *region,
+                       struct omp_for_data *fd,
+                       enum built_in_function start_fn,
+                       enum built_in_function next_fn,
+                       gimple inner_stmt)
+{
+  tree type, istart0, iend0, iend;
+  tree t, vmain, vback, bias = NULL_TREE;
+  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
+  basic_block l2_bb = NULL, l3_bb = NULL;
+  gimple_stmt_iterator gsi;
+  gimple stmt;
+  bool in_combined_parallel = is_combined_parallel (region);
+  bool broken_loop = region->cont == NULL;
+  edge e, ne;
+  tree *counts = NULL;
+  int i;
+
+  gcc_assert (!broken_loop || !in_combined_parallel);
+  gcc_assert (fd->iter_type == long_integer_type_node
+             || !in_combined_parallel);
 
   type = TREE_TYPE (fd->loop.v);
   istart0 = create_tmp_var (fd->iter_type, ".istart0");
   iend0 = create_tmp_var (fd->iter_type, ".iend0");
   TREE_ADDRESSABLE (istart0) = 1;
   TREE_ADDRESSABLE (iend0) = 1;
-  if (gimple_referenced_vars (cfun))
-    {
-      add_referenced_var (istart0);
-      add_referenced_var (iend0);
-    }
 
   /* See if we need to bias by LLONG_MIN.  */
   if (fd->iter_type == long_long_unsigned_type_node
@@ -3785,56 +5404,30 @@ expand_omp_for_generic (struct omp_region *region,
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
   if (fd->collapse > 1)
     {
-      /* collapsed loops need work for expansion in SSA form.  */
-      gcc_assert (!gimple_in_ssa_p (cfun));
-      counts = (tree *) alloca (fd->collapse * sizeof (tree));
-      for (i = 0; i < fd->collapse; i++)
+      int first_zero_iter = -1;
+      basic_block zero_iter_bb = NULL, l2_dom_bb = NULL;
+
+      counts = XALLOCAVEC (tree, fd->collapse);
+      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
+                                 zero_iter_bb, first_zero_iter,
+                                 l2_dom_bb);
+
+      if (zero_iter_bb)
        {
-         tree itype = TREE_TYPE (fd->loops[i].v);
-
-         if (POINTER_TYPE_P (itype))
-           itype = signed_type_for (itype);
-         t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
-                                    ? -1 : 1));
-         t = fold_build2 (PLUS_EXPR, itype,
-                          fold_convert (itype, fd->loops[i].step), t);
-         t = fold_build2 (PLUS_EXPR, itype, t,
-                          fold_convert (itype, fd->loops[i].n2));
-         t = fold_build2 (MINUS_EXPR, itype, t,
-                          fold_convert (itype, fd->loops[i].n1));
-         if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
-           t = fold_build2 (TRUNC_DIV_EXPR, itype,
-                            fold_build1 (NEGATE_EXPR, itype, t),
-                            fold_build1 (NEGATE_EXPR, itype,
-                                         fold_convert (itype,
-                                                       fd->loops[i].step)));
-         else
-           t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
-                            fold_convert (itype, fd->loops[i].step));
-         t = fold_convert (type, t);
-         if (TREE_CODE (t) == INTEGER_CST)
-           counts[i] = t;
-         else
-           {
-             counts[i] = make_rename_temp (type, ".count");
-             t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                           true, GSI_SAME_STMT);
-             stmt = gimple_build_assign (counts[i], t);
-             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
-           }
-         if (SSA_VAR_P (fd->loop.n2))
-           {
-             if (i == 0)
-               t = counts[0];
-             else
-               {
-                 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
-                 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                               true, GSI_SAME_STMT);
-               }
-             stmt = gimple_build_assign (fd->loop.n2, t);
-             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
-           }
+         /* Some counts[i] vars might be uninitialized if
+            some loop has zero iterations.  But the body shouldn't
+            be executed in that case, so just avoid uninit warnings.  */
+         for (i = first_zero_iter; i < fd->collapse; i++)
+           if (SSA_VAR_P (counts[i]))
+             TREE_NO_WARNING (counts[i]) = 1;
+         gsi_prev (&gsi);
+         e = split_block (entry_bb, gsi_stmt (gsi));
+         entry_bb = e->dest;
+         make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
+         gsi = gsi_last_bb (entry_bb);
+         set_immediate_dominator (CDI_DOMINATORS, entry_bb,
+                                  get_immediate_dominator (CDI_DOMINATORS,
+                                                           zero_iter_bb));
        }
     }
   if (in_combined_parallel)
@@ -3853,18 +5446,32 @@ expand_omp_for_generic (struct omp_region *region,
       t4 = build_fold_addr_expr (iend0);
       t3 = build_fold_addr_expr (istart0);
       t2 = fold_convert (fd->iter_type, fd->loop.step);
-      if (POINTER_TYPE_P (type)
-         && TYPE_PRECISION (type) != TYPE_PRECISION (fd->iter_type))
+      t1 = fd->loop.n2;
+      t0 = fd->loop.n1;
+      if (gimple_omp_for_combined_into_p (fd->for_stmt))
+       {
+         tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                        OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         t0 = OMP_CLAUSE_DECL (innerc);
+         innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                   OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         t1 = OMP_CLAUSE_DECL (innerc);
+       }
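+      /* Editor's note: for a loop split out of a combined construct the
+         precomputed bounds are passed in via the artificial _looptemp_
+         clauses; the first two decls in that chain are the start and end
+         values used above as t0 and t1.  */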
+      if (POINTER_TYPE_P (TREE_TYPE (t0))
+         && TYPE_PRECISION (TREE_TYPE (t0))
+            != TYPE_PRECISION (fd->iter_type))
        {
          /* Avoid casting pointers to integer of a different size.  */
          tree itype = signed_type_for (type);
-         t1 = fold_convert (fd->iter_type, fold_convert (itype, fd->loop.n2));
-         t0 = fold_convert (fd->iter_type, fold_convert (itype, fd->loop.n1));
+         t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
+         t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
        }
       else
        {
-         t1 = fold_convert (fd->iter_type, fd->loop.n2);
-         t0 = fold_convert (fd->iter_type, fd->loop.n1);
+         t1 = fold_convert (fd->iter_type, t1);
+         t0 = fold_convert (fd->iter_type, t0);
        }
       if (bias)
        {
@@ -3919,59 +5526,53 @@ expand_omp_for_generic (struct omp_region *region,
   gsi_remove (&gsi, true);
 
   /* Iteration setup for sequential loop goes in L0_BB.  */
+  tree startvar = fd->loop.v;
+  tree endvar = NULL_TREE;
+
+  if (gimple_omp_for_combined_p (fd->for_stmt))
+    {
+      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
+                 && gimple_omp_for_kind (inner_stmt)
+                    == GF_OMP_FOR_KIND_SIMD);
+      tree innerc = find_omp_clause (gimple_omp_for_clauses (inner_stmt),
+                                    OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      startvar = OMP_CLAUSE_DECL (innerc);
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      endvar = OMP_CLAUSE_DECL (innerc);
+    }
+
   gsi = gsi_start_bb (l0_bb);
   t = istart0;
   if (bias)
     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
-  if (POINTER_TYPE_P (type))
-    t = fold_convert (signed_type_for (type), t);
-  t = fold_convert (type, t);
-  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                               false, GSI_CONTINUE_LINKING);
-  stmt = gimple_build_assign (fd->loop.v, t);
+  if (POINTER_TYPE_P (TREE_TYPE (startvar)))
+    t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
+  t = fold_convert (TREE_TYPE (startvar), t);
+  t = force_gimple_operand_gsi (&gsi, t,
+                               DECL_P (startvar)
+                               && TREE_ADDRESSABLE (startvar),
+                               NULL_TREE, false, GSI_CONTINUE_LINKING);
+  stmt = gimple_build_assign (startvar, t);
   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
   t = iend0;
   if (bias)
     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
-  if (POINTER_TYPE_P (type))
-    t = fold_convert (signed_type_for (type), t);
-  t = fold_convert (type, t);
+  if (POINTER_TYPE_P (TREE_TYPE (startvar)))
+    t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
+  t = fold_convert (TREE_TYPE (startvar), t);
   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
-  if (fd->collapse > 1)
+  if (endvar)
     {
-      tree tem = make_rename_temp (type, ".tem");
-      stmt = gimple_build_assign (tem, fd->loop.v);
+      stmt = gimple_build_assign (endvar, iend);
       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-      for (i = fd->collapse - 1; i >= 0; i--)
-       {
-         tree vtype = TREE_TYPE (fd->loops[i].v), itype;
-         itype = vtype;
-         if (POINTER_TYPE_P (vtype))
-           itype = signed_type_for (vtype);
-         t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
-         t = fold_convert (itype, t);
-         t = fold_build2 (MULT_EXPR, itype, t,
-                          fold_convert (itype, fd->loops[i].step));
-         if (POINTER_TYPE_P (vtype))
-           t = fold_build_pointer_plus (fd->loops[i].n1, t);
-         else
-           t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
-         t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                       false, GSI_CONTINUE_LINKING);
-         stmt = gimple_build_assign (fd->loops[i].v, t);
-         gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-         if (i != 0)
-           {
-             t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
-             t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                           false, GSI_CONTINUE_LINKING);
-             stmt = gimple_build_assign (tem, t);
-             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-           }
-       }
     }
+  if (fd->collapse > 1)
+    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
   if (!broken_loop)
     {
@@ -3983,80 +5584,34 @@ expand_omp_for_generic (struct omp_region *region,
       vmain = gimple_omp_continue_control_use (stmt);
       vback = gimple_omp_continue_control_def (stmt);
 
-      if (POINTER_TYPE_P (type))
-       t = fold_build_pointer_plus (vmain, fd->loop.step);
-      else
-       t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
-      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                   true, GSI_SAME_STMT);
-      stmt = gimple_build_assign (vback, t);
-      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
-
-      t = build2 (fd->loop.cond_code, boolean_type_node, vback, iend);
-      stmt = gimple_build_cond_empty (t);
-      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+      if (!gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         if (POINTER_TYPE_P (type))
+           t = fold_build_pointer_plus (vmain, fd->loop.step);
+         else
+           t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
+         t = force_gimple_operand_gsi (&gsi, t,
+                                       DECL_P (vback)
+                                       && TREE_ADDRESSABLE (vback),
+                                       NULL_TREE, true, GSI_SAME_STMT);
+         stmt = gimple_build_assign (vback, t);
+         gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+         t = build2 (fd->loop.cond_code, boolean_type_node,
+                     DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
+                     iend);
+         stmt = gimple_build_cond_empty (t);
+         gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+       }
 
       /* Remove GIMPLE_OMP_CONTINUE.  */
       gsi_remove (&gsi, true);
 
-      if (fd->collapse > 1)
-       {
-         basic_block last_bb, bb;
-
-         last_bb = cont_bb;
-         for (i = fd->collapse - 1; i >= 0; i--)
-           {
-             tree vtype = TREE_TYPE (fd->loops[i].v);
-
-             bb = create_empty_bb (last_bb);
-             gsi = gsi_start_bb (bb);
+      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
+       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
 
-             if (i < fd->collapse - 1)
-               {
-                 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
-                 e->probability = REG_BR_PROB_BASE / 8;
-
-                 t = fd->loops[i + 1].n1;
-                 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                               false, GSI_CONTINUE_LINKING);
-                 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
-                 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-               }
-             else
-               collapse_bb = bb;
-
-             set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
-
-             if (POINTER_TYPE_P (vtype))
-               t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
-             else
-               t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v,
-                                fd->loops[i].step);
-             t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                                           false, GSI_CONTINUE_LINKING);
-             stmt = gimple_build_assign (fd->loops[i].v, t);
-             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-
-             if (i > 0)
-               {
-                 t = fd->loops[i].n2;
-                 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-                                               false, GSI_CONTINUE_LINKING);
-                 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
-                                  fd->loops[i].v, t);
-                 stmt = gimple_build_cond_empty (t);
-                 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-                 e = make_edge (bb, l1_bb, EDGE_TRUE_VALUE);
-                 e->probability = REG_BR_PROB_BASE * 7 / 8;
-               }
-             else
-               make_edge (bb, l1_bb, EDGE_FALLTHRU);
-             last_bb = bb;
-           }
-       }
-
-      /* Emit code to get the next parallel iteration in L2_BB.  */
-      gsi = gsi_start_bb (l2_bb);
+      /* Emit code to get the next parallel iteration in L2_BB.  */
+      gsi = gsi_start_bb (l2_bb);
 
       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
                           build_fold_addr_expr (istart0),
@@ -4074,9 +5629,13 @@ expand_omp_for_generic (struct omp_region *region,
   gsi = gsi_last_bb (exit_bb);
   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
+  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
   else
     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
   stmt = gimple_build_call (t, 0);
+  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
   gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
   gsi_remove (&gsi, true);
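+
+  /* Editor's note: BUILT_IN_GOMP_LOOP_END_CANCEL is the cancellable
+     variant; its boolean result, stored through the GIMPLE_OMP_RETURN
+     lhs above, tells the generated code whether the construct was
+     cancelled.  */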
 
@@ -4101,19 +5660,31 @@ expand_omp_for_generic (struct omp_region *region,
       remove_edge (e);
 
       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
-      if (fd->collapse > 1)
+      if (current_loops)
+       add_bb_to_loop (l2_bb, cont_bb->loop_father);
+      e = find_edge (cont_bb, l1_bb);
+      if (gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         remove_edge (e);
+         e = NULL;
+       }
+      else if (fd->collapse > 1)
        {
-         e = find_edge (cont_bb, l1_bb);
          remove_edge (e);
          e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
        }
       else
+       e->flags = EDGE_TRUE_VALUE;
+      if (e)
        {
-         e = find_edge (cont_bb, l1_bb);
-         e->flags = EDGE_TRUE_VALUE;
+         e->probability = REG_BR_PROB_BASE * 7 / 8;
+         find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
+       }
+      else
+       {
+         e = find_edge (cont_bb, l2_bb);
+         e->flags = EDGE_FALLTHRU;
        }
-      e->probability = REG_BR_PROB_BASE * 7 / 8;
-      find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
 
       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
@@ -4124,6 +5695,19 @@ expand_omp_for_generic (struct omp_region *region,
                               recompute_dominator (CDI_DOMINATORS, l0_bb));
       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
                               recompute_dominator (CDI_DOMINATORS, l1_bb));
+
+      struct loop *outer_loop = alloc_loop ();
+      outer_loop->header = l0_bb;
+      outer_loop->latch = l2_bb;
+      add_loop (outer_loop, l0_bb->loop_father);
+
+      if (!gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         struct loop *loop = alloc_loop ();
+         loop->header = l1_bb;
+         /* The loop may have multiple latches.  */
+         add_loop (loop, outer_loop);
+       }
     }
 }
 
@@ -4136,6 +5720,7 @@ expand_omp_for_generic (struct omp_region *region,
 
    where COND is "<" or ">", we generate pseudocode
 
+       if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
        if (cond is <)
          adj = STEP - 1;
        else
@@ -4166,16 +5751,22 @@ expand_omp_for_generic (struct omp_region *region,
 
 static void
 expand_omp_for_static_nochunk (struct omp_region *region,
-                              struct omp_for_data *fd)
+                              struct omp_for_data *fd,
+                              gimple inner_stmt)
 {
   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
   tree type, itype, vmain, vback;
   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
-  basic_block body_bb, cont_bb;
+  basic_block body_bb, cont_bb, collapse_bb = NULL;
   basic_block fin_bb;
   gimple_stmt_iterator gsi;
   gimple stmt;
   edge ep;
+  enum built_in_function get_num_threads = BUILT_IN_OMP_GET_NUM_THREADS;
+  enum built_in_function get_thread_num = BUILT_IN_OMP_GET_THREAD_NUM;
+  bool broken_loop = region->cont == NULL;
+  tree *counts = NULL;
+  tree n1, n2, step;
 
   itype = type = TREE_TYPE (fd->loop.v);
   if (POINTER_TYPE_P (type))
@@ -4184,57 +5775,136 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   entry_bb = region->entry;
   cont_bb = region->cont;
   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
-  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
+  fin_bb = BRANCH_EDGE (entry_bb)->dest;
+  gcc_assert (broken_loop
+             || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
   body_bb = single_succ (seq_start_bb);
-  gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
-  gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
-  fin_bb = FALLTHRU_EDGE (cont_bb)->dest;
+  if (!broken_loop)
+    {
+      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
+      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
+    }
   exit_bb = region->exit;
 
   /* Iteration space partitioning goes in ENTRY_BB.  */
   gsi = gsi_last_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
 
-  t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS), 0);
+  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
+    {
+      get_num_threads = BUILT_IN_OMP_GET_NUM_TEAMS;
+      get_thread_num = BUILT_IN_OMP_GET_TEAM_NUM;
+    }
+
+  if (fd->collapse > 1)
+    {
+      int first_zero_iter = -1;
+      basic_block l2_dom_bb = NULL;
+
+      counts = XALLOCAVEC (tree, fd->collapse);
+      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
+                                 fin_bb, first_zero_iter,
+                                 l2_dom_bb);
+      t = NULL_TREE;
+    }
+  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
+    t = integer_one_node;
+  else
+    t = fold_binary (fd->loop.cond_code, boolean_type_node,
+                    fold_convert (type, fd->loop.n1),
+                    fold_convert (type, fd->loop.n2));
+  if (fd->collapse == 1
+      && TYPE_UNSIGNED (type)
+      && (t == NULL_TREE || !integer_onep (t)))
+    {
+      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
+      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
+                                    true, GSI_SAME_STMT);
+      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
+      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
+                                    true, GSI_SAME_STMT);
+      stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
+                               NULL_TREE, NULL_TREE);
+      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+      if (walk_tree (gimple_cond_lhs_ptr (stmt),
+                    expand_omp_regimplify_p, NULL, NULL)
+         || walk_tree (gimple_cond_rhs_ptr (stmt),
+                       expand_omp_regimplify_p, NULL, NULL))
+       {
+         gsi = gsi_for_stmt (stmt);
+         gimple_regimplify_operands (stmt, &gsi);
+       }
+      ep = split_block (entry_bb, stmt);
+      ep->flags = EDGE_TRUE_VALUE;
+      entry_bb = ep->dest;
+      ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
+      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
+      ep->probability = REG_BR_PROB_BASE / 2000 - 1;
+      if (gimple_in_ssa_p (cfun))
+       {
+         int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
+         for (gsi = gsi_start_phis (fin_bb);
+              !gsi_end_p (gsi); gsi_next (&gsi))
+           {
+             gimple phi = gsi_stmt (gsi);
+             add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
+                          ep, UNKNOWN_LOCATION);
+           }
+       }
+      gsi = gsi_last_bb (entry_bb);
+    }
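+
+  /* Editor's note (illustrative): for an unsigned IV the trip-count
+     formula below would wrap around if the loop runs zero times, so the
+     guard just inserted branches straight to FIN_BB in that case and the
+     count is only computed when N1 cond N2 actually holds.  */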
+
+  t = build_call_expr (builtin_decl_explicit (get_num_threads), 0);
   t = fold_convert (itype, t);
   nthreads = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
 
-  t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM), 0);
+  t = build_call_expr (builtin_decl_explicit (get_thread_num), 0);
   t = fold_convert (itype, t);
   threadid = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
 
-  fd->loop.n1
-    = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loop.n1),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
-  fd->loop.n2
-    = force_gimple_operand_gsi (&gsi, fold_convert (itype, fd->loop.n2),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
-  fd->loop.step
-    = force_gimple_operand_gsi (&gsi, fold_convert (itype, fd->loop.step),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
+  n1 = fd->loop.n1;
+  n2 = fd->loop.n2;
+  step = fd->loop.step;
+  if (gimple_omp_for_combined_into_p (fd->for_stmt))
+    {
+      tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                    OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      n1 = OMP_CLAUSE_DECL (innerc);
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      n2 = OMP_CLAUSE_DECL (innerc);
+    }
+  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
+                                true, NULL_TREE, true, GSI_SAME_STMT);
+  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
+                                true, NULL_TREE, true, GSI_SAME_STMT);
+  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
+                                  true, NULL_TREE, true, GSI_SAME_STMT);
 
   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
-  t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
-  t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
-  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
+  t = fold_build2 (PLUS_EXPR, itype, step, t);
+  t = fold_build2 (PLUS_EXPR, itype, t, n2);
+  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
     t = fold_build2 (TRUNC_DIV_EXPR, itype,
                     fold_build1 (NEGATE_EXPR, itype, t),
-                    fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
+                    fold_build1 (NEGATE_EXPR, itype, step));
   else
-    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
+    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
   t = fold_convert (itype, t);
   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
 
-  q = make_rename_temp (itype, "q");
+  q = create_tmp_reg (itype, "q");
   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
 
-  tt = make_rename_temp (itype, "tt");
+  tt = create_tmp_reg (itype, "tt");
   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
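+
+  /* Editor's note (illustrative, not generated code): with n = 10
+     iterations and nthreads = 4, q = 2 and tt = 2, so threads 0 and 1
+     execute three iterations each while threads 2 and 3 execute two.  */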
@@ -4273,54 +5943,96 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
   gsi = gsi_start_bb (seq_start_bb);
 
+  tree startvar = fd->loop.v;
+  tree endvar = NULL_TREE;
+
+  if (gimple_omp_for_combined_p (fd->for_stmt))
+    {
+      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
+                    ? gimple_omp_parallel_clauses (inner_stmt)
+                    : gimple_omp_for_clauses (inner_stmt);
+      tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      startvar = OMP_CLAUSE_DECL (innerc);
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      endvar = OMP_CLAUSE_DECL (innerc);
+    }
   t = fold_convert (itype, s0);
-  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  t = fold_build2 (MULT_EXPR, itype, t, step);
   if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (fd->loop.n1, t);
+    t = fold_build_pointer_plus (n1, t);
   else
-    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
-  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                               false, GSI_CONTINUE_LINKING);
-  stmt = gimple_build_assign (fd->loop.v, t);
+    t = fold_build2 (PLUS_EXPR, type, t, n1);
+  t = fold_convert (TREE_TYPE (startvar), t);
+  t = force_gimple_operand_gsi (&gsi, t,
+                               DECL_P (startvar)
+                               && TREE_ADDRESSABLE (startvar),
+                               NULL_TREE, false, GSI_CONTINUE_LINKING);
+  stmt = gimple_build_assign (startvar, t);
   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
   t = fold_convert (itype, e0);
-  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  t = fold_build2 (MULT_EXPR, itype, t, step);
   if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (fd->loop.n1, t);
+    t = fold_build_pointer_plus (n1, t);
   else
-    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+    t = fold_build2 (PLUS_EXPR, type, t, n1);
+  t = fold_convert (TREE_TYPE (startvar), t);
   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                false, GSI_CONTINUE_LINKING);
+  if (endvar)
+    {
+      stmt = gimple_build_assign (endvar, e);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+    }
+  if (fd->collapse > 1)
+    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
-  /* The code controlling the sequential loop replaces the
-     GIMPLE_OMP_CONTINUE.  */
-  gsi = gsi_last_bb (cont_bb);
-  stmt = gsi_stmt (gsi);
-  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
-  vmain = gimple_omp_continue_control_use (stmt);
-  vback = gimple_omp_continue_control_def (stmt);
+  if (!broken_loop)
+    {
+      /* The code controlling the sequential loop replaces the
+        GIMPLE_OMP_CONTINUE.  */
+      gsi = gsi_last_bb (cont_bb);
+      stmt = gsi_stmt (gsi);
+      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+      vmain = gimple_omp_continue_control_use (stmt);
+      vback = gimple_omp_continue_control_def (stmt);
 
-  if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (vmain, fd->loop.step);
-  else
-    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
-  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
-                               true, GSI_SAME_STMT);
-  stmt = gimple_build_assign (vback, t);
-  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+      if (!gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         if (POINTER_TYPE_P (type))
+           t = fold_build_pointer_plus (vmain, step);
+         else
+           t = fold_build2 (PLUS_EXPR, type, vmain, step);
+         t = force_gimple_operand_gsi (&gsi, t,
+                                       DECL_P (vback)
+                                       && TREE_ADDRESSABLE (vback),
+                                       NULL_TREE, true, GSI_SAME_STMT);
+         stmt = gimple_build_assign (vback, t);
+         gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+         t = build2 (fd->loop.cond_code, boolean_type_node,
+                     DECL_P (vback) && TREE_ADDRESSABLE (vback)
+                     ? t : vback, e);
+         gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
+       }
 
-  t = build2 (fd->loop.cond_code, boolean_type_node, vback, e);
-  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
+      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
+      gsi_remove (&gsi, true);
 
-  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
-  gsi_remove (&gsi, true);
+      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
+       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+    }
 
   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
   gsi = gsi_last_bb (exit_bb);
   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
-    force_gimple_operand_gsi (&gsi, build_omp_barrier (), false, NULL_TREE,
-                             false, GSI_SAME_STMT);
+    {
+      t = gimple_omp_return_lhs (gsi_stmt (gsi));
+      gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
+    }
   gsi_remove (&gsi, true);
 
   /* Connect all the blocks.  */
@@ -4332,16 +6044,42 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
 
-  find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
-  find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
+  if (!broken_loop)
+    {
+      ep = find_edge (cont_bb, body_bb);
+      if (gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         remove_edge (ep);
+         ep = NULL;
+       }
+      else if (fd->collapse > 1)
+       {
+         remove_edge (ep);
+         ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
+       }
+      else
+       ep->flags = EDGE_TRUE_VALUE;
+      find_edge (cont_bb, fin_bb)->flags
+       = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
+    }
 
   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
+
   set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
                           recompute_dominator (CDI_DOMINATORS, fin_bb));
+
+  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
+    {
+      struct loop *loop = alloc_loop ();
+      loop->header = body_bb;
+      if (collapse_bb == NULL)
+       loop->latch = cont_bb;
+      add_loop (loop, body_bb->loop_father);
+    }
 }
 
 
@@ -4353,6 +6091,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
 
    where COND is "<" or ">", we generate pseudocode
 
+       if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
        if (cond is <)
          adj = STEP - 1;
        else
@@ -4383,16 +6122,22 @@ expand_omp_for_static_nochunk (struct omp_region *region,
 */
 
 static void
-expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
+expand_omp_for_static_chunk (struct omp_region *region,
+                            struct omp_for_data *fd, gimple inner_stmt)
 {
   tree n, s0, e0, e, t;
   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
   tree type, itype, v_main, v_back, v_extra;
   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
-  basic_block trip_update_bb, cont_bb, fin_bb;
+  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
   gimple_stmt_iterator si;
   gimple stmt;
   edge se;
+  enum built_in_function get_num_threads = BUILT_IN_OMP_GET_NUM_THREADS;
+  enum built_in_function get_thread_num = BUILT_IN_OMP_GET_THREAD_NUM;
+  bool broken_loop = region->cont == NULL;
+  tree *counts = NULL;
+  tree n1, n2, step;
 
   itype = type = TREE_TYPE (fd->loop.v);
   if (POINTER_TYPE_P (type))
@@ -4404,53 +6149,131 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
   iter_part_bb = se->dest;
   cont_bb = region->cont;
   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
-  gcc_assert (BRANCH_EDGE (iter_part_bb)->dest
-             == FALLTHRU_EDGE (cont_bb)->dest);
+  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
+  gcc_assert (broken_loop
+             || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
   body_bb = single_succ (seq_start_bb);
-  gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
-  gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
-  fin_bb = FALLTHRU_EDGE (cont_bb)->dest;
-  trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
+  if (!broken_loop)
+    {
+      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
+      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
+      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
+    }
   exit_bb = region->exit;
 
   /* Trip and adjustment setup goes in ENTRY_BB.  */
   si = gsi_last_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_FOR);
 
-  t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS), 0);
+  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
+    {
+      get_num_threads = BUILT_IN_OMP_GET_NUM_TEAMS;
+      get_thread_num = BUILT_IN_OMP_GET_TEAM_NUM;
+    }
+
+  if (fd->collapse > 1)
+    {
+      int first_zero_iter = -1;
+      basic_block l2_dom_bb = NULL;
+
+      counts = XALLOCAVEC (tree, fd->collapse);
+      expand_omp_for_init_counts (fd, &si, entry_bb, counts,
+                                 fin_bb, first_zero_iter,
+                                 l2_dom_bb);
+      t = NULL_TREE;
+    }
+  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
+    t = integer_one_node;
+  else
+    t = fold_binary (fd->loop.cond_code, boolean_type_node,
+                    fold_convert (type, fd->loop.n1),
+                    fold_convert (type, fd->loop.n2));
+  if (fd->collapse == 1
+      && TYPE_UNSIGNED (type)
+      && (t == NULL_TREE || !integer_onep (t)))
+    {
+      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
+      n1 = force_gimple_operand_gsi (&si, n1, true, NULL_TREE,
+                                    true, GSI_SAME_STMT);
+      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
+      n2 = force_gimple_operand_gsi (&si, n2, true, NULL_TREE,
+                                    true, GSI_SAME_STMT);
+      stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
+                               NULL_TREE, NULL_TREE);
+      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
+      if (walk_tree (gimple_cond_lhs_ptr (stmt),
+                    expand_omp_regimplify_p, NULL, NULL)
+         || walk_tree (gimple_cond_rhs_ptr (stmt),
+                       expand_omp_regimplify_p, NULL, NULL))
+       {
+         si = gsi_for_stmt (stmt);
+         gimple_regimplify_operands (stmt, &si);
+       }
+      se = split_block (entry_bb, stmt);
+      se->flags = EDGE_TRUE_VALUE;
+      entry_bb = se->dest;
+      se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
+      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
+      se->probability = REG_BR_PROB_BASE / 2000 - 1;
+      if (gimple_in_ssa_p (cfun))
+       {
+         int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
+         for (si = gsi_start_phis (fin_bb);
+              !gsi_end_p (si); gsi_next (&si))
+           {
+             gimple phi = gsi_stmt (si);
+             add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
+                          se, UNKNOWN_LOCATION);
+           }
+       }
+      si = gsi_last_bb (entry_bb);
+    }
+
+  t = build_call_expr (builtin_decl_explicit (get_num_threads), 0);
   t = fold_convert (itype, t);
   nthreads = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
 
-  t = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM), 0);
+  t = build_call_expr (builtin_decl_explicit (get_thread_num), 0);
   t = fold_convert (itype, t);
   threadid = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
 
-  fd->loop.n1
-    = force_gimple_operand_gsi (&si, fold_convert (type, fd->loop.n1),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
-  fd->loop.n2
-    = force_gimple_operand_gsi (&si, fold_convert (itype, fd->loop.n2),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
-  fd->loop.step
-    = force_gimple_operand_gsi (&si, fold_convert (itype, fd->loop.step),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
+  n1 = fd->loop.n1;
+  n2 = fd->loop.n2;
+  step = fd->loop.step;
+  if (gimple_omp_for_combined_into_p (fd->for_stmt))
+    {
+      tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                    OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      n1 = OMP_CLAUSE_DECL (innerc);
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      n2 = OMP_CLAUSE_DECL (innerc);
+    }
+  n1 = force_gimple_operand_gsi (&si, fold_convert (type, n1),
+                                true, NULL_TREE, true, GSI_SAME_STMT);
+  n2 = force_gimple_operand_gsi (&si, fold_convert (itype, n2),
+                                true, NULL_TREE, true, GSI_SAME_STMT);
+  step = force_gimple_operand_gsi (&si, fold_convert (itype, step),
+                                  true, NULL_TREE, true, GSI_SAME_STMT);
   fd->chunk_size
     = force_gimple_operand_gsi (&si, fold_convert (itype, fd->chunk_size),
                                true, NULL_TREE, true, GSI_SAME_STMT);
 
   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
-  t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
-  t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
-  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
+  t = fold_build2 (PLUS_EXPR, itype, step, t);
+  t = fold_build2 (PLUS_EXPR, itype, t, n2);
+  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
     t = fold_build2 (TRUNC_DIV_EXPR, itype,
                     fold_build1 (NEGATE_EXPR, itype, t),
-                    fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
+                    fold_build1 (NEGATE_EXPR, itype, step));
   else
-    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
+    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
   t = fold_convert (itype, t);
   n = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
                                true, GSI_SAME_STMT);
@@ -4458,7 +6281,6 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
   trip_var = create_tmp_reg (itype, ".trip");
   if (gimple_in_ssa_p (cfun))
     {
-      add_referenced_var (trip_var);
       trip_init = make_ssa_name (trip_var, NULL);
       trip_main = make_ssa_name (trip_var, NULL);
       trip_back = make_ssa_name (trip_var, NULL);
@@ -4474,11 +6296,11 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
 
   t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size);
-  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  t = fold_build2 (MULT_EXPR, itype, t, step);
   if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (fd->loop.n1, t);
+    t = fold_build_pointer_plus (n1, t);
   else
-    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+    t = fold_build2 (PLUS_EXPR, type, t, n1);
   v_extra = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
                                      true, GSI_SAME_STMT);
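+  /* Editor's note (illustrative): v_extra is this thread's trip-0 chunk
+     start, n1 + threadid * chunk_size * step; each subsequent trip
+     advances the start by nthreads * chunk_size * step.  */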
 
@@ -4505,80 +6327,142 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
   si = gsi_start_bb (seq_start_bb);
 
+  tree startvar = fd->loop.v;
+  tree endvar = NULL_TREE;
+
+  if (gimple_omp_for_combined_p (fd->for_stmt))
+    {
+      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
+                    ? gimple_omp_parallel_clauses (inner_stmt)
+                    : gimple_omp_for_clauses (inner_stmt);
+      tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      startvar = OMP_CLAUSE_DECL (innerc);
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      endvar = OMP_CLAUSE_DECL (innerc);
+    }
+
   t = fold_convert (itype, s0);
-  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  t = fold_build2 (MULT_EXPR, itype, t, step);
   if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (fd->loop.n1, t);
+    t = fold_build_pointer_plus (n1, t);
   else
-    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
-  t = force_gimple_operand_gsi (&si, t, false, NULL_TREE,
-                               false, GSI_CONTINUE_LINKING);
-  stmt = gimple_build_assign (fd->loop.v, t);
+    t = fold_build2 (PLUS_EXPR, type, t, n1);
+  t = fold_convert (TREE_TYPE (startvar), t);
+  t = force_gimple_operand_gsi (&si, t,
+                               DECL_P (startvar)
+                               && TREE_ADDRESSABLE (startvar),
+                               NULL_TREE, false, GSI_CONTINUE_LINKING);
+  stmt = gimple_build_assign (startvar, t);
   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
 
   t = fold_convert (itype, e0);
-  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  t = fold_build2 (MULT_EXPR, itype, t, step);
   if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (fd->loop.n1, t);
+    t = fold_build_pointer_plus (n1, t);
   else
-    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+    t = fold_build2 (PLUS_EXPR, type, t, n1);
+  t = fold_convert (TREE_TYPE (startvar), t);
   e = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
                                false, GSI_CONTINUE_LINKING);
+  if (endvar)
+    {
+      stmt = gimple_build_assign (endvar, e);
+      gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
+    }
+  if (fd->collapse > 1)
+    expand_omp_for_init_vars (fd, &si, counts, inner_stmt, startvar);
 
-  /* The code controlling the sequential loop goes in CONT_BB,
-     replacing the GIMPLE_OMP_CONTINUE.  */
-  si = gsi_last_bb (cont_bb);
-  stmt = gsi_stmt (si);
-  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
-  v_main = gimple_omp_continue_control_use (stmt);
-  v_back = gimple_omp_continue_control_def (stmt);
+  if (!broken_loop)
+    {
+      /* The code controlling the sequential loop goes in CONT_BB,
+        replacing the GIMPLE_OMP_CONTINUE.  */
+      si = gsi_last_bb (cont_bb);
+      stmt = gsi_stmt (si);
+      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+      v_main = gimple_omp_continue_control_use (stmt);
+      v_back = gimple_omp_continue_control_def (stmt);
 
-  if (POINTER_TYPE_P (type))
-    t = fold_build_pointer_plus (v_main, fd->loop.step);
-  else
-    t = fold_build2 (PLUS_EXPR, type, v_main, fd->loop.step);
-  stmt = gimple_build_assign (v_back, t);
-  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
+      if (!gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         if (POINTER_TYPE_P (type))
+           t = fold_build_pointer_plus (v_main, step);
+         else
+           t = fold_build2 (PLUS_EXPR, type, v_main, step);
+         if (DECL_P (v_back) && TREE_ADDRESSABLE (v_back))
+           t = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
+                                         true, GSI_SAME_STMT);
+         stmt = gimple_build_assign (v_back, t);
+         gsi_insert_before (&si, stmt, GSI_SAME_STMT);
 
-  t = build2 (fd->loop.cond_code, boolean_type_node, v_back, e);
-  gsi_insert_before (&si, gimple_build_cond_empty (t), GSI_SAME_STMT);
+         t = build2 (fd->loop.cond_code, boolean_type_node,
+                     DECL_P (v_back) && TREE_ADDRESSABLE (v_back)
+                     ? t : v_back, e);
+         gsi_insert_before (&si, gimple_build_cond_empty (t), GSI_SAME_STMT);
+       }
 
-  /* Remove GIMPLE_OMP_CONTINUE.  */
-  gsi_remove (&si, true);
+      /* Remove GIMPLE_OMP_CONTINUE.  */
+      gsi_remove (&si, true);
 
-  /* Trip update code goes into TRIP_UPDATE_BB.  */
-  si = gsi_start_bb (trip_update_bb);
+      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
+       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
 
-  t = build_int_cst (itype, 1);
-  t = build2 (PLUS_EXPR, itype, trip_main, t);
-  stmt = gimple_build_assign (trip_back, t);
-  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
+      /* Trip update code goes into TRIP_UPDATE_BB.  */
+      si = gsi_start_bb (trip_update_bb);
+
+      t = build_int_cst (itype, 1);
+      t = build2 (PLUS_EXPR, itype, trip_main, t);
+      stmt = gimple_build_assign (trip_back, t);
+      gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
+    }
 
   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
   si = gsi_last_bb (exit_bb);
   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
-    force_gimple_operand_gsi (&si, build_omp_barrier (), false, NULL_TREE,
-                             false, GSI_SAME_STMT);
+    {
+      t = gimple_omp_return_lhs (gsi_stmt (si));
+      gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT);
+    }
   gsi_remove (&si, true);
 
   /* Connect the new blocks.  */
   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
 
-  find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
-  find_edge (cont_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE;
+  if (!broken_loop)
+    {
+      se = find_edge (cont_bb, body_bb);
+      if (gimple_omp_for_combined_p (fd->for_stmt))
+       {
+         remove_edge (se);
+         se = NULL;
+       }
+      else if (fd->collapse > 1)
+       {
+         remove_edge (se);
+         se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
+       }
+      else
+       se->flags = EDGE_TRUE_VALUE;
+      find_edge (cont_bb, trip_update_bb)->flags
+       = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
 
-  redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
+      redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
+    }
 
   if (gimple_in_ssa_p (cfun))
     {
       gimple_stmt_iterator psi;
       gimple phi;
       edge re, ene;
-      edge_var_map_vector head;
+      edge_var_map_vector *head;
       edge_var_map *vm;
       size_t i;
 
+      gcc_assert (fd->collapse == 1 && !broken_loop);
+
       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
         remove arguments of the phi nodes in fin_bb.  We need to create
         appropriate phi nodes in iter_part_bb instead.  */
@@ -4588,7 +6472,7 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
       ene = single_succ_edge (entry_bb);
 
       psi = gsi_start_phis (fin_bb);
-      for (i = 0; !gsi_end_p (psi) && VEC_iterate (edge_var_map, head, i, vm);
+      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
           gsi_next (&psi), ++i)
        {
          gimple nphi;
@@ -4598,7 +6482,6 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
          t = gimple_phi_result (phi);
          gcc_assert (t == redirect_edge_var_map_result (vm));
          nphi = create_phi_node (t, iter_part_bb);
-         SSA_NAME_DEF_STMT (t) = nphi;
 
          t = PHI_ARG_DEF_FROM_EDGE (phi, se);
          locus = gimple_phi_arg_location_from_edge (phi, se);
@@ -4611,7 +6494,7 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
          locus = redirect_edge_var_map_location (vm);
          add_phi_arg (nphi, redirect_edge_var_map_def (vm), re, locus);
        }
-      gcc_assert (!gsi_end_p (psi) && i == VEC_length (edge_var_map, head));
+      gcc_assert (!gsi_end_p (psi) && i == head->length ());
       redirect_edge_var_map_clear (re);
       while (1)
        {
@@ -4623,14 +6506,14 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
 
       /* Make phi node for trip.  */
       phi = create_phi_node (trip_main, iter_part_bb);
-      SSA_NAME_DEF_STMT (trip_main) = phi;
       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
                   UNKNOWN_LOCATION);
       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
                   UNKNOWN_LOCATION);
     }
 
-  set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
+  if (!broken_loop)
+    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
                           recompute_dominator (CDI_DOMINATORS, iter_part_bb));
   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
@@ -4639,109 +6522,440 @@ expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
                           recompute_dominator (CDI_DOMINATORS, seq_start_bb));
   set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
-}
-
-
-/* Expand the OpenMP loop defined by REGION.  */
-
-static void
-expand_omp_for (struct omp_region *region)
-{
-  struct omp_for_data fd;
-  struct omp_for_data_loop *loops;
-
-  loops
-    = (struct omp_for_data_loop *)
-      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
-             * sizeof (struct omp_for_data_loop));
-  extract_omp_for_data (last_stmt (region->entry), &fd, loops);
-  region->sched_kind = fd.sched_kind;
-
-  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
-  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
-  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
-  if (region->cont)
-    {
-      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
-      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
-      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
-    }
 
-  if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
-      && !fd.have_ordered
-      && fd.collapse == 1
-      && region->cont != NULL)
-    {
-      if (fd.chunk_size == NULL)
-       expand_omp_for_static_nochunk (region, &fd);
-      else
-       expand_omp_for_static_chunk (region, &fd);
-    }
-  else
+  if (!broken_loop)
     {
-      int fn_index, start_ix, next_ix;
+      struct loop *trip_loop = alloc_loop ();
+      trip_loop->header = iter_part_bb;
+      trip_loop->latch = trip_update_bb;
+      add_loop (trip_loop, iter_part_bb->loop_father);
 
-      if (fd.chunk_size == NULL
-         && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
-       fd.chunk_size = integer_zero_node;
-      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
-      fn_index = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
-                 ? 3 : fd.sched_kind;
-      fn_index += fd.have_ordered * 4;
-      start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
-      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
-      if (fd.iter_type == long_long_unsigned_type_node)
+      if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
-         start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
-                       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
-         next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
-                     - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
+         struct loop *loop = alloc_loop ();
+         loop->header = body_bb;
+         if (collapse_bb == NULL)
+           loop->latch = cont_bb;
+         add_loop (loop, trip_loop);
        }
-      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
-                             (enum built_in_function) next_ix);
     }
-
-  if (gimple_in_ssa_p (cfun))
-    update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 
-/* Expand code for an OpenMP sections directive.  In pseudo code, we generate
+/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
+   loop.  Given parameters:
 
-       v = GOMP_sections_start (n);
+       for (V = N1; V cond N2; V += STEP) BODY;
+
+   where COND is "<" or ">", we generate pseudocode
+
+       V = N1;
+       goto L1;
     L0:
-       switch (v)
-         {
-         case 0:
-           goto L2;
-         case 1:
-           section 1;
-           goto L1;
-         case 2:
-           ...
-         case n:
-           ...
-         default:
-           abort ();
-         }
+       BODY;
+       V += STEP;
     L1:
-       v = GOMP_sections_next ();
-       goto L0;
+       if (V cond N2) goto L0; else goto L2;
     L2:
-       reduction;
 
-    If this is a combined parallel sections, replace the call to
-    GOMP_sections_start with call to GOMP_sections_next.  */
+    For collapsed loops, given parameters:
+      collapse(3)
+      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
+       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
+         for (V3 = N31; V3 cond3 N32; V3 += STEP3)
+           BODY;
 
-static void
-expand_omp_sections (struct omp_region *region)
-{
-  tree t, u, vin = NULL, vmain, vnext, l2;
-  VEC (tree,heap) *label_vec;
-  unsigned len;
-  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
-  gimple_stmt_iterator si, switch_si;
-  gimple sections_stmt, stmt, cont;
+    we generate pseudocode
+
+       if (cond3 is <)
+         adj = STEP3 - 1;
+       else
+         adj = STEP3 + 1;
+       count3 = (adj + N32 - N31) / STEP3;
+       if (cond2 is <)
+         adj = STEP2 - 1;
+       else
+         adj = STEP2 + 1;
+       count2 = (adj + N22 - N21) / STEP2;
+       if (cond1 is <)
+         adj = STEP1 - 1;
+       else
+         adj = STEP1 + 1;
+       count1 = (adj + N12 - N11) / STEP1;
+       count = count1 * count2 * count3;
+       V = 0;
+       V1 = N11;
+       V2 = N21;
+       V3 = N31;
+       goto L1;
+    L0:
+       BODY;
+       V += 1;
+       V3 += STEP3;
+       V2 += (V3 cond3 N32) ? 0 : STEP2;
+       V3 = (V3 cond3 N32) ? V3 : N31;
+       V1 += (V2 cond2 N22) ? 0 : STEP1;
+       V2 = (V2 cond2 N22) ? V2 : N21;
+    L1:
+       if (V < count) goto L0; else goto L2;
+    L2:
+
+      */
+
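+/* Editor's note: an illustrative collapse(2) instance of the scheme above
+   (not generated code).  For
+
+       #pragma omp simd collapse(2)
+       for (i = 0; i < 4; i++)
+         for (j = 0; j < 6; j++)
+           body (i, j);
+
+   count = 24 and the single linear IV V runs from 0 to 23; each step does
+
+       j += 1;
+       i += (j < 6) ? 0 : 1;
+       j = (j < 6) ? j : 0;
+
+   so the whole nest can be vectorized as one loop over V.  */
+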
+static void
+expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
+{
+  tree type, t;
+  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
+  gimple_stmt_iterator gsi;
+  gimple stmt;
+  bool broken_loop = region->cont == NULL;
+  edge e, ne;
+  tree *counts = NULL;
+  int i;
+  tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                 OMP_CLAUSE_SAFELEN);
+  tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                 OMP_CLAUSE__SIMDUID_);
+  tree n1, n2;
+
+  type = TREE_TYPE (fd->loop.v);
+  entry_bb = region->entry;
+  cont_bb = region->cont;
+  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
+  gcc_assert (broken_loop
+             || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
+  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
+  if (!broken_loop)
+    {
+      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
+      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
+      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
+      l2_bb = BRANCH_EDGE (entry_bb)->dest;
+    }
+  else
+    {
+      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
+      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
+      l2_bb = single_succ (l1_bb);
+    }
+  exit_bb = region->exit;
+  l2_dom_bb = NULL;
+
+  gsi = gsi_last_bb (entry_bb);
+
+  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+  /* Not needed in SSA form right now.  */
+  gcc_assert (!gimple_in_ssa_p (cfun));
+  if (fd->collapse > 1)
+    {
+      int first_zero_iter = -1;
+      basic_block zero_iter_bb = l2_bb;
+
+      counts = XALLOCAVEC (tree, fd->collapse);
+      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
+                                 zero_iter_bb, first_zero_iter,
+                                 l2_dom_bb);
+    }
+  if (l2_dom_bb == NULL)
+    l2_dom_bb = l1_bb;
+
+  n1 = fd->loop.n1;
+  n2 = fd->loop.n2;
+  if (gimple_omp_for_combined_into_p (fd->for_stmt))
+    {
+      tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                    OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      n1 = OMP_CLAUSE_DECL (innerc);
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+      n2 = OMP_CLAUSE_DECL (innerc);
+      expand_omp_build_assign (&gsi, fd->loop.v,
+                              fold_convert (type, n1));
+      if (fd->collapse > 1)
+       {
+         gsi_prev (&gsi);
+         expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
+         gsi_next (&gsi);
+       }
+    }
+  else
+    {
+      expand_omp_build_assign (&gsi, fd->loop.v,
+                              fold_convert (type, fd->loop.n1));
+      if (fd->collapse > 1)
+       for (i = 0; i < fd->collapse; i++)
+         {
+           tree itype = TREE_TYPE (fd->loops[i].v);
+           if (POINTER_TYPE_P (itype))
+             itype = signed_type_for (itype);
+           t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
+           expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+         }
+    }
+
+  /* Remove the GIMPLE_OMP_FOR statement.  */
+  gsi_remove (&gsi, true);
+
+  if (!broken_loop)
+    {
+      /* Code to control the increment goes in the CONT_BB.  */
+      gsi = gsi_last_bb (cont_bb);
+      stmt = gsi_stmt (gsi);
+      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
+
+      if (POINTER_TYPE_P (type))
+       t = fold_build_pointer_plus (fd->loop.v, fd->loop.step);
+      else
+       t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step);
+      expand_omp_build_assign (&gsi, fd->loop.v, t);
+
+      if (fd->collapse > 1)
+       {
+         i = fd->collapse - 1;
+         if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
+           {
+             t = fold_convert (sizetype, fd->loops[i].step);
+             t = fold_build_pointer_plus (fd->loops[i].v, t);
+           }
+         else
+           {
+             t = fold_convert (TREE_TYPE (fd->loops[i].v),
+                               fd->loops[i].step);
+             t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
+                              fd->loops[i].v, t);
+           }
+         expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+
+         for (i = fd->collapse - 1; i > 0; i--)
+           {
+             tree itype = TREE_TYPE (fd->loops[i].v);
+             tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
+             if (POINTER_TYPE_P (itype2))
+               itype2 = signed_type_for (itype2);
+             t = build3 (COND_EXPR, itype2,
+                         build2 (fd->loops[i].cond_code, boolean_type_node,
+                                 fd->loops[i].v,
+                                 fold_convert (itype, fd->loops[i].n2)),
+                         build_int_cst (itype2, 0),
+                         fold_convert (itype2, fd->loops[i - 1].step));
+             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
+               t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
+             else
+               t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
+             expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
+
+             t = build3 (COND_EXPR, itype,
+                         build2 (fd->loops[i].cond_code, boolean_type_node,
+                                 fd->loops[i].v,
+                                 fold_convert (itype, fd->loops[i].n2)),
+                         fd->loops[i].v,
+                         fold_convert (itype, fd->loops[i].n1));
+             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+           }
+       }
+
+      /* Remove GIMPLE_OMP_CONTINUE.  */
+      gsi_remove (&gsi, true);
+    }
+
+  /* Emit the condition in L1_BB.  */
+  gsi = gsi_start_bb (l1_bb);
+
+  t = fold_convert (type, n2);
+  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+                               false, GSI_CONTINUE_LINKING);
+  t = build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t);
+  stmt = gimple_build_cond_empty (t);
+  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (walk_tree (gimple_cond_lhs_ptr (stmt), expand_omp_regimplify_p,
+                NULL, NULL)
+      || walk_tree (gimple_cond_rhs_ptr (stmt), expand_omp_regimplify_p,
+                   NULL, NULL))
+    {
+      gsi = gsi_for_stmt (stmt);
+      gimple_regimplify_operands (stmt, &gsi);
+    }
+
+  /* Remove GIMPLE_OMP_RETURN.  */
+  gsi = gsi_last_bb (exit_bb);
+  gsi_remove (&gsi, true);
+
+  /* Connect the new blocks.  */
+  remove_edge (FALLTHRU_EDGE (entry_bb));
+
+  if (!broken_loop)
+    {
+      remove_edge (BRANCH_EDGE (entry_bb));
+      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
+
+      e = BRANCH_EDGE (l1_bb);
+      ne = FALLTHRU_EDGE (l1_bb);
+      e->flags = EDGE_TRUE_VALUE;
+    }
+  else
+    {
+      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+      ne = single_succ_edge (l1_bb);
+      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
+    }
+  ne->flags = EDGE_FALSE_VALUE;
+  e->probability = REG_BR_PROB_BASE * 7 / 8;
+  ne->probability = REG_BR_PROB_BASE / 8;
+
+  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
+  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
+  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
+
+  if (!broken_loop)
+    {
+      struct loop *loop = alloc_loop ();
+      loop->header = l1_bb;
+      loop->latch = cont_bb;
+      add_loop (loop, l1_bb->loop_father);
+      if (safelen == NULL_TREE)
+       loop->safelen = INT_MAX;
+      else
+       {
+         safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
+         if (!tree_fits_uhwi_p (safelen)
+             || tree_to_uhwi (safelen) > INT_MAX)
+           loop->safelen = INT_MAX;
+         else
+           loop->safelen = tree_to_uhwi (safelen);
+         if (loop->safelen == 1)
+           loop->safelen = 0;
+       }
+      if (simduid)
+       {
+         loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
+         cfun->has_simduid_loops = true;
+       }
+      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
+        the loop.  */
+      if ((flag_tree_loop_vectorize
+          || (!global_options_set.x_flag_tree_loop_vectorize
+               && !global_options_set.x_flag_tree_vectorize))
+         && flag_tree_loop_optimize
+         && loop->safelen > 1)
+       {
+         loop->force_vect = true;
+         cfun->has_force_vect_loops = true;
+       }
+    }
+}
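
To make the collapse linearization described before expand_omp_simd
concrete, here is a minimal self-contained sketch in plain C; the names
and bounds are illustrative, not GCC code:

   #include <assert.h>

   int
   main (void)
   {
     int n11 = 0, n12 = 7, step1 = 2;  /* outer: for (i = 0; i < 7; i += 2) */
     int n21 = 0, n22 = 5, step2 = 1;  /* inner: for (j = 0; j < 5; j++) */
     long count1 = (step1 - 1 + n12 - n11) / step1;  /* cond1 is "<" */
     long count2 = (step2 - 1 + n22 - n21) / step2;
     long count = count1 * count2, iters = 0;
     int i = n11, j = n21;
     for (long v = 0; v < count; v++)
       {
         /* BODY would run here with (i, j).  */
         iters++;
         j += step2;
         i += (j < n22) ? 0 : step1;  /* step the outer var when inner wraps */
         j = (j < n22) ? j : n21;     /* reset the inner var */
       }
     assert (iters == 4 * 5);  /* count1 == 4, count2 == 5 */
     return 0;
   }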
+
+
+/* Expand the OpenMP loop defined by REGION.  */
+
+static void
+expand_omp_for (struct omp_region *region, gimple inner_stmt)
+{
+  struct omp_for_data fd;
+  struct omp_for_data_loop *loops;
+
+  loops
+    = (struct omp_for_data_loop *)
+      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
+             * sizeof (struct omp_for_data_loop));
+  extract_omp_for_data (last_stmt (region->entry), &fd, loops);
+  region->sched_kind = fd.sched_kind;
+
+  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
+  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
+  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
+  if (region->cont)
+    {
+      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
+      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
+      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
+    }
+  else
+    /* If there is no continue statement then this is a degenerate
+       case where the introduction of abnormal edges during lowering
+       will prevent original loops from being detected.  Fix that up.  */
+    loops_state_set (LOOPS_NEED_FIXUP);
+
+  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_KIND_SIMD)
+    expand_omp_simd (region, &fd);
+  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
+          && !fd.have_ordered)
+    {
+      if (fd.chunk_size == NULL)
+       expand_omp_for_static_nochunk (region, &fd, inner_stmt);
+      else
+       expand_omp_for_static_chunk (region, &fd, inner_stmt);
+    }
+  else
+    {
+      int fn_index, start_ix, next_ix;
+
+      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
+                 == GF_OMP_FOR_KIND_FOR);
+      if (fd.chunk_size == NULL
+         && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
+       fd.chunk_size = integer_zero_node;
+      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
+      fn_index = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
+                 ? 3 : fd.sched_kind;
+      fn_index += fd.have_ordered * 4;
+      start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
+      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
+      if (fd.iter_type == long_long_unsigned_type_node)
+       {
+         start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
+                       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
+         next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
+                     - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
+       }
+      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
+                             (enum built_in_function) next_ix, inner_stmt);
+    }
+
+  if (gimple_in_ssa_p (cfun))
+    update_ssa (TODO_update_ssa_only_virtuals);
+}
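
A worked instance of the index arithmetic above, assuming GCC's usual
schedule-kind values (static = 0, dynamic = 1, guided = 2) and the
consecutive builtin layout the computation relies on:

   /* schedule(dynamic) with an ordered clause:  */
   int fn_index = 1 /* dynamic */ + 1 /* have_ordered */ * 4;  /* == 5 */
   /* start_ix == BUILT_IN_GOMP_LOOP_STATIC_START + 5, which selects
      GOMP_loop_ordered_dynamic_start, and next_ix picks the matching
      GOMP_loop_ordered_dynamic_next; the iter_type check then shifts
      both into the ULL builtin family for unsigned long long loops.  */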
+
+
+/* Expand code for an OpenMP sections directive.  In pseudo code, we generate
+
+       v = GOMP_sections_start (n);
+    L0:
+       switch (v)
+         {
+         case 0:
+           goto L2;
+         case 1:
+           section 1;
+           goto L1;
+         case 2:
+           ...
+         case n:
+           ...
+         default:
+           abort ();
+         }
+    L1:
+       v = GOMP_sections_next ();
+       goto L0;
+    L2:
+       reduction;
+
+    If this is a combined parallel+sections region, replace the call to
+    GOMP_sections_start with a call to GOMP_sections_next.  */
+
+static void
+expand_omp_sections (struct omp_region *region)
+{
+  tree t, u, vin = NULL, vmain, vnext, l2;
+  unsigned len;
+  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
+  gimple_stmt_iterator si, switch_si;
+  gimple sections_stmt, stmt, cont;
   edge_iterator ei;
   edge e;
   struct omp_region *inner;
@@ -4788,9 +7002,9 @@ expand_omp_sections (struct omp_region *region)
      and a default case to abort if something goes wrong.  */
   len = EDGE_COUNT (l0_bb->succs);
 
-  /* Use VEC_quick_push on label_vec throughout, since we know the size
+  /* Use vec::quick_push on label_vec throughout, since we know the size
      in advance.  */
-  label_vec = VEC_alloc (tree, heap, len);
+  auto_vec<tree> label_vec (len);
 
   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
      GIMPLE_OMP_SECTIONS statement.  */
@@ -4802,8 +7016,7 @@ expand_omp_sections (struct omp_region *region)
     {
       /* If we are not inside a combined parallel+sections region,
         call GOMP_sections_start.  */
-      t = build_int_cst (unsigned_type_node,
-                        exit_reachable ? len - 1 : len);
+      t = build_int_cst (unsigned_type_node, len - 1);
       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
       stmt = gimple_build_call (u, 1, t);
     }
@@ -4835,7 +7048,7 @@ expand_omp_sections (struct omp_region *region)
     }
 
   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
-  VEC_quick_push (tree, label_vec, t);
+  label_vec.quick_push (t);
   i = 1;
 
   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
@@ -4859,7 +7072,7 @@ expand_omp_sections (struct omp_region *region)
       t = gimple_block_label (s_entry_bb);
       u = build_int_cst (unsigned_type_node, casei);
       u = build_case_label (u, NULL, t);
-      VEC_quick_push (tree, label_vec, u);
+      label_vec.quick_push (u);
 
       si = gsi_last_bb (s_entry_bb);
       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
@@ -4881,11 +7094,12 @@ expand_omp_sections (struct omp_region *region)
   t = gimple_block_label (default_bb);
   u = build_case_label (NULL, NULL, t);
   make_edge (l0_bb, default_bb, 0);
+  if (current_loops)
+    add_bb_to_loop (default_bb, current_loops->tree_root);
 
-  stmt = gimple_build_switch_vec (vmain, u, label_vec);
+  stmt = gimple_build_switch (vmain, u, label_vec);
   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
   gsi_remove (&switch_si, true);
-  VEC_free (tree, heap, label_vec);
 
   si = gsi_start_bb (default_bb);
   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
@@ -4912,9 +7126,13 @@ expand_omp_sections (struct omp_region *region)
   si = gsi_last_bb (l2_bb);
   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
+  else if (gimple_omp_return_lhs (gsi_stmt (si)))
+    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
   else
     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
   stmt = gimple_build_call (t, 0);
+  if (gimple_omp_return_lhs (gsi_stmt (si)))
+    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
   gsi_remove (&si, true);
 
@@ -4930,26 +7148,21 @@ expand_omp_single (struct omp_region *region)
 {
   basic_block entry_bb, exit_bb;
   gimple_stmt_iterator si;
-  bool need_barrier = false;
 
   entry_bb = region->entry;
   exit_bb = region->exit;
 
   si = gsi_last_bb (entry_bb);
-  /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
-     be removed.  We need to ensure that the thread that entered the single
-     does not exit before the data is copied out by the other threads.  */
-  if (find_omp_clause (gimple_omp_single_clauses (gsi_stmt (si)),
-                      OMP_CLAUSE_COPYPRIVATE))
-    need_barrier = true;
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
   gsi_remove (&si, true);
   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
 
   si = gsi_last_bb (exit_bb);
-  if (!gimple_omp_return_nowait_p (gsi_stmt (si)) || need_barrier)
-    force_gimple_operand_gsi (&si, build_omp_barrier (), false, NULL_TREE,
-                             false, GSI_SAME_STMT);
+  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
+    {
+      tree t = gimple_omp_return_lhs (gsi_stmt (si));
+      gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT);
+    }
   gsi_remove (&si, true);
   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
 }
@@ -4971,8 +7184,10 @@ expand_omp_synch (struct omp_region *region)
   si = gsi_last_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
+             || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
-             || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL);
+             || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
+             || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
   gsi_remove (&si, true);
   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
 
@@ -5017,7 +7232,10 @@ expand_omp_atomic_load (basic_block load_bb, tree addr,
   itype = TREE_TYPE (TREE_TYPE (decl));
 
   call = build_call_expr_loc (loc, decl, 2, addr,
-                             build_int_cst (NULL, MEMMODEL_RELAXED));
+                             build_int_cst (NULL,
+                                            gimple_omp_atomic_seq_cst_p (stmt)
+                                            ? MEMMODEL_SEQ_CST
+                                            : MEMMODEL_RELAXED));
   if (!useless_type_conversion_p (type, itype))
     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
@@ -5089,7 +7307,10 @@ expand_omp_atomic_store (basic_block load_bb, tree addr,
   if (!useless_type_conversion_p (itype, type))
     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
   call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
-                             build_int_cst (NULL, MEMMODEL_RELAXED));
+                             build_int_cst (NULL,
+                                            gimple_omp_atomic_seq_cst_p (stmt)
+                                            ? MEMMODEL_SEQ_CST
+                                            : MEMMODEL_RELAXED));
   if (exchange)
     {
       if (!useless_type_conversion_p (type, itype))
@@ -5130,6 +7351,7 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
   enum tree_code code;
   bool need_old, need_new;
   enum machine_mode imode;
+  bool seq_cst;
 
   /* We expect to find the following sequences:
 
@@ -5155,6 +7377,7 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
     return false;
   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
+  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
   gcc_checking_assert (!need_old || !need_new);
 
   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
@@ -5221,7 +7444,9 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
      use the RELAXED memory model.  */
   call = build_call_expr_loc (loc, decl, 3, addr,
                              fold_convert_loc (loc, itype, rhs),
-                             build_int_cst (NULL, MEMMODEL_RELAXED));
+                             build_int_cst (NULL,
+                                            seq_cst ? MEMMODEL_SEQ_CST
+                                                    : MEMMODEL_RELAXED));
 
   if (need_old || need_new)
     {
@@ -5295,8 +7520,8 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
     {
       tree iaddr_val;
 
-      iaddr = make_rename_temp (build_pointer_type_for_mode (itype, ptr_mode,
-                                                            true), NULL);
+      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
+                                                          true), NULL);
       iaddr_val
        = force_gimple_operand_gsi (&si,
                                    fold_convert (TREE_TYPE (iaddr), addr),
@@ -5305,11 +7530,7 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
       loadedi = create_tmp_var (itype, NULL);
       if (gimple_in_ssa_p (cfun))
-       {
-         add_referenced_var (iaddr);
-         add_referenced_var (loadedi);
-         loadedi = make_ssa_name (loadedi, NULL);
-       }
+       loadedi = make_ssa_name (loadedi, NULL);
     }
   else
     {
@@ -5317,19 +7538,27 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
       loadedi = loaded_val;
     }
 
+  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
+  tree loaddecl = builtin_decl_explicit (fncode);
+  if (loaddecl)
+    initial
+      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
+                     build_call_expr (loaddecl, 2, iaddr,
+                                      build_int_cst (NULL_TREE,
+                                                     MEMMODEL_RELAXED)));
+  else
+    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
+                     build_int_cst (TREE_TYPE (iaddr), 0));
+
   initial
-    = force_gimple_operand_gsi (&si,
-                               build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)),
-                                       iaddr,
-                                       build_int_cst (TREE_TYPE (iaddr), 0)),
-                               true, NULL_TREE, true, GSI_SAME_STMT);
+    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
+                               GSI_SAME_STMT);
 
   /* Move the value to the LOADEDI temporary.  */
   if (gimple_in_ssa_p (cfun))
     {
       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
       phi = create_phi_node (loadedi, loop_header);
-      SSA_NAME_DEF_STMT (loadedi) = phi;
       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
               initial);
     }
@@ -5386,8 +7615,6 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   else
     {
       old_vali = create_tmp_var (TREE_TYPE (loadedi), NULL);
-      if (gimple_in_ssa_p (cfun))
-       add_referenced_var (old_vali);
       stmt = gimple_build_assign (old_vali, loadedi);
       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
 
@@ -5421,6 +7648,11 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
   gsi_remove (&si, true);
 
+  struct loop *loop = alloc_loop ();
+  loop->header = loop_header;
+  loop->latch = store_bb;
+  add_loop (loop, loop_header->loop_father);
+
   if (gimple_in_ssa_p (cfun))
     update_ssa (TODO_update_ssa_no_phi);
 
@@ -5506,7 +7738,7 @@ expand_omp_atomic (struct omp_region *region)
   HOST_WIDE_INT index;
 
   /* Make sure the type is one of the supported sizes.  */
-  index = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
   index = exact_log2 (index);
   if (index >= 0 && index <= 4)
     {
@@ -5553,24 +7785,344 @@ expand_omp_atomic (struct omp_region *region)
 }
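
A sketch of the size gate above: only power-of-two sizes of 1, 2, 4, 8
or 16 bytes yield an index in 0..4 and hence a sized atomic expansion:

   HOST_WIDE_INT size = 8;          /* e.g. long on LP64 */
   int index = exact_log2 (size);   /* == 3 */
   bool supported = index >= 0 && index <= 4;   /* true */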
 
 
-/* Expand the parallel region tree rooted at REGION.  Expansion
-   proceeds in depth-first order.  Innermost regions are expanded
-   first.  This way, parallel regions that require a new function to
-   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
-   internal dependencies in their body.  */
+/* Expand the OpenMP target{, data, update} directive starting at REGION.  */
 
 static void
-expand_omp (struct omp_region *region)
+expand_omp_target (struct omp_region *region)
 {
-  while (region)
-    {
-      location_t saved_location;
-
-      /* First, determine whether this is a combined parallel+workshare
-                region.  */
+  basic_block entry_bb, exit_bb, new_bb;
+  struct function *child_cfun = NULL;
+  tree child_fn = NULL_TREE, block, t;
+  gimple_stmt_iterator gsi;
+  gimple entry_stmt, stmt;
+  edge e;
+
+  entry_stmt = last_stmt (region->entry);
+  new_bb = region->entry;
+  int kind = gimple_omp_target_kind (entry_stmt);
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      child_fn = gimple_omp_target_child_fn (entry_stmt);
+      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
+    }
+
+  entry_bb = region->entry;
+  exit_bb = region->exit;
+
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      unsigned srcidx, dstidx, num;
+
+      /* If the target region needs data sent from the parent
+        function, then the very first statement (except possible
+        tree profile counter updates) of the target body
+        is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
+        &.OMP_DATA_O is passed as an argument to the child function,
+        we need to replace it with the argument as seen by the child
+        function.
+
+        In most cases, this will end up being the identity assignment
+        .OMP_DATA_I = .OMP_DATA_I.  However, if the target body had
+        a function call that has been inlined, the original PARM_DECL
+        .OMP_DATA_I may have been converted into a different local
+        variable.  In that case, we must keep the assignment.  */
+      if (gimple_omp_target_data_arg (entry_stmt))
+       {
+         basic_block entry_succ_bb = single_succ (entry_bb);
+         gimple_stmt_iterator gsi;
+         tree arg;
+         gimple tgtcopy_stmt = NULL;
+         tree sender
+           = TREE_VEC_ELT (gimple_omp_target_data_arg (entry_stmt), 0);
+
+         for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
+           {
+             gcc_assert (!gsi_end_p (gsi));
+             stmt = gsi_stmt (gsi);
+             if (gimple_code (stmt) != GIMPLE_ASSIGN)
+               continue;
+
+             if (gimple_num_ops (stmt) == 2)
+               {
+                 tree arg = gimple_assign_rhs1 (stmt);
+
+                 /* We're ignoring the subcode because we're
+                    effectively doing a STRIP_NOPS.  */
+
+                 if (TREE_CODE (arg) == ADDR_EXPR
+                     && TREE_OPERAND (arg, 0) == sender)
+                   {
+                     tgtcopy_stmt = stmt;
+                     break;
+                   }
+               }
+           }
+
+         gcc_assert (tgtcopy_stmt != NULL);
+         arg = DECL_ARGUMENTS (child_fn);
+
+         gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
+         gsi_remove (&gsi, true);
+       }
+
+      /* Declare local variables needed in CHILD_CFUN.  */
+      block = DECL_INITIAL (child_fn);
+      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
+      /* The gimplifier could record temporaries in the target block
+        rather than in the containing function's local_decls chain,
+        which would mean cgraph missed finalizing them.  Do it now.  */
+      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
+       if (TREE_CODE (t) == VAR_DECL
+           && TREE_STATIC (t)
+           && !DECL_EXTERNAL (t))
+         varpool_finalize_decl (t);
+      DECL_SAVED_TREE (child_fn) = NULL;
+      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
+      gimple_set_body (child_fn, NULL);
+      TREE_USED (block) = 1;
+
+      /* Reset DECL_CONTEXT on function arguments.  */
+      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
+       DECL_CONTEXT (t) = child_fn;
+
+      /* Split ENTRY_BB at GIMPLE_OMP_TARGET,
+        so that it can be moved to the child function.  */
+      gsi = gsi_last_bb (entry_bb);
+      stmt = gsi_stmt (gsi);
+      gcc_assert (stmt && gimple_code (stmt) == GIMPLE_OMP_TARGET
+                 && gimple_omp_target_kind (stmt)
+                    == GF_OMP_TARGET_KIND_REGION);
+      gsi_remove (&gsi, true);
+      e = split_block (entry_bb, stmt);
+      entry_bb = e->dest;
+      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
+
+      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
+      if (exit_bb)
+       {
+         gsi = gsi_last_bb (exit_bb);
+         gcc_assert (!gsi_end_p (gsi)
+                     && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
+         stmt = gimple_build_return (NULL);
+         gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+         gsi_remove (&gsi, true);
+       }
+
+      /* Move the target region into CHILD_CFUN.  */
+
+      block = gimple_block (entry_stmt);
+
+      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
+      if (exit_bb)
+       single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
+      /* When the OMP expansion process cannot guarantee an up-to-date
+        loop tree, arrange for the child function to fix up its loops.  */
+      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+       child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
+
+      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
+      num = vec_safe_length (child_cfun->local_decls);
+      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
+       {
+         t = (*child_cfun->local_decls)[srcidx];
+         if (DECL_CONTEXT (t) == cfun->decl)
+           continue;
+         if (srcidx != dstidx)
+           (*child_cfun->local_decls)[dstidx] = t;
+         dstidx++;
+       }
+      if (dstidx != num)
+       vec_safe_truncate (child_cfun->local_decls, dstidx);
+
+      /* Inform the callgraph about the new function.  */
+      DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
+      cgraph_add_new_function (child_fn, true);
+
+      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
+        fixed in a following pass.  */
+      push_cfun (child_cfun);
+      rebuild_cgraph_edges ();
+
+      /* Some EH regions might become dead, see PR34608.  If
+        pass_cleanup_cfg isn't the first pass to happen with the
+        new child, these dead EH edges might cause problems.
+        Clean them up now.  */
+      if (flag_exceptions)
+       {
+         basic_block bb;
+         bool changed = false;
+
+         FOR_EACH_BB_FN (bb, cfun)
+           changed |= gimple_purge_dead_eh_edges (bb);
+         if (changed)
+           cleanup_tree_cfg ();
+       }
+      pop_cfun ();
+    }
+
+  /* Emit a library call to launch the target region, or do data
+     transfers.  */
+  tree t1, t2, t3, t4, device, cond, c, clauses;
+  enum built_in_function start_ix;
+  location_t clause_loc;
+
+  clauses = gimple_omp_target_clauses (entry_stmt);
+
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    start_ix = BUILT_IN_GOMP_TARGET;
+  else if (kind == GF_OMP_TARGET_KIND_DATA)
+    start_ix = BUILT_IN_GOMP_TARGET_DATA;
+  else
+    start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
+
+  /* By default, the value of DEVICE is -1 (let the runtime library
+     choose) and there is no conditional.  */
+  cond = NULL_TREE;
+  device = build_int_cst (integer_type_node, -1);
+
+  c = find_omp_clause (clauses, OMP_CLAUSE_IF);
+  if (c)
+    cond = OMP_CLAUSE_IF_EXPR (c);
+
+  c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE);
+  if (c)
+    {
+      device = OMP_CLAUSE_DEVICE_ID (c);
+      clause_loc = OMP_CLAUSE_LOCATION (c);
+    }
+  else
+    clause_loc = gimple_location (entry_stmt);
+
+  /* Ensure 'device' is of the correct type.  */
+  device = fold_convert_loc (clause_loc, integer_type_node, device);
+
+  /* If we found the clause 'if (cond)', build
+     (cond ? device : -2).  */
+  if (cond)
+    {
+      cond = gimple_boolify (cond);
+
+      basic_block cond_bb, then_bb, else_bb;
+      edge e;
+      tree tmp_var;
+
+      tmp_var = create_tmp_var (TREE_TYPE (device), NULL);
+      if (kind != GF_OMP_TARGET_KIND_REGION)
+       {
+         gsi = gsi_last_bb (new_bb);
+         gsi_prev (&gsi);
+         e = split_block (new_bb, gsi_stmt (gsi));
+       }
+      else
+       e = split_block (new_bb, NULL);
+      cond_bb = e->src;
+      new_bb = e->dest;
+      remove_edge (e);
+
+      then_bb = create_empty_bb (cond_bb);
+      else_bb = create_empty_bb (then_bb);
+      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
+
+      stmt = gimple_build_cond_empty (cond);
+      gsi = gsi_last_bb (cond_bb);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      gsi = gsi_start_bb (then_bb);
+      stmt = gimple_build_assign (tmp_var, device);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      gsi = gsi_start_bb (else_bb);
+      stmt = gimple_build_assign (tmp_var,
+                                 build_int_cst (integer_type_node, -2));
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
+      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
+      if (current_loops)
+       {
+         add_bb_to_loop (then_bb, cond_bb->loop_father);
+         add_bb_to_loop (else_bb, cond_bb->loop_father);
+       }
+      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
+      make_edge (else_bb, new_bb, EDGE_FALLTHRU);
+
+      device = tmp_var;
+    }
+
+  gsi = gsi_last_bb (new_bb);
+  t = gimple_omp_target_data_arg (entry_stmt);
+  if (t == NULL)
+    {
+      t1 = size_zero_node;
+      t2 = build_zero_cst (ptr_type_node);
+      t3 = t2;
+      t4 = t2;
+    }
+  else
+    {
+      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
+      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
+      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
+      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
+      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
+    }
+
+  gimple g;
+  /* FIXME: This will be the address of the
+     extern char __OPENMP_TARGET__[] __attribute__((visibility ("hidden")))
+     symbol, as soon as the linker plugin is able to create it for us.  */
+  tree openmp_target = build_zero_cst (ptr_type_node);
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      tree fnaddr = build_fold_addr_expr (child_fn);
+      g = gimple_build_call (builtin_decl_explicit (start_ix), 7,
+                            device, fnaddr, openmp_target, t1, t2, t3, t4);
+    }
+  else
+    g = gimple_build_call (builtin_decl_explicit (start_ix), 6,
+                          device, openmp_target, t1, t2, t3, t4);
+  gimple_set_location (g, gimple_location (entry_stmt));
+  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+  if (kind != GF_OMP_TARGET_KIND_REGION)
+    {
+      g = gsi_stmt (gsi);
+      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
+      gsi_remove (&gsi, true);
+    }
+  if (kind == GF_OMP_TARGET_KIND_DATA && region->exit)
+    {
+      gsi = gsi_last_bb (region->exit);
+      g = gsi_stmt (gsi);
+      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
+      gsi_remove (&gsi, true);
+    }
+}
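
For reference, a sketch of the libgomp entry points the calls above are
built against (prototypes as of this GCC version; t1..t4 correspond to
the map count and the hostaddrs/sizes/kinds vectors):

   void GOMP_target (int device, void (*fn) (void *), const void *target_data,
                     size_t mapnum, void **hostaddrs, size_t *sizes,
                     unsigned char *kinds);
   void GOMP_target_data (int device, const void *target_data, size_t mapnum,
                          void **hostaddrs, size_t *sizes,
                          unsigned char *kinds);
   void GOMP_target_update (int device, const void *target_data, size_t mapnum,
                            void **hostaddrs, size_t *sizes,
                            unsigned char *kinds);

A device of -1 lets the runtime choose, and -2 (from the if-clause
expansion above) forces host fallback.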
+
+
+/* Expand the parallel region tree rooted at REGION.  Expansion
+   proceeds in depth-first order.  Innermost regions are expanded
+   first.  This way, parallel regions that require a new function to
+   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
+   internal dependencies in their body.  */
+
+static void
+expand_omp (struct omp_region *region)
+{
+  while (region)
+    {
+      location_t saved_location;
+      gimple inner_stmt = NULL;
+
+      /* First, determine whether this is a combined parallel+workshare
+                region.  */
       if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);
 
+      if (region->type == GIMPLE_OMP_FOR
+         && gimple_omp_for_combined_p (last_stmt (region->entry)))
+       inner_stmt = last_stmt (region->inner->entry);
+
       if (region->inner)
        expand_omp (region->inner);
 
@@ -5586,7 +8138,7 @@ expand_omp (struct omp_region *region)
          break;
 
        case GIMPLE_OMP_FOR:
-         expand_omp_for (region);
+         expand_omp_for (region, inner_stmt);
          break;
 
        case GIMPLE_OMP_SECTIONS:
@@ -5603,8 +8155,10 @@ expand_omp (struct omp_region *region)
          break;
 
        case GIMPLE_OMP_MASTER:
+       case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_ORDERED:
        case GIMPLE_OMP_CRITICAL:
+       case GIMPLE_OMP_TEAMS:
          expand_omp_synch (region);
          break;
 
@@ -5612,6 +8166,10 @@ expand_omp (struct omp_region *region)
          expand_omp_atomic (region);
          break;
 
+       case GIMPLE_OMP_TARGET:
+         expand_omp_target (region);
+         break;
+
        default:
          gcc_unreachable ();
        }
@@ -5676,6 +8234,9 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
             GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
          ;
        }
+      else if (code == GIMPLE_OMP_TARGET
+              && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_UPDATE)
+       new_omp_region (bb, code, parent);
       else
        {
          /* Otherwise, this directive becomes the parent for a new
@@ -5732,7 +8293,7 @@ build_omp_regions (void)
 {
   gcc_assert (root_omp_region == NULL);
   calculate_dominance_info (CDI_DOMINATORS);
-  build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
+  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
 }
 
 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
@@ -5768,30 +8329,76 @@ execute_expand_omp (void)
 static bool
 gate_expand_omp (void)
 {
-  return (flag_openmp != 0 && !seen_error ());
-}
-
-struct gimple_opt_pass pass_expand_omp =
-{
- {
-  GIMPLE_PASS,
-  "ompexp",                            /* name */
-  gate_expand_omp,                     /* gate */
-  execute_expand_omp,                  /* execute */
-  NULL,                                        /* sub */
-  NULL,                                        /* next */
-  0,                                   /* static_pass_number */
-  TV_NONE,                             /* tv_id */
-  PROP_gimple_any,                     /* properties_required */
-  0,                                   /* properties_provided */
-  0,                                   /* properties_destroyed */
-  0,                                   /* todo_flags_start */
-  0                                    /* todo_flags_finish */
- }
+  return ((flag_openmp != 0 || flag_openmp_simd != 0
+          || flag_cilkplus != 0) && !seen_error ());
+}
+
+namespace {
+
+const pass_data pass_data_expand_omp =
+{
+  GIMPLE_PASS, /* type */
+  "ompexp", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_NONE, /* tv_id */
+  PROP_gimple_any, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
 };
+
+class pass_expand_omp : public gimple_opt_pass
+{
+public:
+  pass_expand_omp (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_expand_omp, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate () { return gate_expand_omp (); }
+  unsigned int execute () { return execute_expand_omp (); }
+
+}; // class pass_expand_omp
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_expand_omp (gcc::context *ctxt)
+{
+  return new pass_expand_omp (ctxt);
+}
 \f
 /* Routines to lower OpenMP directives into OMP-GIMPLE.  */
 
+/* If CTX is a worksharing context inside a cancellable parallel
+   region and it isn't nowait, add a lhs to its GIMPLE_OMP_RETURN
+   and a conditional branch to the parallel's cancel_label to handle
+   cancellation in the implicit barrier.  */
+
+static void
+maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body)
+{
+  gimple omp_return = gimple_seq_last_stmt (*body);
+  gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN);
+  if (gimple_omp_return_nowait_p (omp_return))
+    return;
+  if (ctx->outer
+      && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL
+      && ctx->outer->cancellable)
+    {
+      tree lhs = create_tmp_var (boolean_type_node, NULL);
+      gimple_omp_return_set_lhs (omp_return, lhs);
+      tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
+      gimple g = gimple_build_cond (NE_EXPR, lhs, boolean_false_node,
+                                   ctx->outer->cancel_label, fallthru_label);
+      gimple_seq_add_stmt (body, g);
+      gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
+    }
+}
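
Rendered as C, the sequence this helper appends looks roughly like the
following sketch; the concrete *_end_cancel call is materialized later,
when expansion processes the OMP_RETURN lhs (see expand_omp_sections):

   bool lhs = GOMP_sections_end_cancel ();  /* via the OMP_RETURN lhs */
   if (lhs != false)
     goto cancel_label;
  fallthru_label:;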
+
 /* Lower the OpenMP sections directive in the current statement in GSI_P.
    CTX is the enclosing OMP context for the current statement.  */
 
@@ -5802,16 +8409,15 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gimple_stmt_iterator tgsi;
   gimple stmt, new_stmt, bind, t;
   gimple_seq ilist, dlist, olist, new_body;
-  struct gimplify_ctx gctx;
 
   stmt = gsi_stmt (*gsi_p);
 
-  push_gimplify_context (&gctx);
+  push_gimplify_context ();
 
   dlist = NULL;
   ilist = NULL;
   lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
-                          &ilist, &dlist, ctx);
+                          &ilist, &dlist, ctx, NULL);
 
   new_body = gimple_omp_body (stmt);
   gimple_omp_set_body (stmt, NULL);
@@ -5871,6 +8477,8 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gimple_seq_add_stmt (&new_body, t);
 
   gimple_seq_add_seq (&new_body, olist);
+  if (ctx->cancellable)
+    gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
   gimple_seq_add_seq (&new_body, dlist);
 
   new_body = maybe_catch_exception (new_body);
@@ -5879,6 +8487,7 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
         (!!find_omp_clause (gimple_omp_sections_clauses (stmt),
                            OMP_CLAUSE_NOWAIT));
   gimple_seq_add_stmt (&new_body, t);
+  maybe_add_implicit_barrier_cancel (ctx, &new_body);
 
   gimple_bind_set_body (new_stmt, new_body);
 }
@@ -6008,10 +8617,9 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 {
   tree block;
   gimple t, bind, single_stmt = gsi_stmt (*gsi_p);
-  gimple_seq bind_body, dlist;
-  struct gimplify_ctx gctx;
+  gimple_seq bind_body, bind_body_tail = NULL, dlist;
 
-  push_gimplify_context (&gctx);
+  push_gimplify_context ();
 
   block = make_node (BLOCK);
   bind = gimple_build_bind (NULL, NULL, block);
@@ -6019,7 +8627,7 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   bind_body = NULL;
   dlist = NULL;
   lower_rec_input_clauses (gimple_omp_single_clauses (single_stmt),
-                          &bind_body, &dlist, ctx);
+                          &bind_body, &dlist, ctx, NULL);
   lower_omp (gimple_omp_body_ptr (single_stmt), ctx);
 
   gimple_seq_add_stmt (&bind_body, single_stmt);
@@ -6038,7 +8646,17 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   t = gimple_build_omp_return
         (!!find_omp_clause (gimple_omp_single_clauses (single_stmt),
                            OMP_CLAUSE_NOWAIT));
-  gimple_seq_add_stmt (&bind_body, t);
+  gimple_seq_add_stmt (&bind_body_tail, t);
+  maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail);
+  if (ctx->record_type)
+    {
+      gimple_stmt_iterator gsi = gsi_start (bind_body_tail);
+      tree clobber = build_constructor (ctx->record_type, NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      gsi_insert_after (&gsi, gimple_build_assign (ctx->sender_decl,
+                                                  clobber), GSI_SAME_STMT);
+    }
+  gimple_seq_add_seq (&bind_body, bind_body_tail);
   gimple_bind_set_body (bind, bind_body);
 
   pop_gimplify_context (bind);
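
The clobber built above is GIMPLE's way of marking the sender struct
dead; in dump form it is roughly (a sketch):

   .omp_data_o = {CLOBBER};

i.e. an empty volatile constructor that lets later passes shrink the
struct's live range once the single region no longer needs it.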
@@ -6059,9 +8677,8 @@ lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gimple stmt = gsi_stmt (*gsi_p), bind;
   location_t loc = gimple_location (stmt);
   gimple_seq tseq;
-  struct gimplify_ctx gctx;
 
-  push_gimplify_context (&gctx);
+  push_gimplify_context ();
 
   block = make_node (BLOCK);
   bind = gimple_build_bind (NULL, NULL, block);
@@ -6092,47 +8709,73 @@ lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 }
 
 
-/* Expand code for an OpenMP ordered directive.  */
+/* Expand code for an OpenMP taskgroup directive.  */
 
 static void
-lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
+lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 {
-  tree block;
   gimple stmt = gsi_stmt (*gsi_p), bind, x;
-  struct gimplify_ctx gctx;
-
-  push_gimplify_context (&gctx);
+  tree block = make_node (BLOCK);
 
-  block = make_node (BLOCK);
   bind = gimple_build_bind (NULL, NULL, block);
   gsi_replace (gsi_p, bind, true);
   gimple_bind_add_stmt (bind, stmt);
 
-  x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
+  x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_START),
                         0);
   gimple_bind_add_stmt (bind, x);
 
   lower_omp (gimple_omp_body_ptr (stmt), ctx);
-  gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
   gimple_bind_add_seq (bind, gimple_omp_body (stmt));
   gimple_omp_set_body (stmt, NULL);
 
-  x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END), 0);
-  gimple_bind_add_stmt (bind, x);
-
   gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
 
-  pop_gimplify_context (bind);
-
   gimple_bind_append_vars (bind, ctx->block_vars);
-  BLOCK_VARS (block) = gimple_bind_vars (bind);
+  BLOCK_VARS (block) = ctx->block_vars;
 }
 
 
-/* Gimplify a GIMPLE_OMP_CRITICAL statement.  This is a relatively simple
-   substitution of a couple of function calls.  But in the NAMED case,
-   requires that languages coordinate a symbol name.  It is therefore
-   best put here in common code.  */
+/* Expand code for an OpenMP ordered directive.  */
+
+static void
+lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
+{
+  tree block;
+  gimple stmt = gsi_stmt (*gsi_p), bind, x;
+
+  push_gimplify_context ();
+
+  block = make_node (BLOCK);
+  bind = gimple_build_bind (NULL, NULL, block);
+  gsi_replace (gsi_p, bind, true);
+  gimple_bind_add_stmt (bind, stmt);
+
+  x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
+                        0);
+  gimple_bind_add_stmt (bind, x);
+
+  lower_omp (gimple_omp_body_ptr (stmt), ctx);
+  gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
+  gimple_bind_add_seq (bind, gimple_omp_body (stmt));
+  gimple_omp_set_body (stmt, NULL);
+
+  x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END), 0);
+  gimple_bind_add_stmt (bind, x);
+
+  gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
+
+  pop_gimplify_context (bind);
+
+  gimple_bind_append_vars (bind, ctx->block_vars);
+  BLOCK_VARS (block) = gimple_bind_vars (bind);
+}
+
+
+/* Gimplify a GIMPLE_OMP_CRITICAL statement.  This is a relatively simple
+   substitution of a couple of function calls.  But the NAMED case
+   requires that languages coordinate a symbol name.  It is therefore
+   best put here in common code.  */
 
 static GTY((param1_is (tree), param2_is (tree)))
   splay_tree critical_name_mutexes;
@@ -6145,7 +8788,6 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gimple stmt = gsi_stmt (*gsi_p), bind;
   location_t loc = gimple_location (stmt);
   gimple_seq tbody;
-  struct gimplify_ctx gctx;
 
   name = gimple_omp_critical_name (stmt);
   if (name)
@@ -6198,7 +8840,7 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       unlock = build_call_expr_loc (loc, unlock, 0);
     }
 
-  push_gimplify_context (&gctx);
+  push_gimplify_context ();
 
   block = make_node (BLOCK);
   bind = gimple_build_bind (NULL, NULL, block);
@@ -6245,9 +8887,9 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
 
   /* When possible, use a strict equality expression.  This can let VRP
      type optimizations deduce the value and remove a copy.  */
-  if (host_integerp (fd->loop.step, 0))
+  if (tree_fits_shwi_p (fd->loop.step))
     {
-      HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->loop.step);
+      HOST_WIDE_INT step = tree_to_shwi (fd->loop.step);
       if (step == 1 || step == -1)
        cond_code = EQ_EXPR;
     }
@@ -6265,9 +8907,11 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
       /* Optimize: v = 0; is usually cheaper than v = some_other_constant.  */
       vinit = fd->loop.n1;
       if (cond_code == EQ_EXPR
-         && host_integerp (fd->loop.n2, 0)
+         && tree_fits_shwi_p (fd->loop.n2)
          && ! integer_zerop (fd->loop.n2))
        vinit = build_int_cst (TREE_TYPE (fd->loop.v), 0);
+      else
+       vinit = unshare_expr (vinit);
 
       /* Initialize the iterator variable, so that threads that don't execute
         any iterations don't execute the lastprivate clauses by accident.  */
@@ -6282,16 +8926,14 @@ static void
 lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 {
   tree *rhs_p, block;
-  struct omp_for_data fd;
+  struct omp_for_data fd, *fdp = NULL;
   gimple stmt = gsi_stmt (*gsi_p), new_stmt;
   gimple_seq omp_for_body, body, dlist;
   size_t i;
-  struct gimplify_ctx gctx;
 
-  push_gimplify_context (&gctx);
+  push_gimplify_context ();
 
   lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
-  lower_omp (gimple_omp_body_ptr (stmt), ctx);
 
   block = make_node (BLOCK);
   new_stmt = gimple_build_bind (NULL, NULL, block);
@@ -6306,16 +8948,64 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   if (!gimple_seq_empty_p (omp_for_body)
       && gimple_code (gimple_seq_first_stmt (omp_for_body)) == GIMPLE_BIND)
     {
-      tree vars = gimple_bind_vars (gimple_seq_first_stmt (omp_for_body));
+      gimple inner_bind = gimple_seq_first_stmt (omp_for_body);
+      tree vars = gimple_bind_vars (inner_bind);
       gimple_bind_append_vars (new_stmt, vars);
+      /* bind_vars/BLOCK_VARS are being moved to new_stmt/block; don't
+        keep them on the inner_bind and its block.  */
+      gimple_bind_set_vars (inner_bind, NULL_TREE);
+      if (gimple_bind_block (inner_bind))
+       BLOCK_VARS (gimple_bind_block (inner_bind)) = NULL_TREE;
+    }
+
+  if (gimple_omp_for_combined_into_p (stmt))
+    {
+      extract_omp_for_data (stmt, &fd, NULL);
+      fdp = &fd;
+
+      /* We need two temporaries with fd.loop.v type (istart/iend)
+        and then (fd.collapse - 1) temporaries with the same
+        type for count2 ... countN-1 vars if not constant.  */
+      size_t count = 2;
+      tree type = fd.iter_type;
+      if (fd.collapse > 1
+         && TREE_CODE (fd.loop.n2) != INTEGER_CST)
+       count += fd.collapse - 1;
+      bool parallel_for = gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR;
+      tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
+      tree clauses = *pc;
+      if (parallel_for)
+       outerc
+         = find_omp_clause (gimple_omp_parallel_clauses (ctx->outer->stmt),
+                            OMP_CLAUSE__LOOPTEMP_);
+      for (i = 0; i < count; i++)
+       {
+         tree temp;
+         if (parallel_for)
+           {
+             gcc_assert (outerc);
+             temp = lookup_decl (OMP_CLAUSE_DECL (outerc), ctx->outer);
+             outerc = find_omp_clause (OMP_CLAUSE_CHAIN (outerc),
+                                       OMP_CLAUSE__LOOPTEMP_);
+           }
+         else
+           temp = create_tmp_var (type, NULL);
+         *pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
+         OMP_CLAUSE_DECL (*pc) = temp;
+         pc = &OMP_CLAUSE_CHAIN (*pc);
+       }
+      *pc = clauses;
     }
 
   /* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR.  */
   dlist = NULL;
   body = NULL;
-  lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx);
+  lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
+                          fdp);
   gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
 
+  lower_omp (gimple_omp_body_ptr (stmt), ctx);
+
   /* Lower the header expressions.  At this point, we can assume that
      the header is of the form:
 
@@ -6351,13 +9041,17 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   /* After the loop, add exit clauses.  */
   lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx);
+
+  if (ctx->cancellable)
+    gimple_seq_add_stmt (&body, gimple_build_label (ctx->cancel_label));
+
   gimple_seq_add_seq (&body, dlist);
 
   body = maybe_catch_exception (body);
 
   /* Region exit marker goes at the end of the loop body.  */
   gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
-
+  maybe_add_implicit_barrier_cancel (ctx, &body);
   pop_gimplify_context (new_stmt);
 
   gimple_bind_append_vars (new_stmt, ctx->block_vars);
@@ -6371,7 +9065,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 }
 
 /* Callback for walk_stmts.  Check if the current statement only contains
-   GIMPLE_OMP_FOR or GIMPLE_OMP_PARALLEL.  */
+   GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS.  */
 
 static tree
 check_combined_parallel (gimple_stmt_iterator *gsi_p,
@@ -6458,7 +9152,6 @@ create_task_copyfn (gimple task_stmt, omp_context *ctx)
   bool record_needs_remap = false, srecord_needs_remap = false;
   splay_tree_node n;
   struct omp_taskcopy_context tcctx;
-  struct gimplify_ctx gctx;
   location_t loc = gimple_location (task_stmt);
 
   child_fn = gimple_omp_task_copy_fn (task_stmt);
@@ -6471,8 +9164,8 @@ create_task_copyfn (gimple task_stmt, omp_context *ctx)
     DECL_CONTEXT (t) = child_fn;
 
   /* Populate the function.  */
-  push_gimplify_context (&gctx);
-  current_function_decl = child_fn;
+  push_gimplify_context ();
+  push_cfun (child_cfun);
 
   bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
   TREE_SIDE_EFFECTS (bind) = 1;
@@ -6519,8 +9212,6 @@ create_task_copyfn (gimple task_stmt, omp_context *ctx)
   else
     tcctx.cb.decl_map = NULL;
 
-  push_cfun (child_cfun);
-
   arg = DECL_ARGUMENTS (child_fn);
   TREE_TYPE (arg) = build_pointer_type (record_type);
   sarg = DECL_CHAIN (arg);
@@ -6678,7 +9369,68 @@ create_task_copyfn (gimple task_stmt, omp_context *ctx)
   pop_gimplify_context (NULL);
   BIND_EXPR_BODY (bind) = list;
   pop_cfun ();
-  current_function_decl = ctx->cb.src_fn;
+}
+
+static void
+lower_depend_clauses (gimple stmt, gimple_seq *iseq, gimple_seq *oseq)
+{
+  tree c, clauses;
+  gimple g;
+  size_t n_in = 0, n_out = 0, idx = 2, i;
+
+  clauses = find_omp_clause (gimple_omp_task_clauses (stmt),
+                            OMP_CLAUSE_DEPEND);
+  gcc_assert (clauses);
+  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND)
+      switch (OMP_CLAUSE_DEPEND_KIND (c))
+       {
+       case OMP_CLAUSE_DEPEND_IN:
+         n_in++;
+         break;
+       case OMP_CLAUSE_DEPEND_OUT:
+       case OMP_CLAUSE_DEPEND_INOUT:
+         n_out++;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+  tree type = build_array_type_nelts (ptr_type_node, n_in + n_out + 2);
+  tree array = create_tmp_var (type, NULL);
+  tree r = build4 (ARRAY_REF, ptr_type_node, array, size_int (0), NULL_TREE,
+                  NULL_TREE);
+  g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_in + n_out));
+  gimple_seq_add_stmt (iseq, g);
+  r = build4 (ARRAY_REF, ptr_type_node, array, size_int (1), NULL_TREE,
+             NULL_TREE);
+  g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_out));
+  gimple_seq_add_stmt (iseq, g);
+  for (i = 0; i < 2; i++)
+    {
+      if ((i ? n_in : n_out) == 0)
+       continue;
+      for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+           && ((OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_IN) ^ i))
+         {
+           tree t = OMP_CLAUSE_DECL (c);
+           t = fold_convert (ptr_type_node, t);
+           gimplify_expr (&t, iseq, NULL, is_gimple_val, fb_rvalue);
+           r = build4 (ARRAY_REF, ptr_type_node, array, size_int (idx++),
+                       NULL_TREE, NULL_TREE);
+           g = gimple_build_assign (r, t);
+           gimple_seq_add_stmt (iseq, g);
+         }
+    }
+  tree *p = gimple_omp_task_clauses_ptr (stmt);
+  c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+  OMP_CLAUSE_DECL (c) = build_fold_addr_expr (array);
+  OMP_CLAUSE_CHAIN (c) = *p;
+  *p = c;
+  tree clobber = build_constructor (type, NULL);
+  TREE_THIS_VOLATILE (clobber) = 1;
+  g = gimple_build_assign (array, clobber);
+  gimple_seq_add_stmt (oseq, g);
 }
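
A sketch of the array this function builds, for a hypothetical

   #pragma omp task depend(out: x) depend(in: y, z)

following the layout in the code above (names illustrative):

   void *deps[5];
   deps[0] = (void *) 3;  /* total number of depend clauses */
   deps[1] = (void *) 1;  /* number of out/inout entries */
   deps[2] = &x;          /* out/inout addresses come first */
   deps[3] = &y;          /* then the in addresses */
   deps[4] = &z;

The address of deps is then prepended to the task's clause chain, and
the array is clobbered once the task construct is done with it.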
 
 /* Lower the OpenMP parallel or task directive in the current statement
@@ -6690,9 +9442,8 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   tree clauses;
   tree child_fn, t;
   gimple stmt = gsi_stmt (*gsi_p);
-  gimple par_bind, bind;
-  gimple_seq par_body, olist, ilist, par_olist, par_ilist, new_body;
-  struct gimplify_ctx gctx;
+  gimple par_bind, bind, dep_bind = NULL;
+  gimple_seq par_body, olist, ilist, par_olist, par_rlist, par_ilist, new_body;
   location_t loc = gimple_location (stmt);
 
   clauses = gimple_omp_taskreg_clauses (stmt);
@@ -6712,17 +9463,28 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       if (ws_num == 1)
        gimple_omp_parallel_set_combined_p (stmt, true);
     }
+  gimple_seq dep_ilist = NULL;
+  gimple_seq dep_olist = NULL;
+  if (gimple_code (stmt) == GIMPLE_OMP_TASK
+      && find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
+    {
+      push_gimplify_context ();
+      dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
+      lower_depend_clauses (stmt, &dep_ilist, &dep_olist);
+    }
+
   if (ctx->srecord_type)
     create_task_copyfn (stmt, ctx);
 
-  push_gimplify_context (&gctx);
+  push_gimplify_context ();
 
   par_olist = NULL;
   par_ilist = NULL;
-  lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
+  par_rlist = NULL;
+  lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL);
   lower_omp (&par_body, ctx);
   if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
-    lower_reduction_clauses (clauses, &par_olist, ctx);
+    lower_reduction_clauses (clauses, &par_rlist, ctx);
 
   /* Declare all the variables created by mapping and the variables
      declared in the scope of the parallel body.  */
@@ -6744,6 +9506,14 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   lower_send_clauses (clauses, &ilist, &olist, ctx);
   lower_send_shared_vars (&ilist, &olist, ctx);
 
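+  /* Once the construct is done with the sender record, emit a clobber
+     for it so its stack slot can be reused.  */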
+  if (ctx->record_type)
+    {
+      tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
+                                                       clobber));
+    }
+
   /* Once all the expansions are done, sequence all the different
      fragments inside gimple_omp_body.  */
 
@@ -6760,12 +9530,335 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   gimple_seq_add_seq (&new_body, par_ilist);
   gimple_seq_add_seq (&new_body, par_body);
+  gimple_seq_add_seq (&new_body, par_rlist);
+  if (ctx->cancellable)
+    gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
   gimple_seq_add_seq (&new_body, par_olist);
   new_body = maybe_catch_exception (new_body);
   gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
   gimple_omp_set_body (stmt, new_body);
 
   bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
+  gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
+  gimple_bind_add_seq (bind, ilist);
+  gimple_bind_add_stmt (bind, stmt);
+  gimple_bind_add_seq (bind, olist);
+
+  pop_gimplify_context (NULL);
+
+  if (dep_bind)
+    {
+      gimple_bind_add_seq (dep_bind, dep_ilist);
+      gimple_bind_add_stmt (dep_bind, bind);
+      gimple_bind_add_seq (dep_bind, dep_olist);
+      pop_gimplify_context (dep_bind);
+    }
+}
+
+/* Lower the OpenMP target directive in the current statement
+   in GSI_P.  CTX holds context information for the directive.  */
+
+static void
+lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
+{
+  tree clauses;
+  tree child_fn, t, c;
+  gimple stmt = gsi_stmt (*gsi_p);
+  gimple tgt_bind = NULL, bind;
+  gimple_seq tgt_body = NULL, olist, ilist, new_body;
+  location_t loc = gimple_location (stmt);
+  int kind = gimple_omp_target_kind (stmt);
+  unsigned int map_cnt = 0;
+
+  clauses = gimple_omp_target_clauses (stmt);
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      tgt_bind = gimple_seq_first_stmt (gimple_omp_body (stmt));
+      tgt_body = gimple_bind_body (tgt_bind);
+    }
+  else if (kind == GF_OMP_TARGET_KIND_DATA)
+    tgt_body = gimple_omp_body (stmt);
+  child_fn = ctx->cb.dst_fn;
+
+  push_gimplify_context ();
+
+  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+    switch (OMP_CLAUSE_CODE (c))
+      {
+       tree var, x;
+
+      default:
+       break;
+      case OMP_CLAUSE_MAP:
+      case OMP_CLAUSE_TO:
+      case OMP_CLAUSE_FROM:
+       var = OMP_CLAUSE_DECL (c);
+       if (!DECL_P (var))
+         {
+           if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP
+               || !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
+             map_cnt++;
+           continue;
+         }
+
+       if (DECL_SIZE (var)
+           && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
+         {
+           tree var2 = DECL_VALUE_EXPR (var);
+           gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
+           var2 = TREE_OPERAND (var2, 0);
+           gcc_assert (DECL_P (var2));
+           var = var2;
+         }
+
+       if (!maybe_lookup_field (var, ctx))
+         continue;
+
+       if (kind == GF_OMP_TARGET_KIND_REGION)
+         {
+           x = build_receiver_ref (var, true, ctx);
+           tree new_var = lookup_decl (var, ctx);
+           if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+               && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER
+               && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
+               && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
+             x = build_simple_mem_ref (x);
+           SET_DECL_VALUE_EXPR (new_var, x);
+           DECL_HAS_VALUE_EXPR_P (new_var) = 1;
+         }
+       map_cnt++;
+      }
+
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      target_nesting_level++;
+      lower_omp (&tgt_body, ctx);
+      target_nesting_level--;
+    }
+  else if (kind == GF_OMP_TARGET_KIND_DATA)
+    lower_omp (&tgt_body, ctx);
+
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      /* Declare all the variables created by mapping and the variables
+        declared in the scope of the target body.  */
+      record_vars_into (ctx->block_vars, child_fn);
+      record_vars_into (gimple_bind_vars (tgt_bind), child_fn);
+    }
+
+  olist = NULL;
+  ilist = NULL;
+  if (ctx->record_type)
+    {
+      ctx->sender_decl
+       = create_tmp_var (ctx->record_type, ".omp_data_arr");
+      DECL_NAMELESS (ctx->sender_decl) = 1;
+      TREE_ADDRESSABLE (ctx->sender_decl) = 1;
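+      /* The data argument is a TREE_VEC of three elements: the sender
+        record, an array of the mapped object sizes and an array of
+        the mapping kinds.  */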
+      t = make_tree_vec (3);
+      TREE_VEC_ELT (t, 0) = ctx->sender_decl;
+      TREE_VEC_ELT (t, 1)
+       = create_tmp_var (build_array_type_nelts (size_type_node, map_cnt),
+                         ".omp_data_sizes");
+      DECL_NAMELESS (TREE_VEC_ELT (t, 1)) = 1;
+      TREE_ADDRESSABLE (TREE_VEC_ELT (t, 1)) = 1;
+      TREE_STATIC (TREE_VEC_ELT (t, 1)) = 1;
+      TREE_VEC_ELT (t, 2)
+       = create_tmp_var (build_array_type_nelts (unsigned_char_type_node,
+                                                 map_cnt),
+                         ".omp_data_kinds");
+      DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1;
+      TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1;
+      TREE_STATIC (TREE_VEC_ELT (t, 2)) = 1;
+      gimple_omp_target_set_data_arg (stmt, t);
+
+      vec<constructor_elt, va_gc> *vsize;
+      vec<constructor_elt, va_gc> *vkind;
+      vec_alloc (vsize, map_cnt);
+      vec_alloc (vkind, map_cnt);
+      unsigned int map_idx = 0;
+
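+      /* Walk the map/to/from clauses again, this time filling in the
+        sender record fields and the constructors for the sizes and
+        kinds arrays.  */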
+      for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+       switch (OMP_CLAUSE_CODE (c))
+         {
+           tree ovar, nc;
+
+         default:
+           break;
+         case OMP_CLAUSE_MAP:
+         case OMP_CLAUSE_TO:
+         case OMP_CLAUSE_FROM:
+           nc = c;
+           ovar = OMP_CLAUSE_DECL (c);
+           if (!DECL_P (ovar))
+             {
+               if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+                   && OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
+                 {
+                   gcc_checking_assert (OMP_CLAUSE_DECL (OMP_CLAUSE_CHAIN (c))
+                                        == get_base_address (ovar));
+                   nc = OMP_CLAUSE_CHAIN (c);
+                   ovar = OMP_CLAUSE_DECL (nc);
+                 }
+               else
+                 {
+                   tree x = build_sender_ref (ovar, ctx);
+                   tree v
+                     = build_fold_addr_expr_with_type (ovar, ptr_type_node);
+                   gimplify_assign (x, v, &ilist);
+                   nc = NULL_TREE;
+                 }
+             }
+           else
+             {
+               if (DECL_SIZE (ovar)
+                   && TREE_CODE (DECL_SIZE (ovar)) != INTEGER_CST)
+                 {
+                   tree ovar2 = DECL_VALUE_EXPR (ovar);
+                   gcc_assert (TREE_CODE (ovar2) == INDIRECT_REF);
+                   ovar2 = TREE_OPERAND (ovar2, 0);
+                   gcc_assert (DECL_P (ovar2));
+                   ovar = ovar2;
+                 }
+               if (!maybe_lookup_field (ovar, ctx))
+                 continue;
+             }
+
+           if (nc)
+             {
+               tree var = lookup_decl_in_outer_ctx (ovar, ctx);
+               tree x = build_sender_ref (ovar, ctx);
+               if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+                   && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER
+                   && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
+                   && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE)
+                 {
+                   gcc_assert (kind == GF_OMP_TARGET_KIND_REGION);
+                   tree avar
+                     = create_tmp_var (TREE_TYPE (TREE_TYPE (x)), NULL);
+                   mark_addressable (avar);
+                   gimplify_assign (avar, build_fold_addr_expr (var), &ilist);
+                   avar = build_fold_addr_expr (avar);
+                   gimplify_assign (x, avar, &ilist);
+                 }
+               else if (is_gimple_reg (var))
+                 {
+                   gcc_assert (kind == GF_OMP_TARGET_KIND_REGION);
+                   tree avar = create_tmp_var (TREE_TYPE (var), NULL);
+                   mark_addressable (avar);
+                   if (OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_ALLOC
+                       && OMP_CLAUSE_MAP_KIND (c) != OMP_CLAUSE_MAP_FROM)
+                     gimplify_assign (avar, var, &ilist);
+                   avar = build_fold_addr_expr (avar);
+                   gimplify_assign (x, avar, &ilist);
+                   if ((OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_FROM
+                        || OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_TOFROM)
+                       && !TYPE_READONLY (TREE_TYPE (var)))
+                     {
+                       x = build_sender_ref (ovar, ctx);
+                       x = build_simple_mem_ref (x);
+                       gimplify_assign (var, x, &olist);
+                     }
+                 }
+               else
+                 {
+                   var = build_fold_addr_expr (var);
+                   gimplify_assign (x, var, &ilist);
+                 }
+             }
+           tree s = OMP_CLAUSE_SIZE (c);
+           if (s == NULL_TREE)
+             s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
+           s = fold_convert (size_type_node, s);
+           tree purpose = size_int (map_idx++);
+           CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
+           if (TREE_CODE (s) != INTEGER_CST)
+             TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
+
+           unsigned char tkind = 0;
+           switch (OMP_CLAUSE_CODE (c))
+             {
+             case OMP_CLAUSE_MAP:
+               tkind = OMP_CLAUSE_MAP_KIND (c);
+               break;
+             case OMP_CLAUSE_TO:
+               tkind = OMP_CLAUSE_MAP_TO;
+               break;
+             case OMP_CLAUSE_FROM:
+               tkind = OMP_CLAUSE_MAP_FROM;
+               break;
+             default:
+               gcc_unreachable ();
+             }
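+           /* The low 3 bits of the kind byte select the mapping kind,
+              the remaining bits hold ceil_log2 of the alignment.  */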
+           unsigned int talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
+           if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
+             talign = DECL_ALIGN_UNIT (ovar);
+           talign = ceil_log2 (talign);
+           tkind |= talign << 3;
+           CONSTRUCTOR_APPEND_ELT (vkind, purpose,
+                                   build_int_cst (unsigned_char_type_node,
+                                                  tkind));
+           if (nc && nc != c)
+             c = nc;
+         }
+
+      gcc_assert (map_idx == map_cnt);
+
+      DECL_INITIAL (TREE_VEC_ELT (t, 1))
+       = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)), vsize);
+      DECL_INITIAL (TREE_VEC_ELT (t, 2))
+       = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 2)), vkind);
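+      /* If any mapped size was not a compile time constant, the sizes
+        array could not be made static; emit its initialization at
+        runtime and clobber it at the end of the region.  */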
+      if (!TREE_STATIC (TREE_VEC_ELT (t, 1)))
+       {
+         gimple_seq initlist = NULL;
+         force_gimple_operand (build1 (DECL_EXPR, void_type_node,
+                                       TREE_VEC_ELT (t, 1)),
+                               &initlist, true, NULL_TREE);
+         gimple_seq_add_seq (&ilist, initlist);
+
+         tree clobber = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)),
+                                           NULL);
+         TREE_THIS_VOLATILE (clobber) = 1;
+         gimple_seq_add_stmt (&olist,
+                              gimple_build_assign (TREE_VEC_ELT (t, 1),
+                                                   clobber));
+       }
+
+      tree clobber = build_constructor (ctx->record_type, NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
+                                                       clobber));
+    }
+
+  /* Once all the expansions are done, sequence all the different
+     fragments inside gimple_omp_body.  */
+
+  new_body = NULL;
+
+  if (ctx->record_type && kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
+      /* fixup_child_record_type might have changed receiver_decl's type.  */
+      t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
+      gimple_seq_add_stmt (&new_body,
+                          gimple_build_assign (ctx->receiver_decl, t));
+    }
+
+  if (kind == GF_OMP_TARGET_KIND_REGION)
+    {
+      gimple_seq_add_seq (&new_body, tgt_body);
+      new_body = maybe_catch_exception (new_body);
+    }
+  else if (kind == GF_OMP_TARGET_KIND_DATA)
+    new_body = tgt_body;
+  if (kind != GF_OMP_TARGET_KIND_UPDATE)
+    {
+      gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
+      gimple_omp_set_body (stmt, new_body);
+    }
+
+  bind = gimple_build_bind (NULL, NULL,
+                           tgt_bind ? gimple_bind_block (tgt_bind)
+                                    : NULL_TREE);
   gsi_replace (gsi_p, bind, true);
   gimple_bind_add_seq (bind, ilist);
   gimple_bind_add_stmt (bind, stmt);
@@ -6774,6 +9867,71 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   pop_gimplify_context (NULL);
 }
 
+/* Lower code for an OpenMP teams directive.  */
+
+static void
+lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
+{
+  gimple teams_stmt = gsi_stmt (*gsi_p);
+  push_gimplify_context ();
+
+  tree block = make_node (BLOCK);
+  gimple bind = gimple_build_bind (NULL, NULL, block);
+  gsi_replace (gsi_p, bind, true);
+  gimple_seq bind_body = NULL;
+  gimple_seq dlist = NULL;
+  gimple_seq olist = NULL;
+
+  tree num_teams = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
+                                   OMP_CLAUSE_NUM_TEAMS);
+  if (num_teams == NULL_TREE)
+    num_teams = build_int_cst (unsigned_type_node, 0);
+  else
+    {
+      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
+      num_teams = fold_convert (unsigned_type_node, num_teams);
+      gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
+    }
+  tree thread_limit = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
+                                      OMP_CLAUSE_THREAD_LIMIT);
+  if (thread_limit == NULL_TREE)
+    thread_limit = build_int_cst (unsigned_type_node, 0);
+  else
+    {
+      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
+      thread_limit = fold_convert (unsigned_type_node, thread_limit);
+      gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
+                    fb_rvalue);
+    }
+
+  lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
+                          &bind_body, &dlist, ctx, NULL);
+  lower_omp (gimple_omp_body_ptr (teams_stmt), ctx);
+  lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, ctx);
+  gimple_seq_add_stmt (&bind_body, teams_stmt);
+
+  location_t loc = gimple_location (teams_stmt);
+  tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
+  gimple call = gimple_build_call (decl, 2, num_teams, thread_limit);
+  gimple_set_location (call, loc);
+  gimple_seq_add_stmt (&bind_body, call);
+
+  gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
+  gimple_omp_set_body (teams_stmt, NULL);
+  gimple_seq_add_seq (&bind_body, olist);
+  gimple_seq_add_seq (&bind_body, dlist);
+  gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
+  gimple_bind_set_body (bind, bind_body);
+
+  pop_gimplify_context (bind);
+
+  gimple_bind_append_vars (bind, ctx->block_vars);
+  BLOCK_VARS (block) = ctx->block_vars;
+  if (BLOCK_VARS (block))
+    TREE_USED (block) = 1;
+}
+
+
 /* Callback for lower_omp_1.  Return non-NULL if *tp needs to be
    regimplified.  If DATA is non-NULL, lower_omp_1 is outside
    of OpenMP context, but with task_shared_vars set.  */
@@ -6843,22 +10001,32 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       lower_omp (gimple_try_eval_ptr (stmt), ctx);
       lower_omp (gimple_try_cleanup_ptr (stmt), ctx);
       break;
+    case GIMPLE_TRANSACTION:
+      lower_omp (gimple_transaction_body_ptr (stmt), ctx);
+      break;
     case GIMPLE_BIND:
       lower_omp (gimple_bind_body_ptr (stmt), ctx);
       break;
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       ctx = maybe_lookup_ctx (stmt);
+      gcc_assert (ctx);
+      if (ctx->cancellable)
+       ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
       lower_omp_taskreg (gsi_p, ctx);
       break;
     case GIMPLE_OMP_FOR:
       ctx = maybe_lookup_ctx (stmt);
       gcc_assert (ctx);
+      if (ctx->cancellable)
+       ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
       lower_omp_for (gsi_p, ctx);
       break;
     case GIMPLE_OMP_SECTIONS:
       ctx = maybe_lookup_ctx (stmt);
       gcc_assert (ctx);
+      if (ctx->cancellable)
+       ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
       lower_omp_sections (gsi_p, ctx);
       break;
     case GIMPLE_OMP_SINGLE:
@@ -6871,6 +10039,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       gcc_assert (ctx);
       lower_omp_master (gsi_p, ctx);
       break;
+    case GIMPLE_OMP_TASKGROUP:
+      ctx = maybe_lookup_ctx (stmt);
+      gcc_assert (ctx);
+      lower_omp_taskgroup (gsi_p, ctx);
+      break;
     case GIMPLE_OMP_ORDERED:
       ctx = maybe_lookup_ctx (stmt);
       gcc_assert (ctx);
@@ -6887,11 +10060,84 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
                        lower_omp_regimplify_p, ctx ? NULL : &wi, NULL))
        gimple_regimplify_operands (stmt, gsi_p);
       break;
+    case GIMPLE_OMP_TARGET:
+      ctx = maybe_lookup_ctx (stmt);
+      gcc_assert (ctx);
+      lower_omp_target (gsi_p, ctx);
+      break;
+    case GIMPLE_OMP_TEAMS:
+      ctx = maybe_lookup_ctx (stmt);
+      gcc_assert (ctx);
+      lower_omp_teams (gsi_p, ctx);
+      break;
+    case GIMPLE_CALL:
+      tree fndecl;
+      fndecl = gimple_call_fndecl (stmt);
+      if (fndecl
+         && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+       switch (DECL_FUNCTION_CODE (fndecl))
+         {
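+         /* Inside a cancellable region, give the cancellation check
+            a boolean result and branch to the region's cancel label
+            when it returns true; a plain barrier is redirected to its
+            _cancel variant first.  */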
+         case BUILT_IN_GOMP_BARRIER:
+           if (ctx == NULL)
+             break;
+           /* FALLTHRU */
+         case BUILT_IN_GOMP_CANCEL:
+         case BUILT_IN_GOMP_CANCELLATION_POINT:
+           omp_context *cctx;
+           cctx = ctx;
+           if (gimple_code (cctx->stmt) == GIMPLE_OMP_SECTION)
+             cctx = cctx->outer;
+           gcc_assert (gimple_call_lhs (stmt) == NULL_TREE);
+           if (!cctx->cancellable)
+             {
+               if (DECL_FUNCTION_CODE (fndecl)
+                   == BUILT_IN_GOMP_CANCELLATION_POINT)
+                 {
+                   stmt = gimple_build_nop ();
+                   gsi_replace (gsi_p, stmt, false);
+                 }
+               break;
+             }
+           tree lhs;
+           lhs = create_tmp_var (boolean_type_node, NULL);
+           if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER)
+             {
+               fndecl = builtin_decl_explicit (BUILT_IN_GOMP_BARRIER_CANCEL);
+               gimple_call_set_fndecl (stmt, fndecl);
+               gimple_call_set_fntype (stmt, TREE_TYPE (fndecl));
+             }
+           gimple_call_set_lhs (stmt, lhs);
+           tree fallthru_label;
+           fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
+           gimple g;
+           g = gimple_build_label (fallthru_label);
+           gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
+           g = gimple_build_cond (NE_EXPR, lhs, boolean_false_node,
+                                  cctx->cancel_label, fallthru_label);
+           gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
+           break;
+         default:
+           break;
+         }
+      /* FALLTHRU */
     default:
       if ((ctx || task_shared_vars)
          && walk_gimple_op (stmt, lower_omp_regimplify_p,
                             ctx ? NULL : &wi))
-       gimple_regimplify_operands (stmt, gsi_p);
+       {
+         /* Just remove clobbers, this should happen only if we have
+            "privatized" local addressable variables in SIMD regions,
+            the clobber isn't needed in that case and gimplifying address
+            of the ARRAY_REF into a pointer and creating MEM_REF based
+            clobber would create worse code than we get with the clobber
+            dropped.  */
+         if (gimple_clobber_p (stmt))
+           {
+             gsi_replace (gsi_p, gimple_build_nop (), true);
+             break;
+           }
+         gimple_regimplify_operands (stmt, gsi_p);
+       }
       break;
     }
 }
@@ -6903,6 +10149,11 @@ lower_omp (gimple_seq *body, omp_context *ctx)
   gimple_stmt_iterator gsi;
   for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
     lower_omp_1 (&gsi, ctx);
+  /* During gimplification, we have not always invoked fold_stmt
+     (gimplify.c:maybe_fold_stmt); call it now.  */
+  if (target_nesting_level)
+    for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
+      fold_stmt (&gsi);
   input_location = saved_location;
 }
 \f
@@ -6915,7 +10166,7 @@ execute_lower_omp (void)
 
   /* This pass always runs, to provide PROP_gimple_lomp.
      But there is nothing to do unless -fopenmp is given.  */
-  if (flag_openmp == 0)
+  if (flag_openmp == 0 && flag_openmp_simd == 0 && flag_cilkplus == 0)
     return 0;
 
   all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
@@ -6927,10 +10178,8 @@ execute_lower_omp (void)
 
   if (all_contexts->root)
     {
-      struct gimplify_ctx gctx;
-
       if (task_shared_vars)
-       push_gimplify_context (&gctx);
+       push_gimplify_context ();
       lower_omp (&body, NULL);
       if (task_shared_vars)
        pop_gimplify_context (NULL);
@@ -6945,24 +10194,42 @@ execute_lower_omp (void)
   return 0;
 }
 
-struct gimple_opt_pass pass_lower_omp =
-{
- {
-  GIMPLE_PASS,
-  "omplower",                          /* name */
-  NULL,                                        /* gate */
-  execute_lower_omp,                   /* execute */
-  NULL,                                        /* sub */
-  NULL,                                        /* next */
-  0,                                   /* static_pass_number */
-  TV_NONE,                             /* tv_id */
-  PROP_gimple_any,                     /* properties_required */
-  PROP_gimple_lomp,                    /* properties_provided */
-  0,                                   /* properties_destroyed */
-  0,                                   /* todo_flags_start */
-  0                                     /* todo_flags_finish */
- }
+namespace {
+
+const pass_data pass_data_lower_omp =
+{
+  GIMPLE_PASS, /* type */
+  "omplower", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  false, /* has_gate */
+  true, /* has_execute */
+  TV_NONE, /* tv_id */
+  PROP_gimple_any, /* properties_required */
+  PROP_gimple_lomp, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
 };
+
+class pass_lower_omp : public gimple_opt_pass
+{
+public:
+  pass_lower_omp (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_lower_omp, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  unsigned int execute () { return execute_lower_omp (); }
+
+}; // class pass_lower_omp
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_lower_omp (gcc::context *ctxt)
+{
+  return new pass_lower_omp (ctxt);
+}
 \f
 /* The following is a utility to diagnose OpenMP structured block violations.
    It is not part of the "omplower" pass, as that's invoked too late.  It
@@ -7017,12 +10284,34 @@ diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
     error ("invalid entry to OpenMP structured block");
 #endif
 
+  bool cilkplus_block = false;
+  if (flag_cilkplus)
+    {
+      if ((branch_ctx
+          && gimple_code (branch_ctx) == GIMPLE_OMP_FOR
+          && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
+         || (label_ctx
+             && gimple_code (label_ctx) == GIMPLE_OMP_FOR
+             && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
+       cilkplus_block = true;
+    }
+
   /* If it's obvious we have an invalid entry, be specific about the error.  */
   if (branch_ctx == NULL)
-    error ("invalid entry to OpenMP structured block");
+    {
+      if (cilkplus_block)
+       error ("invalid entry to Cilk Plus structured block");
+      else
+       error ("invalid entry to OpenMP structured block");
+    }
   else
-    /* Otherwise, be vague and lazy, but efficient.  */
-    error ("invalid branch to/from an OpenMP structured block");
+    {
+      /* Otherwise, be vague and lazy, but efficient.  */
+      if (cilkplus_block)
+       error ("invalid branch to/from a Cilk Plus structured block");
+      else
+       error ("invalid branch to/from an OpenMP structured block");
+    }
 
   gsi_replace (gsi_p, gimple_build_nop (), false);
   return true;
@@ -7053,6 +10342,9 @@ diagnose_sb_1 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
     case GIMPLE_OMP_MASTER:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
+    case GIMPLE_OMP_TARGET:
+    case GIMPLE_OMP_TEAMS:
+    case GIMPLE_OMP_TASKGROUP:
       /* The minimal context here is just the current OMP construct.  */
       inner_context = stmt;
       wi->info = inner_context;
@@ -7108,6 +10400,9 @@ diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
     case GIMPLE_OMP_MASTER:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
+    case GIMPLE_OMP_TARGET:
+    case GIMPLE_OMP_TEAMS:
+    case GIMPLE_OMP_TASKGROUP:
       wi->info = stmt;
       walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
       wi->info = context;
@@ -7179,6 +10474,128 @@ diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
   return NULL_TREE;
 }
 
+/* Called from tree-cfg.c::make_edges to create cfg edges for all kinds
+   of GIMPLE_OMP statements.  Return true if the statement falls through
+   to the following basic block.  */
+bool
+make_gimple_omp_edges (basic_block bb, struct omp_region **region,
+                      int *region_idx)
+{
+  gimple last = last_stmt (bb);
+  enum gimple_code code = gimple_code (last);
+  struct omp_region *cur_region = *region;
+  bool fallthru = false;
+
+  switch (code)
+    {
+    case GIMPLE_OMP_PARALLEL:
+    case GIMPLE_OMP_TASK:
+    case GIMPLE_OMP_FOR:
+    case GIMPLE_OMP_SINGLE:
+    case GIMPLE_OMP_TEAMS:
+    case GIMPLE_OMP_MASTER:
+    case GIMPLE_OMP_TASKGROUP:
+    case GIMPLE_OMP_ORDERED:
+    case GIMPLE_OMP_CRITICAL:
+    case GIMPLE_OMP_SECTION:
+      cur_region = new_omp_region (bb, code, cur_region);
+      fallthru = true;
+      break;
+
+    case GIMPLE_OMP_TARGET:
+      cur_region = new_omp_region (bb, code, cur_region);
+      fallthru = true;
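+      /* #pragma omp target update is a standalone directive with no
+        body and thus no GIMPLE_OMP_RETURN; close its region at once.  */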
+      if (gimple_omp_target_kind (last) == GF_OMP_TARGET_KIND_UPDATE)
+       cur_region = cur_region->outer;
+      break;
+
+    case GIMPLE_OMP_SECTIONS:
+      cur_region = new_omp_region (bb, code, cur_region);
+      fallthru = true;
+      break;
+
+    case GIMPLE_OMP_SECTIONS_SWITCH:
+      fallthru = false;
+      break;
+
+    case GIMPLE_OMP_ATOMIC_LOAD:
+    case GIMPLE_OMP_ATOMIC_STORE:
+      fallthru = true;
+      break;
+
+    case GIMPLE_OMP_RETURN:
+      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
+        somewhere other than the next block.  This will be
+        created later.  */
+      cur_region->exit = bb;
+      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
+      cur_region = cur_region->outer;
+      break;
+
+    case GIMPLE_OMP_CONTINUE:
+      cur_region->cont = bb;
+      switch (cur_region->type)
+       {
+       case GIMPLE_OMP_FOR:
+         /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
+            succs edges as abnormal to prevent splitting
+            them.  */
+         single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
+         /* Make the loopback edge.  */
+         make_edge (bb, single_succ (cur_region->entry),
+                    EDGE_ABNORMAL);
+
+         /* Create an edge from GIMPLE_OMP_FOR to exit, which
+            corresponds to the case that the body of the loop
+            is not executed at all.  */
+         make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
+         make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
+         fallthru = false;
+         break;
+
+       case GIMPLE_OMP_SECTIONS:
+         /* Wire up the edges into and out of the nested sections.  */
+         {
+           basic_block switch_bb = single_succ (cur_region->entry);
+
+           struct omp_region *i;
+           for (i = cur_region->inner; i ; i = i->next)
+             {
+               gcc_assert (i->type == GIMPLE_OMP_SECTION);
+               make_edge (switch_bb, i->entry, 0);
+               make_edge (i->exit, bb, EDGE_FALLTHRU);
+             }
+
+           /* Make the loopback edge to the block with
+              GIMPLE_OMP_SECTIONS_SWITCH.  */
+           make_edge (bb, switch_bb, 0);
+
+           /* Make the edge from the switch to exit.  */
+           make_edge (switch_bb, bb->next_bb, 0);
+           fallthru = false;
+         }
+         break;
+
+       default:
+         gcc_unreachable ();
+       }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (*region != cur_region)
+    {
+      *region = cur_region;
+      if (cur_region)
+       *region_idx = cur_region->entry->index;
+      else
+       *region_idx = 0;
+    }
+
+  return fallthru;
+}
+
 static unsigned int
 diagnose_omp_structured_block_errors (void)
 {
@@ -7205,26 +10622,1212 @@ diagnose_omp_structured_block_errors (void)
 static bool
 gate_diagnose_omp_blocks (void)
 {
-  return flag_openmp != 0;
-}
-
-struct gimple_opt_pass pass_diagnose_omp_blocks =
-{
-  {
-    GIMPLE_PASS,
-    "*diagnose_omp_blocks",            /* name */
-    gate_diagnose_omp_blocks,          /* gate */
-    diagnose_omp_structured_block_errors,      /* execute */
-    NULL,                              /* sub */
-    NULL,                              /* next */
-    0,                                 /* static_pass_number */
-    TV_NONE,                           /* tv_id */
-    PROP_gimple_any,                   /* properties_required */
-    0,                                 /* properties_provided */
-    0,                                 /* properties_destroyed */
-    0,                                 /* todo_flags_start */
-    0,                                 /* todo_flags_finish */
+  return flag_openmp || flag_cilkplus;
+}
+
+namespace {
+
+const pass_data pass_data_diagnose_omp_blocks =
+{
+  GIMPLE_PASS, /* type */
+  "*diagnose_omp_blocks", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_NONE, /* tv_id */
+  PROP_gimple_any, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_diagnose_omp_blocks : public gimple_opt_pass
+{
+public:
+  pass_diagnose_omp_blocks (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_diagnose_omp_blocks, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate () { return gate_diagnose_omp_blocks (); }
+  unsigned int execute () {
+    return diagnose_omp_structured_block_errors ();
   }
+
+}; // class pass_diagnose_omp_blocks
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_diagnose_omp_blocks (gcc::context *ctxt)
+{
+  return new pass_diagnose_omp_blocks (ctxt);
+}
+\f
+/* SIMD clone supporting code.  */
+
+/* Allocate a fresh `simd_clone' and return it.  NARGS is the number
+   of arguments to reserve space for.  */
+
+static struct cgraph_simd_clone *
+simd_clone_struct_alloc (int nargs)
+{
+  struct cgraph_simd_clone *clone_info;
+  size_t len = (sizeof (struct cgraph_simd_clone)
+               + nargs * sizeof (struct cgraph_simd_clone_arg));
+  clone_info = (struct cgraph_simd_clone *)
+              ggc_internal_cleared_alloc (len);
+  return clone_info;
+}
+
+/* Make a copy of the `struct cgraph_simd_clone' FROM into TO.  */
+
+static inline void
+simd_clone_struct_copy (struct cgraph_simd_clone *to,
+                       struct cgraph_simd_clone *from)
+{
+  memcpy (to, from, (sizeof (struct cgraph_simd_clone)
+                    + ((from->nargs - from->inbranch)
+                       * sizeof (struct cgraph_simd_clone_arg))));
+}
+
+/* Return vector of parameter types of function FNDECL.  This uses
+   TYPE_ARG_TYPES if available, otherwise falls back to the types of
+   its DECL_ARGUMENTS.  */
+
+vec<tree>
+simd_clone_vector_of_formal_parm_types (tree fndecl)
+{
+  if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
+    return ipa_get_vector_of_formal_parm_types (TREE_TYPE (fndecl));
+  vec<tree> args = ipa_get_vector_of_formal_parms (fndecl);
+  unsigned int i;
+  tree arg;
+  FOR_EACH_VEC_ELT (args, i, arg)
+    args[i] = TREE_TYPE (args[i]);
+  return args;
+}
+
+/* Given a simd function in NODE, extract the simd specific
+   information from the OMP clauses passed in CLAUSES, and return
+   the struct cgraph_simd_clone * if it should be cloned.  *INBRANCH_SPECIFIED
+   is set to TRUE if the `inbranch' or `notinbranch' clause is specified,
+   otherwise set to FALSE.  */
+
+static struct cgraph_simd_clone *
+simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
+                           bool *inbranch_specified)
+{
+  vec<tree> args = simd_clone_vector_of_formal_parm_types (node->decl);
+  tree t;
+  int n;
+  *inbranch_specified = false;
+
+  n = args.length ();
+  if (n > 0 && args.last () == void_type_node)
+    n--;
+
+  /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
+     be cloned have a distinctive artificial attribute in addition to "omp
+     declare simd".  */
+  bool cilk_clone
+    = (flag_cilkplus
+       && lookup_attribute ("cilk simd function",
+                           DECL_ATTRIBUTES (node->decl)));
+
+  /* Allocate one more than needed just in case this is an in-branch
+     clone which will require a mask argument.  */
+  struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
+  clone_info->nargs = n;
+  clone_info->cilk_elemental = cilk_clone;
+
+  if (!clauses)
+    {
+      args.release ();
+      return clone_info;
+    }
+  clauses = TREE_VALUE (clauses);
+  if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
+    {
+      args.release ();
+      return clone_info;
+    }
+
+  for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
+    {
+      switch (OMP_CLAUSE_CODE (t))
+       {
+       case OMP_CLAUSE_INBRANCH:
+         clone_info->inbranch = 1;
+         *inbranch_specified = true;
+         break;
+       case OMP_CLAUSE_NOTINBRANCH:
+         clone_info->inbranch = 0;
+         *inbranch_specified = true;
+         break;
+       case OMP_CLAUSE_SIMDLEN:
+         clone_info->simdlen
+           = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
+         break;
+       case OMP_CLAUSE_LINEAR:
+         {
+           tree decl = OMP_CLAUSE_DECL (t);
+           tree step = OMP_CLAUSE_LINEAR_STEP (t);
+           int argno = TREE_INT_CST_LOW (decl);
+           if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
+             {
+               clone_info->args[argno].arg_type
+                 = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
+               clone_info->args[argno].linear_step = tree_to_shwi (step);
+               gcc_assert (clone_info->args[argno].linear_step >= 0
+                           && clone_info->args[argno].linear_step < n);
+             }
+           else
+             {
+               if (POINTER_TYPE_P (args[argno]))
+                 step = fold_convert (ssizetype, step);
+               if (!tree_fits_shwi_p (step))
+                 {
+                   warning_at (OMP_CLAUSE_LOCATION (t), 0,
+                               "ignoring large linear step");
+                   args.release ();
+                   return NULL;
+                 }
+               else if (integer_zerop (step))
+                 {
+                   warning_at (OMP_CLAUSE_LOCATION (t), 0,
+                               "ignoring zero linear step");
+                   args.release ();
+                   return NULL;
+                 }
+               else
+                 {
+                   clone_info->args[argno].arg_type
+                     = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
+                   clone_info->args[argno].linear_step = tree_to_shwi (step);
+                 }
+             }
+           break;
+         }
+       case OMP_CLAUSE_UNIFORM:
+         {
+           tree decl = OMP_CLAUSE_DECL (t);
+           int argno = tree_to_uhwi (decl);
+           clone_info->args[argno].arg_type
+             = SIMD_CLONE_ARG_TYPE_UNIFORM;
+           break;
+         }
+       case OMP_CLAUSE_ALIGNED:
+         {
+           tree decl = OMP_CLAUSE_DECL (t);
+           int argno = tree_to_uhwi (decl);
+           clone_info->args[argno].alignment
+             = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
+           break;
+         }
+       default:
+         break;
+       }
+    }
+  args.release ();
+  return clone_info;
+}
+
+/* Given a SIMD clone in NODE, calculate the characteristic data
+   type and return the coresponding type.  The characteristic data
+   type is computed as described in the Intel Vector ABI.  */
+
+static tree
+simd_clone_compute_base_data_type (struct cgraph_node *node,
+                                  struct cgraph_simd_clone *clone_info)
+{
+  tree type = integer_type_node;
+  tree fndecl = node->decl;
+
+  /* a) For a non-void function, the characteristic data type is the
+        return type.  */
+  if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
+    type = TREE_TYPE (TREE_TYPE (fndecl));
+
+  /* b) If the function has any non-uniform, non-linear parameters,
+        then the characteristic data type is the type of the first
+        such parameter.  */
+  else
+    {
+      vec<tree> map = simd_clone_vector_of_formal_parm_types (fndecl);
+      for (unsigned int i = 0; i < clone_info->nargs; ++i)
+       if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+         {
+           type = map[i];
+           break;
+         }
+      map.release ();
+    }
+
+  /* c) If the characteristic data type determined by a) or b) above
+        is struct, union, or class type which is pass-by-value (except
+        for the type that maps to the built-in complex data type), the
+        characteristic data type is int.  */
+  if (RECORD_OR_UNION_TYPE_P (type)
+      && !aggregate_value_p (type, NULL)
+      && TREE_CODE (type) != COMPLEX_TYPE)
+    return integer_type_node;
+
+  /* d) If none of the above three classes is applicable, the
+        characteristic data type is int.  */
+
+  return type;
+
+  /* e) For Intel Xeon Phi native and offload compilation, if the
+        resulting characteristic data type is 8-bit or 16-bit integer
+        data type, the characteristic data type is int.  */
+  /* Well, we don't handle Xeon Phi yet.  */
+}
+
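+/* Produce the mangled name of a SIMD clone of NODE described by
+   CLONE_INFO, following the vector ABI: "_ZGV", the vector ISA letter,
+   'M' or 'N' for masked or unmasked, the simdlen, one letter per
+   argument kind, and finally an underscore and the original assembler
+   name.  Return NULL_TREE if a clone with the same mangled name
+   already exists.  */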
+static tree
+simd_clone_mangle (struct cgraph_node *node,
+                  struct cgraph_simd_clone *clone_info)
+{
+  char vecsize_mangle = clone_info->vecsize_mangle;
+  char mask = clone_info->inbranch ? 'M' : 'N';
+  unsigned int simdlen = clone_info->simdlen;
+  unsigned int n;
+  pretty_printer pp;
+
+  gcc_assert (vecsize_mangle && simdlen);
+
+  pp_string (&pp, "_ZGV");
+  pp_character (&pp, vecsize_mangle);
+  pp_character (&pp, mask);
+  pp_decimal_int (&pp, simdlen);
+
+  for (n = 0; n < clone_info->nargs; ++n)
+    {
+      struct cgraph_simd_clone_arg arg = clone_info->args[n];
+
+      if (arg.arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
+       pp_character (&pp, 'u');
+      else if (arg.arg_type == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+       {
+         gcc_assert (arg.linear_step != 0);
+         pp_character (&pp, 'l');
+         if (arg.linear_step > 1)
+           pp_unsigned_wide_integer (&pp, arg.linear_step);
+         else if (arg.linear_step < 0)
+           {
+             pp_character (&pp, 'n');
+             pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
+                                             arg.linear_step));
+           }
+       }
+      else if (arg.arg_type == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
+       {
+         pp_character (&pp, 's');
+         pp_unsigned_wide_integer (&pp, arg.linear_step);
+       }
+      else
+       pp_character (&pp, 'v');
+      if (arg.alignment)
+       {
+         pp_character (&pp, 'a');
+         pp_decimal_int (&pp, arg.alignment);
+       }
+    }
+
+  pp_underscore (&pp);
+  pp_string (&pp,
+            IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
+  const char *str = pp_formatted_text (&pp);
+
+  /* If there already is a SIMD clone with the same mangled name, don't
+     add another one.  This can happen e.g. for
+     #pragma omp declare simd
+     #pragma omp declare simd simdlen(8)
+     int foo (int, int);
+     if the simdlen is assumed to be 8 for the first one, etc.  */
+  for (struct cgraph_node *clone = node->simd_clones; clone;
+       clone = clone->simdclone->next_clone)
+    if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)),
+               str) == 0)
+      return NULL_TREE;
+
+  return get_identifier (str);
+}
+
+/* Create a simd clone of OLD_NODE and return it.  */
+
+static struct cgraph_node *
+simd_clone_create (struct cgraph_node *old_node)
+{
+  struct cgraph_node *new_node;
+  if (old_node->definition)
+    {
+      if (!cgraph_function_with_gimple_body_p (old_node))
+       return NULL;
+      cgraph_get_body (old_node);
+      new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL,
+                                            false, NULL, NULL, "simdclone");
+    }
+  else
+    {
+      tree old_decl = old_node->decl;
+      tree new_decl = copy_node (old_node->decl);
+      DECL_NAME (new_decl) = clone_function_name (old_decl, "simdclone");
+      SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
+      SET_DECL_RTL (new_decl, NULL);
+      DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
+      DECL_STATIC_DESTRUCTOR (new_decl) = 0;
+      new_node
+       = cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL);
+      cgraph_call_function_insertion_hooks (new_node);
+    }
+  if (new_node == NULL)
+    return new_node;
+
+  TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+
+  /* The function cgraph_function_versioning () will force the new
+     symbol local.  Undo this, and inherit external visibility from
+     the old node.  */
+  new_node->local.local = old_node->local.local;
+  new_node->externally_visible = old_node->externally_visible;
+
+  return new_node;
+}
+
+/* Adjust the return type of the given function to its appropriate
+   vector counterpart.  Returns a simd array to be used throughout the
+   function as a return value.  */
+
+static tree
+simd_clone_adjust_return_type (struct cgraph_node *node)
+{
+  tree fndecl = node->decl;
+  tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
+  unsigned int veclen;
+  tree t;
+
+  /* Adjust the function return type.  */
+  if (orig_rettype == void_type_node)
+    return NULL_TREE;
+  TREE_TYPE (fndecl) = build_distinct_type_copy (TREE_TYPE (fndecl));
+  if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl)))
+      || POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl))))
+    veclen = node->simdclone->vecsize_int;
+  else
+    veclen = node->simdclone->vecsize_float;
+  veclen /= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))));
+  if (veclen > node->simdclone->simdlen)
+    veclen = node->simdclone->simdlen;
+  if (veclen == node->simdclone->simdlen)
+    TREE_TYPE (TREE_TYPE (fndecl))
+      = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)),
+                          node->simdclone->simdlen);
+  else
+    {
+      t = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)), veclen);
+      t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
+      TREE_TYPE (TREE_TYPE (fndecl)) = t;
+    }
+  if (!node->definition)
+    return NULL_TREE;
+
+  /* Adjust the DECL_RESULT.  */
+  t = DECL_RESULT (fndecl);
+  gcc_assert (TREE_TYPE (t) != void_type_node);
+  TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
+  relayout_decl (t);
+
+  tree atype = build_array_type_nelts (orig_rettype,
+                                      node->simdclone->simdlen);
+  if (veclen != node->simdclone->simdlen)
+    return build1 (VIEW_CONVERT_EXPR, atype, t);
+
+  /* Set up a SIMD array to use as the return value.  */
+  tree retval = create_tmp_var_raw (atype, "retval");
+  gimple_add_tmp_var (retval);
+  return retval;
+}
+
+/* Each vector argument has a corresponding array to be used locally
+   as part of the eventual loop.  Create such a temporary array and
+   return it.
+
+   PREFIX is the prefix to be used for the temporary.
+
+   TYPE is the inner element type.
+
+   SIMDLEN is the number of elements.  */
+
+static tree
+create_tmp_simd_array (const char *prefix, tree type, int simdlen)
+{
+  tree atype = build_array_type_nelts (type, simdlen);
+  tree avar = create_tmp_var_raw (atype, prefix);
+  gimple_add_tmp_var (avar);
+  return avar;
+}
+
+/* Modify the function argument types to their corresponding vector
+   counterparts if appropriate.  Also, create one array for each simd
+   argument to be used locally when using the function arguments as
+   part of the loop.
+
+   NODE is the function whose arguments are to be adjusted.
+
+   Returns an adjustment vector describing how the argument types
+   were adjusted.  */
+
+static ipa_parm_adjustment_vec
+simd_clone_adjust_argument_types (struct cgraph_node *node)
+{
+  vec<tree> args;
+  ipa_parm_adjustment_vec adjustments;
+
+  if (node->definition)
+    args = ipa_get_vector_of_formal_parms (node->decl);
+  else
+    args = simd_clone_vector_of_formal_parm_types (node->decl);
+  adjustments.create (args.length ());
+  unsigned i, j, veclen;
+  struct ipa_parm_adjustment adj;
+  for (i = 0; i < node->simdclone->nargs; ++i)
+    {
+      memset (&adj, 0, sizeof (adj));
+      tree parm = args[i];
+      tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
+      adj.base_index = i;
+      adj.base = parm;
+
+      node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
+      node->simdclone->args[i].orig_type = parm_type;
+
+      if (node->simdclone->args[i].arg_type != SIMD_CLONE_ARG_TYPE_VECTOR)
+       {
+         /* No adjustment necessary for scalar arguments.  */
+         adj.op = IPA_PARM_OP_COPY;
+       }
+      else
+       {
+         if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
+           veclen = node->simdclone->vecsize_int;
+         else
+           veclen = node->simdclone->vecsize_float;
+         veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
+         if (veclen > node->simdclone->simdlen)
+           veclen = node->simdclone->simdlen;
+         adj.arg_prefix = "simd";
+         adj.type = build_vector_type (parm_type, veclen);
+         node->simdclone->args[i].vector_type = adj.type;
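+         /* If the simdlen exceeds the lanes of a single vector, the
+            argument is split across several vector parameters; push
+            one adjustment per piece.  */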
+         for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+           {
+             adjustments.safe_push (adj);
+             if (j == veclen)
+               {
+                 memset (&adj, 0, sizeof (adj));
+                 adj.op = IPA_PARM_OP_NEW;
+                 adj.arg_prefix = "simd";
+                 adj.base_index = i;
+                 adj.type = node->simdclone->args[i].vector_type;
+               }
+           }
+
+         if (node->definition)
+           node->simdclone->args[i].simd_array
+             = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
+                                      parm_type, node->simdclone->simdlen);
+       }
+      adjustments.safe_push (adj);
+    }
+
+  if (node->simdclone->inbranch)
+    {
+      tree base_type
+       = simd_clone_compute_base_data_type (node->simdclone->origin,
+                                            node->simdclone);
+
+      memset (&adj, 0, sizeof (adj));
+      adj.op = IPA_PARM_OP_NEW;
+      adj.arg_prefix = "mask";
+
+      adj.base_index = i;
+      if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
+       veclen = node->simdclone->vecsize_int;
+      else
+       veclen = node->simdclone->vecsize_float;
+      veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+      if (veclen > node->simdclone->simdlen)
+       veclen = node->simdclone->simdlen;
+      adj.type = build_vector_type (base_type, veclen);
+      adjustments.safe_push (adj);
+
+      for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+       adjustments.safe_push (adj);
+
+      /* We have previously allocated one extra entry for the mask.  Use
+        it and fill it.  */
+      struct cgraph_simd_clone *sc = node->simdclone;
+      sc->nargs++;
+      if (node->definition)
+       {
+         sc->args[i].orig_arg
+           = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
+         sc->args[i].simd_array
+           = create_tmp_simd_array ("mask", base_type, sc->simdlen);
+       }
+      sc->args[i].orig_type = base_type;
+      sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
+    }
+
+  if (node->definition)
+    ipa_modify_formal_parameters (node->decl, adjustments);
+  else
+    {
+      tree new_arg_types = NULL_TREE, new_reversed;
+      bool last_parm_void = false;
+      if (args.length () > 0 && args.last () == void_type_node)
+       last_parm_void = true;
+
+      gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
+      j = adjustments.length ();
+      for (i = 0; i < j; i++)
+       {
+         struct ipa_parm_adjustment *adj = &adjustments[i];
+         tree ptype;
+         if (adj->op == IPA_PARM_OP_COPY)
+           ptype = args[adj->base_index];
+         else
+           ptype = adj->type;
+         new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
+       }
+      new_reversed = nreverse (new_arg_types);
+      if (last_parm_void)
+       {
+         if (new_reversed)
+           TREE_CHAIN (new_arg_types) = void_list_node;
+         else
+           new_reversed = void_list_node;
+       }
+
+      tree new_type = build_distinct_type_copy (TREE_TYPE (node->decl));
+      TYPE_ARG_TYPES (new_type) = new_reversed;
+      TREE_TYPE (node->decl) = new_type;
+
+      adjustments.release ();
+    }
+  args.release ();
+  return adjustments;
+}
+
+/* Initialize and copy the function arguments in NODE to their
+   corresponding local simd arrays.  Returns a fresh gimple_seq with
+   the instruction sequence generated.  */
+
+static gimple_seq
+simd_clone_init_simd_arrays (struct cgraph_node *node,
+                            ipa_parm_adjustment_vec adjustments)
+{
+  gimple_seq seq = NULL;
+  unsigned i = 0, j = 0, k;
+
+  for (tree arg = DECL_ARGUMENTS (node->decl);
+       arg;
+       arg = DECL_CHAIN (arg), i++, j++)
+    {
+      if (adjustments[j].op == IPA_PARM_OP_COPY)
+       continue;
+
+      node->simdclone->args[i].vector_arg = arg;
+
+      tree array = node->simdclone->args[i].simd_array;
+      if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen)
+       {
+         tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+         tree ptr = build_fold_addr_expr (array);
+         tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+                          build_int_cst (ptype, 0));
+         t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+         gimplify_and_add (t, &seq);
+       }
+      else
+       {
+         unsigned int simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg));
+         tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+         for (k = 0; k < node->simdclone->simdlen; k += simdlen)
+           {
+             tree ptr = build_fold_addr_expr (array);
+             int elemsize;
+             if (k)
+               {
+                 arg = DECL_CHAIN (arg);
+                 j++;
+               }
+             elemsize
+               = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg))));
+             tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+                              build_int_cst (ptype, k * elemsize));
+             t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+             gimplify_and_add (t, &seq);
+           }
+       }
+    }
+  return seq;
+}
+
+/* Callback info for ipa_simd_modify_stmt_ops below.  */
+
+struct modify_stmt_info {
+  ipa_parm_adjustment_vec adjustments;
+  gimple stmt;
+  /* True if the parent statement was modified by
+     ipa_simd_modify_stmt_ops.  */
+  bool modified;
+};
+
+/* Callback for walk_gimple_op.
+
+   Adjust operands from a given statement as specified in the
+   adjustments vector in the callback data.  */
+
+static tree
+ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
+{
+  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
+  if (!SSA_VAR_P (*tp))
+    {
+      /* Make sure we treat subtrees as a RHS, so that when examining
+        the `*foo' in *foo=x, the `foo' gets properly treated as
+        a use.  */
+      wi->is_lhs = false;
+      wi->val_only = true;
+      if (TYPE_P (*tp))
+       *walk_subtrees = 0;
+      return NULL_TREE;
+    }
+  struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
+  struct ipa_parm_adjustment *cand
+    = ipa_get_adjustment_candidate (&tp, NULL, info->adjustments, true);
+  if (!cand)
+    return NULL_TREE;
+
+  tree t = *tp;
+  tree repl = make_ssa_name (TREE_TYPE (t), NULL);
+
+  gimple stmt;
+  gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
+  if (wi->is_lhs)
+    {
+      stmt = gimple_build_assign (unshare_expr (cand->new_decl), repl);
+      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+      SSA_NAME_DEF_STMT (repl) = info->stmt;
+    }
+  else
+    {
+      /* You'd think we could skip the extra SSA variable when
+        wi->val_only=true, but we may have `*var' which will get
+        replaced into `*var_array[iter]' and will likely not be
+        valid gimple.  */
+      stmt = gimple_build_assign (repl, unshare_expr (cand->new_decl));
+      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+    }
+
+  if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
+    {
+      tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
+      *tp = vce;
+    }
+  else
+    *tp = repl;
+
+  info->modified = true;
+  wi->is_lhs = false;
+  wi->val_only = true;
+  return NULL_TREE;
+}
+
+/* Traverse the function body and perform all modifications as
+   described in ADJUSTMENTS.  At function return, ADJUSTMENTS will be
+   modified such that the replacement/reduction value will now be an
+   offset into the corresponding simd_array.
+
+   This function will replace all function argument uses with their
+   corresponding simd array elements, and adjust the return values
+   accordingly.  */
+
+static void
+ipa_simd_modify_function_body (struct cgraph_node *node,
+                              ipa_parm_adjustment_vec adjustments,
+                              tree retval_array, tree iter)
+{
+  basic_block bb;
+  unsigned int i, j;
+
+  /* Re-use the adjustments array, but this time use it to replace
+     every function argument use with an offset into the corresponding
+     simd_array.  */
+  for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
+    {
+      if (!node->simdclone->args[i].vector_arg)
+       continue;
+
+      tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
+      tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
+      adjustments[j].new_decl
+       = build4 (ARRAY_REF,
+                 basetype,
+                 node->simdclone->args[i].simd_array,
+                 iter,
+                 NULL_TREE, NULL_TREE);
+      if (adjustments[j].op == IPA_PARM_OP_NONE
+         && TYPE_VECTOR_SUBPARTS (vectype) < node->simdclone->simdlen)
+       j += node->simdclone->simdlen / TYPE_VECTOR_SUBPARTS (vectype) - 1;
+    }
+
+  struct modify_stmt_info info;
+  info.adjustments = adjustments;
+
+  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+    {
+      gimple_stmt_iterator gsi;
+
+      gsi = gsi_start_bb (bb);
+      while (!gsi_end_p (gsi))
+       {
+         gimple stmt = gsi_stmt (gsi);
+         info.stmt = stmt;
+         struct walk_stmt_info wi;
+
+         memset (&wi, 0, sizeof (wi));
+         info.modified = false;
+         wi.info = &info;
+         walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
+
+         if (gimple_code (stmt) == GIMPLE_RETURN)
+           {
+             tree retval = gimple_return_retval (stmt);
+             if (!retval)
+               {
+                 gsi_remove (&gsi, true);
+                 continue;
+               }
+
+             /* Replace `return foo' with `retval_array[iter] = foo'.  */
+             tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
+                                retval_array, iter, NULL, NULL);
+             stmt = gimple_build_assign (ref, retval);
+             gsi_replace (&gsi, stmt, true);
+             info.modified = true;
+           }
+
+         if (info.modified)
+           {
+             update_stmt (stmt);
+             if (maybe_clean_eh_stmt (stmt))
+               gimple_purge_dead_eh_edges (gimple_bb (stmt));
+           }
+         gsi_next (&gsi);
+       }
+    }
+}
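+
+/* A hypothetical before/after view of the body rewrite done above,
+   eliding the intermediate SSA copies (iter_1 is the per-lane
+   iteration variable):
+     before:  d = a * b;  return d;
+     after:   d = a.simdarray[iter_1] * b.simdarray[iter_1];
+              retval.simdarray[iter_1] = d;
+   A `return' without a value is simply removed; control then falls
+   through to the increment block added by the caller.  */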
+
+/* Adjust the argument types in NODE to their appropriate vector
+   counterparts.  */
+
+static void
+simd_clone_adjust (struct cgraph_node *node)
+{
+  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+  targetm.simd_clone.adjust (node);
+
+  tree retval = simd_clone_adjust_return_type (node);
+  ipa_parm_adjustment_vec adjustments
+    = simd_clone_adjust_argument_types (node);
+
+  push_gimplify_context ();
+
+  gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
+
+  /* Adjust all uses of vector arguments accordingly.  Adjust all
+     return values accordingly.  */
+  tree iter = create_tmp_var (unsigned_type_node, "iter");
+  tree iter1 = make_ssa_name (iter, NULL);
+  tree iter2 = make_ssa_name (iter, NULL);
+  ipa_simd_modify_function_body (node, adjustments, retval, iter1);
+
+  /* Initialize the iteration variable.  */
+  basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+  basic_block body_bb = split_block_after_labels (entry_bb)->dest;
+  gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
+  /* Insert the SIMD array and iv initialization at function
+     entry.  */
+  gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
+
+  pop_gimplify_context (NULL);
+
+  /* Create a new BB right before the original exit BB, to hold the
+     iteration increment and the condition/branch.  */
+  basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
+  basic_block incr_bb = create_empty_bb (orig_exit);
+  /* The successor of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with
+     no edge flags set.  Mark it as a FALLTHRU_EDGE now.  */
+  gcc_assert (EDGE_COUNT (orig_exit->succs) == 1);
+  EDGE_SUCC (orig_exit, 0)->flags |= EDGE_FALLTHRU;
+  for (unsigned i = 0;
+       i < EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); ++i)
+    {
+      edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i);
+      redirect_edge_succ (e, incr_bb);
+    }
+  edge e = make_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+  e->probability = REG_BR_PROB_BASE;
+  gsi = gsi_last_bb (incr_bb);
+  gimple g = gimple_build_assign_with_ops (PLUS_EXPR, iter2, iter1,
+                                          build_int_cst (unsigned_type_node,
+                                                         1));
+  gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+
+  /* Mostly annotate the loop for the vectorizer (the rest is done below).  */
+  struct loop *loop = alloc_loop ();
+  cfun->has_force_vect_loops = true;
+  loop->safelen = node->simdclone->simdlen;
+  loop->force_vect = true;
+  loop->header = body_bb;
+  add_bb_to_loop (incr_bb, loop);
+
+  /* Branch around the body if the mask applies.  */
+  if (node->simdclone->inbranch)
+    {
+      gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
+      tree mask_array
+       = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
+      tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)), NULL);
+      tree aref = build4 (ARRAY_REF,
+                         TREE_TYPE (TREE_TYPE (mask_array)),
+                         mask_array, iter1,
+                         NULL, NULL);
+      g = gimple_build_assign (mask, aref);
+      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+      int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
+       {
+         aref = build1 (VIEW_CONVERT_EXPR,
+                        build_nonstandard_integer_type (bitsize, 0), mask);
+         mask = make_ssa_name (TREE_TYPE (aref), NULL);
+         g = gimple_build_assign (mask, aref);
+         gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+       }
+
+      g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
+                            NULL, NULL);
+      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+      make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
+      FALLTHRU_EDGE (loop->header)->flags = EDGE_FALSE_VALUE;
+    }
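+
+  /* To summarize the inbranch handling above: a lane whose mask
+     element compares equal to zero takes the EDGE_TRUE_VALUE edge
+     straight to the increment block, so the body only executes for
+     active lanes.  */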
+
+  /* Generate the condition.  */
+  g = gimple_build_cond (LT_EXPR,
+                        iter2,
+                        build_int_cst (unsigned_type_node,
+                                       node->simdclone->simdlen),
+                        NULL, NULL);
+  gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+  e = split_block (incr_bb, gsi_stmt (gsi));
+  basic_block latch_bb = e->dest;
+  basic_block new_exit_bb = split_block (latch_bb, NULL)->dest;
+  loop->latch = latch_bb;
+
+  redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
+
+  make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
+  /* The successor of incr_bb is already pointing to latch_bb; just
+     change the flags.
+     make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE);  */
+  FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
+
+  gimple phi = create_phi_node (iter1, body_bb);
+  edge preheader_edge = find_edge (entry_bb, body_bb);
+  edge latch_edge = single_succ_edge (latch_bb);
+  add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
+              UNKNOWN_LOCATION);
+  add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
+
+  /* Generate the new return.  */
+  gsi = gsi_last_bb (new_exit_bb);
+  if (retval
+      && TREE_CODE (retval) == VIEW_CONVERT_EXPR
+      && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
+    retval = TREE_OPERAND (retval, 0);
+  else if (retval)
+    {
+      retval = build1 (VIEW_CONVERT_EXPR,
+                      TREE_TYPE (TREE_TYPE (node->decl)),
+                      retval);
+      retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
+                                        false, GSI_CONTINUE_LINKING);
+    }
+  g = gimple_build_return (retval);
+  gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+
+  /* Handle aligned clauses by replacing default defs of the aligned
+     uniform args with the lhs of a __builtin_assume_aligned (arg_N(D),
+     alignment) call.  Handle linear clauses by adding PHIs.  */
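+
+  /* A hypothetical example of the aligned handling below: for
+       #pragma omp declare simd uniform(p) aligned(p:32)
+     every use of the default def p_1(D) in the clone body is
+     redirected to the lhs of a call emitted at function entry,
+       p_3 = __builtin_assume_aligned (p_1(D), 32);
+     letting later passes assume 32-byte alignment of p.  */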
+  for (unsigned i = 0; i < node->simdclone->nargs; i++)
+    if (node->simdclone->args[i].alignment
+       && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
+       && (node->simdclone->args[i].alignment
+           & (node->simdclone->args[i].alignment - 1)) == 0
+       && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
+          == POINTER_TYPE)
+      {
+       unsigned int alignment = node->simdclone->args[i].alignment;
+       tree orig_arg = node->simdclone->args[i].orig_arg;
+       tree def = ssa_default_def (cfun, orig_arg);
+       if (def && !has_zero_uses (def))
+         {
+           tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
+           gimple_seq seq = NULL;
+           bool need_cvt = false;
+           gimple call
+             = gimple_build_call (fn, 2, def, size_int (alignment));
+           g = call;
+           if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
+                                           ptr_type_node))
+             need_cvt = true;
+           tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg, NULL);
+           gimple_call_set_lhs (g, t);
+           gimple_seq_add_stmt_without_update (&seq, g);
+           if (need_cvt)
+             {
+               t = make_ssa_name (orig_arg, NULL);
+               g = gimple_build_assign_with_ops (NOP_EXPR, t,
+                                                 gimple_call_lhs (g),
+                                                 NULL_TREE);
+               gimple_seq_add_stmt_without_update (&seq, g);
+             }
+           gsi_insert_seq_on_edge_immediate
+             (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
+
+           entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+           int freq = compute_call_stmt_bb_frequency (current_function_decl,
+                                                      entry_bb);
+           cgraph_create_edge (node, cgraph_get_create_node (fn),
+                               call, entry_bb->count, freq);
+
+           imm_use_iterator iter;
+           use_operand_p use_p;
+           gimple use_stmt;
+           tree repl = gimple_get_lhs (g);
+           FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
+             if (is_gimple_debug (use_stmt) || use_stmt == call)
+               continue;
+             else
+               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+                 SET_USE (use_p, repl);
+         }
+      }
+    else if (node->simdclone->args[i].arg_type
+            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+      {
+       tree orig_arg = node->simdclone->args[i].orig_arg;
+       tree def = ssa_default_def (cfun, orig_arg);
+       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+                   || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
+       if (def && !has_zero_uses (def))
+         {
+           iter1 = make_ssa_name (orig_arg, NULL);
+           iter2 = make_ssa_name (orig_arg, NULL);
+           phi = create_phi_node (iter1, body_bb);
+           add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
+           add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
+           enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+                                 ? PLUS_EXPR : POINTER_PLUS_EXPR;
+           tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+                          ? TREE_TYPE (orig_arg) : sizetype;
+           tree addcst
+             = build_int_cst (addtype, node->simdclone->args[i].linear_step);
+           g = gimple_build_assign_with_ops (code, iter2, iter1, addcst);
+           gsi = gsi_last_bb (incr_bb);
+           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+
+           imm_use_iterator iter;
+           use_operand_p use_p;
+           gimple use_stmt;
+           FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
+             if (use_stmt == phi)
+               continue;
+             else
+               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+                 SET_USE (use_p, iter1);
+         }
+      }
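+
+  /* A hypothetical example of the linear handling above: for
+       #pragma omp declare simd linear(i:1)
+     uses of i_1(D) in the body are redirected to a per-lane PHI
+       i_4 = PHI <i_1(D) (preheader), i_5 (latch)>
+     with `i_5 = i_4 + 1' emitted in the increment block, so lane k
+     observes the value i + k.  */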
+
+  calculate_dominance_info (CDI_DOMINATORS);
+  add_loop (loop, loop->header->loop_father);
+  update_ssa (TODO_update_ssa);
+
+  pop_cfun ();
+}
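+
+/* An illustrative sketch (hypothetical pseudo-GIMPLE) of the clone
+   body that simd_clone_adjust builds for a simdlen-4 clone of
+   `T foo (T a)':
+
+     T a_array[4], ret_array[4];
+     a_array = vector argument;		(simd array initialization)
+     for (iter = 0; iter < 4; iter++)	(loop constructed above)
+       ret_array[iter] = original body (a_array[iter]);
+     return VIEW_CONVERT_EXPR <vector type> (ret_array);  */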
+
+/* If the function in NODE is tagged as an elemental SIMD function,
+   create the appropriate SIMD clones.  */
+
+static void
+expand_simd_clones (struct cgraph_node *node)
+{
+  tree attr = lookup_attribute ("omp declare simd",
+                               DECL_ATTRIBUTES (node->decl));
+  if (attr == NULL_TREE
+      || node->global.inlined_to
+      || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
+    return;
+
+  /* Ignore
+     #pragma omp declare simd
+     extern int foo ();
+     in C, since there we don't know the argument types at all.  */
+  if (!node->definition
+      && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
+    return;
+
+  do
+    {
+      /* Start with parsing the "omp declare simd" attribute(s).  */
+      bool inbranch_clause_specified;
+      struct cgraph_simd_clone *clone_info
+       = simd_clone_clauses_extract (node, TREE_VALUE (attr),
+                                     &inbranch_clause_specified);
+      if (clone_info == NULL)
+       continue;
+
+      int orig_simdlen = clone_info->simdlen;
+      tree base_type = simd_clone_compute_base_data_type (node, clone_info);
+      /* The target can return 0 (no simd clones should be created),
+        1 (just one ISA of simd clones should be created) or a higher
+        count of ISA variants.  In the latter case, clone_info is
+        initialized for the first ISA variant.  */
+      int count
+       = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
+                                                         base_type, 0);
+      if (count == 0)
+       continue;
+
+      /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
+        also create one inbranch and one !inbranch clone of it.  */
+      for (int i = 0; i < count * 2; i++)
+       {
+         struct cgraph_simd_clone *clone = clone_info;
+         if (inbranch_clause_specified && (i & 1) != 0)
+           continue;
+
+         if (i != 0)
+           {
+             clone = simd_clone_struct_alloc (clone_info->nargs
+                                              + ((i & 1) != 0));
+             simd_clone_struct_copy (clone, clone_info);
+             /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
+                and simd_clone_adjust_argument_types did to the first
+                clone's info.  */
+             clone->nargs -= clone_info->inbranch;
+             clone->simdlen = orig_simdlen;
+             /* And call the target hook again to get the right ISA.  */
+             targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
+                                                             base_type,
+                                                             i / 2);
+             if ((i & 1) != 0)
+               clone->inbranch = 1;
+           }
+
+         /* simd_clone_mangle might fail if such a clone has been created
+            already.  */
+         tree id = simd_clone_mangle (node, clone);
+         if (id == NULL_TREE)
+           continue;
+
+         /* Only when we are sure we want to create the clone do we
+            actually clone the function (for definitions) or create
+            another extern FUNCTION_DECL (for prototypes without
+            definitions).  */
+         struct cgraph_node *n = simd_clone_create (node);
+         if (n == NULL)
+           continue;
+
+         n->simdclone = clone;
+         clone->origin = node;
+         clone->next_clone = NULL;
+         if (node->simd_clones == NULL)
+           {
+             clone->prev_clone = n;
+             node->simd_clones = n;
+           }
+         else
+           {
+             clone->prev_clone = node->simd_clones->simdclone->prev_clone;
+             clone->prev_clone->simdclone->next_clone = n;
+             node->simd_clones->simdclone->prev_clone = n;
+           }
+         change_decl_assembler_name (n->decl, id);
+         /* And finally adjust the return type, parameters and for
+            definitions also function body.  */
+         if (node->definition)
+           simd_clone_adjust (n);
+         else
+           {
+             simd_clone_adjust_return_type (n);
+             simd_clone_adjust_argument_types (n);
+           }
+       }
+    }
+  while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
+}
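+
+/* For example (the count is hypothetical): if the target hook returns
+   count == 2 and neither inbranch nor notinbranch was given, the loop
+   above creates four clones: (ISA 0, notinbranch), (ISA 0, inbranch),
+   (ISA 1, notinbranch) and (ISA 1, inbranch).  With an explicit
+   inbranch or notinbranch clause the odd iterations are skipped and
+   just two clones (one per ISA) are created.  */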
+
+/* Entry point for IPA simd clone creation pass.  */
+
+static unsigned int
+ipa_omp_simd_clone (void)
+{
+  struct cgraph_node *node;
+  FOR_EACH_FUNCTION (node)
+    expand_simd_clones (node);
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_omp_simd_clone =
+{
+  SIMPLE_IPA_PASS,             /* type */
+  "simdclone",                 /* name */
+  OPTGROUP_NONE,               /* optinfo_flags */
+  true,                                /* has_gate */
+  true,                                /* has_execute */
+  TV_NONE,                     /* tv_id */
+  ( PROP_ssa | PROP_cfg ),     /* properties_required */
+  0,                           /* properties_provided */
+  0,                           /* properties_destroyed */
+  0,                           /* todo_flags_start */
+  0,                           /* todo_flags_finish */
 };
 
+class pass_omp_simd_clone : public simple_ipa_opt_pass
+{
+public:
+  pass_omp_simd_clone(gcc::context *ctxt)
+    : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate () { return ((flag_openmp || flag_openmp_simd
+                         || flag_cilkplus || (in_lto_p && !flag_wpa))
+                        && (targetm.simd_clone.compute_vecsize_and_simdlen
+                            != NULL)); }
+  unsigned int execute () { return ipa_omp_simd_clone (); }
+};
+
+} // anon namespace
+
+simple_ipa_opt_pass *
+make_pass_omp_simd_clone (gcc::context *ctxt)
+{
+  return new pass_omp_simd_clone (ctxt);
+}
+
 #include "gt-omp-low.h"