nvptx-protos.h (nvptx_output_mov_insn): Declare.
[gcc.git] / libgomp / target.c
index 50baa4d1319e5139be660e9a99e2e7f70f9c9bbe..cf9d0e64c7c241d9870642ee9954da3d74fb6827 100644 (file)
 #include <limits.h>
 #include <stdbool.h>
 #include <stdlib.h>
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>  /* For PRIu64.  */
+#endif
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
 
 #ifdef PLUGIN_SUPPORT
 #include <dlfcn.h>
@@ -46,13 +50,17 @@ static void gomp_target_init (void);
 /* The whole initialization code for offloading plugins is only run one.  */
 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
 
+/* Mutex for offload image registration.  */
+static gomp_mutex_t register_lock;
+
 /* This structure describes an offload image.
    It contains type of the target device, pointer to host table descriptor, and
    pointer to target data.  */
 struct offload_image_descr {
+  unsigned version;
   enum offload_target_type type;
-  void *host_table;
-  void *target_data;
+  const void *host_table;
+  const void *target_data;
 };
 
 /* Array of descriptors of offload images.  */
@@ -64,31 +72,26 @@ static int num_offload_images;
 /* Array of descriptors for all available devices.  */
 static struct gomp_device_descr *devices;
 
-#ifdef PLUGIN_SUPPORT
 /* Total number of available devices.  */
 static int num_devices;
-#endif
 
 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
 static int num_devices_openmp;
 
-/* The comparison function.  */
+/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
 
-attribute_hidden int
-splay_compare (splay_tree_key x, splay_tree_key y)
+static void *
+gomp_realloc_unlock (void *old, size_t size)
 {
-  if (x->host_start == x->host_end
-      && y->host_start == y->host_end)
-    return 0;
-  if (x->host_end <= y->host_start)
-    return -1;
-  if (x->host_start >= y->host_end)
-    return 1;
-  return 0;
+  void *ret = realloc (old, size);
+  if (ret == NULL)
+    {
+      gomp_mutex_unlock (&register_lock);
+      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
+    }
+  return ret;
 }
 
-#include "splay-tree.h"
-
 attribute_hidden void
 gomp_init_targets_once (void)
 {
@@ -114,71 +117,310 @@ resolve_device (int device_id)
   if (device_id < 0 || device_id >= gomp_get_num_devices ())
     return NULL;
 
+  gomp_mutex_lock (&devices[device_id].lock);
+  if (!devices[device_id].is_initialized)
+    gomp_init_device (&devices[device_id]);
+  gomp_mutex_unlock (&devices[device_id].lock);
+
   return &devices[device_id];
 }
 
 
-/* Handle the case where splay_tree_lookup found oldn for newn.
+static inline splay_tree_key
+gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
+{
+  if (key->host_start != key->host_end)
+    return splay_tree_lookup (mem_map, key);
+
+  key->host_end++;
+  splay_tree_key n = splay_tree_lookup (mem_map, key);
+  key->host_end--;
+  if (n)
+    return n;
+  key->host_start--;
+  n = splay_tree_lookup (mem_map, key);
+  key->host_start++;
+  if (n)
+    return n;
+  return splay_tree_lookup (mem_map, key);
+}
+
+static inline splay_tree_key
+gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
+{
+  if (key->host_start != key->host_end)
+    return splay_tree_lookup (mem_map, key);
+
+  key->host_end++;
+  splay_tree_key n = splay_tree_lookup (mem_map, key);
+  key->host_end--;
+  return n;
+}
+
+/* Handle the case where gomp_map_lookup, splay_tree_lookup or
+   gomp_map_0len_lookup found oldn for newn.
    Helper function of gomp_map_vars.  */
 
 static inline void
-gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+                       splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind)
 {
+  tgt_var->key = oldn;
+  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
+  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
+  tgt_var->offset = newn->host_start - oldn->host_start;
+  tgt_var->length = newn->host_end - newn->host_start;
+
   if ((kind & GOMP_MAP_FLAG_FORCE)
       || oldn->host_start > newn->host_start
       || oldn->host_end < newn->host_end)
-    gomp_fatal ("Trying to map into device [%p..%p) object when "
-               "[%p..%p) is already mapped",
-               (void *) newn->host_start, (void *) newn->host_end,
-               (void *) oldn->host_start, (void *) oldn->host_end);
-  oldn->refcount++;
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Trying to map into device [%p..%p) object when "
+                 "[%p..%p) is already mapped",
+                 (void *) newn->host_start, (void *) newn->host_end,
+                 (void *) oldn->host_start, (void *) oldn->host_end);
+    }
+
+  if (GOMP_MAP_ALWAYS_TO_P (kind))
+    devicep->host2dev_func (devicep->target_id,
+                           (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+                                     + newn->host_start - oldn->host_start),
+                           (void *) newn->host_start,
+                           newn->host_end - newn->host_start);
+  if (oldn->refcount != REFCOUNT_INFINITY)
+    oldn->refcount++;
 }
 
 static int
-get_kind (bool is_openacc, void *kinds, int idx)
+get_kind (bool short_mapkind, void *kinds, int idx)
+{
+  return short_mapkind ? ((unsigned short *) kinds)[idx]
+                      : ((unsigned char *) kinds)[idx];
+}
+
+static void
+gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
+                 uintptr_t target_offset, uintptr_t bias)
+{
+  struct gomp_device_descr *devicep = tgt->device_descr;
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+
+  cur_node.host_start = host_ptr;
+  if (cur_node.host_start == (uintptr_t) NULL)
+    {
+      cur_node.tgt_offset = (uintptr_t) NULL;
+      /* FIXME: see comment about coalescing host/dev transfers below.  */
+      devicep->host2dev_func (devicep->target_id,
+                             (void *) (tgt->tgt_start + target_offset),
+                             (void *) &cur_node.tgt_offset,
+                             sizeof (void *));
+      return;
+    }
+  /* Add bias to the pointer value.  */
+  cur_node.host_start += bias;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+  if (n == NULL)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Pointer target of array section wasn't mapped");
+    }
+  cur_node.host_start -= n->host_start;
+  cur_node.tgt_offset
+    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
+  /* At this point tgt_offset is target address of the
+     array section.  Now subtract bias to get what we want
+     to initialize the pointer with.  */
+  cur_node.tgt_offset -= bias;
+  /* FIXME: see comment about coalescing host/dev transfers below.  */
+  devicep->host2dev_func (devicep->target_id,
+                         (void *) (tgt->tgt_start + target_offset),
+                         (void *) &cur_node.tgt_offset,
+                         sizeof (void *));
+}
+
+static void
+gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+                         size_t first, size_t i, void **hostaddrs,
+                         size_t *sizes, void *kinds)
+{
+  struct gomp_device_descr *devicep = tgt->device_descr;
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int kind;
+  const bool short_mapkind = true;
+  const int typemask = short_mapkind ? 0xff : 0x7;
+
+  cur_node.host_start = (uintptr_t) hostaddrs[i];
+  cur_node.host_end = cur_node.host_start + sizes[i];
+  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
+  kind = get_kind (short_mapkind, kinds, i);
+  if (n2
+      && n2->tgt == n->tgt
+      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
+    {
+      gomp_map_vars_existing (devicep, n2, &cur_node,
+                             &tgt->list[i], kind & typemask);
+      return;
+    }
+  if (sizes[i] == 0)
+    {
+      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
+       {
+         cur_node.host_start--;
+         n2 = splay_tree_lookup (mem_map, &cur_node);
+         cur_node.host_start++;
+         if (n2
+             && n2->tgt == n->tgt
+             && n2->host_start - n->host_start
+                == n2->tgt_offset - n->tgt_offset)
+           {
+             gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+                                     kind & typemask);
+             return;
+           }
+       }
+      cur_node.host_end++;
+      n2 = splay_tree_lookup (mem_map, &cur_node);
+      cur_node.host_end--;
+      if (n2
+         && n2->tgt == n->tgt
+         && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
+       {
+         gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+                                 kind & typemask);
+         return;
+       }
+    }
+  gomp_mutex_unlock (&devicep->lock);
+  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
+             "other mapped elements from the same structure weren't mapped "
+             "together with it", (void *) cur_node.host_start,
+             (void *) cur_node.host_end);
+}
+
+static inline uintptr_t
+gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
 {
-  return is_openacc ? ((unsigned short *) kinds)[idx]
-                   : ((unsigned char *) kinds)[idx];
+  if (tgt->list[i].key != NULL)
+    return tgt->list[i].key->tgt->tgt_start
+          + tgt->list[i].key->tgt_offset
+          + tgt->list[i].offset;
+  if (tgt->list[i].offset == ~(uintptr_t) 0)
+    return (uintptr_t) hostaddrs[i];
+  if (tgt->list[i].offset == ~(uintptr_t) 1)
+    return 0;
+  if (tgt->list[i].offset == ~(uintptr_t) 2)
+    return tgt->list[i + 1].key->tgt->tgt_start
+          + tgt->list[i + 1].key->tgt_offset
+          + tgt->list[i + 1].offset
+          + (uintptr_t) hostaddrs[i]
+          - (uintptr_t) hostaddrs[i + 1];
+  return tgt->tgt_start + tgt->list[i].offset;
 }
 
 attribute_hidden struct target_mem_desc *
 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
-              bool is_openacc, bool is_target)
+              bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
 {
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
-  const int rshift = is_openacc ? 8 : 3;
-  const int typemask = is_openacc ? 0xff : 0x7;
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
+  bool has_firstprivate = false;
+  const int rshift = short_mapkind ? 8 : 3;
+  const int typemask = short_mapkind ? 0xff : 0x7;
+  struct splay_tree_s *mem_map = &devicep->mem_map;
   struct splay_tree_key_s cur_node;
   struct target_mem_desc *tgt
     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
   tgt->list_count = mapnum;
-  tgt->refcount = 1;
+  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
   tgt->device_descr = devicep;
-  tgt->mem_map = mm;
 
   if (mapnum == 0)
-    return tgt;
+    {
+      tgt->tgt_start = 0;
+      tgt->tgt_end = 0;
+      return tgt;
+    }
 
   tgt_align = sizeof (void *);
   tgt_size = 0;
-  if (is_target)
+  if (pragma_kind == GOMP_MAP_VARS_TARGET)
     {
       size_t align = 4 * sizeof (void *);
       tgt_align = align;
       tgt_size = mapnum * sizeof (void *);
     }
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
 
   for (i = 0; i < mapnum; i++)
     {
-      int kind = get_kind (is_openacc, kinds, i);
-      if (hostaddrs[i] == NULL)
+      int kind = get_kind (short_mapkind, kinds, i);
+      if (hostaddrs[i] == NULL
+         || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
+       {
+         tgt->list[i].key = NULL;
+         tgt->list[i].offset = ~(uintptr_t) 0;
+         continue;
+       }
+      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
+       {
+         cur_node.host_start = (uintptr_t) hostaddrs[i];
+         cur_node.host_end = cur_node.host_start;
+         splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+         if (n == NULL)
+           {
+             gomp_mutex_unlock (&devicep->lock);
+             gomp_fatal ("use_device_ptr pointer wasn't mapped");
+           }
+         cur_node.host_start -= n->host_start;
+         hostaddrs[i]
+           = (void *) (n->tgt->tgt_start + n->tgt_offset
+                       + cur_node.host_start);
+         tgt->list[i].key = NULL;
+         tgt->list[i].offset = ~(uintptr_t) 0;
+         continue;
+       }
+      else if ((kind & typemask) == GOMP_MAP_STRUCT)
+       {
+         size_t first = i + 1;
+         size_t last = i + sizes[i];
+         cur_node.host_start = (uintptr_t) hostaddrs[i];
+         cur_node.host_end = (uintptr_t) hostaddrs[last]
+                             + sizes[last];
+         tgt->list[i].key = NULL;
+         tgt->list[i].offset = ~(uintptr_t) 2;
+         splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+         if (n == NULL)
+           {
+             size_t align = (size_t) 1 << (kind >> rshift);
+             if (tgt_align < align)
+               tgt_align = align;
+             tgt_size -= (uintptr_t) hostaddrs[first]
+                         - (uintptr_t) hostaddrs[i];
+             tgt_size = (tgt_size + align - 1) & ~(align - 1);
+             tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
+             not_found_cnt += last - i;
+             for (i = first; i <= last; i++)
+               tgt->list[i].key = NULL;
+             i--;
+             continue;
+           }
+         for (i = first; i <= last; i++)
+           gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+                                     sizes, kinds);
+         i--;
+         continue;
+       }
+      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
-         tgt->list[i] = NULL;
+         tgt->list[i].key = NULL;
+         tgt->list[i].offset = ~(uintptr_t) 1;
+         has_firstprivate = true;
          continue;
        }
       cur_node.host_start = (uintptr_t) hostaddrs[i];
@@ -186,15 +428,37 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
        cur_node.host_end = cur_node.host_start + sizes[i];
       else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
-      splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
-      if (n)
+      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
+       {
+         tgt->list[i].key = NULL;
+
+         size_t align = (size_t) 1 << (kind >> rshift);
+         if (tgt_align < align)
+           tgt_align = align;
+         tgt_size = (tgt_size + align - 1) & ~(align - 1);
+         tgt_size += cur_node.host_end - cur_node.host_start;
+         has_firstprivate = true;
+         continue;
+       }
+      splay_tree_key n;
+      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
-         tgt->list[i] = n;
-         gomp_map_vars_existing (n, &cur_node, kind & typemask);
+         n = gomp_map_0len_lookup (mem_map, &cur_node);
+         if (!n)
+           {
+             tgt->list[i].key = NULL;
+             tgt->list[i].offset = ~(uintptr_t) 1;
+             continue;
+           }
        }
+      else
+       n = splay_tree_lookup (mem_map, &cur_node);
+      if (n)
+       gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+                               kind & typemask);
       else
        {
-         tgt->list[i] = NULL;
+         tgt->list[i].key = NULL;
 
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
@@ -206,7 +470,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
-               if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+               if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
@@ -215,7 +479,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                  break;
                else
                  {
-                   tgt->list[j] = NULL;
+                   tgt->list[j].key = NULL;
                    i++;
                  }
            }
@@ -225,12 +489,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
   if (devaddrs)
     {
       if (mapnum != 1)
-        gomp_fatal ("unexpected aggregation");
+       {
+         gomp_mutex_unlock (&devicep->lock);
+         gomp_fatal ("unexpected aggregation");
+       }
       tgt->to_free = devaddrs[0];
       tgt->tgt_start = (uintptr_t) tgt->to_free;
       tgt->tgt_end = tgt->tgt_start + sizes[0];
     }
-  else if (not_found_cnt || is_target)
+  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
     {
       /* Allocate tgt_align aligned tgt_size block of memory.  */
       /* FIXME: Perhaps change interface to allocate properly aligned
@@ -249,60 +516,153 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
     }
 
   tgt_size = 0;
-  if (is_target)
+  if (pragma_kind == GOMP_MAP_VARS_TARGET)
     tgt_size = mapnum * sizeof (void *);
 
   tgt->array = NULL;
-  if (not_found_cnt)
+  if (not_found_cnt || has_firstprivate)
     {
-      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
+      if (not_found_cnt)
+       tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
       splay_tree_node array = tgt->array;
-      size_t j;
+      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
+      uintptr_t field_tgt_base = 0;
 
       for (i = 0; i < mapnum; i++)
-       if (tgt->list[i] == NULL)
+       if (tgt->list[i].key == NULL)
          {
-           int kind = get_kind (is_openacc, kinds, i);
+           int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
+           switch (kind & typemask)
+             {
+               size_t align, len, first, last;
+               splay_tree_key n;
+             case GOMP_MAP_FIRSTPRIVATE:
+               align = (size_t) 1 << (kind >> rshift);
+               tgt_size = (tgt_size + align - 1) & ~(align - 1);
+               tgt->list[i].offset = tgt_size;
+               len = sizes[i];
+               devicep->host2dev_func (devicep->target_id,
+                                       (void *) (tgt->tgt_start + tgt_size),
+                                       (void *) hostaddrs[i], len);
+               tgt_size += len;
+               continue;
+             case GOMP_MAP_FIRSTPRIVATE_INT:
+             case GOMP_MAP_USE_DEVICE_PTR:
+             case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+               continue;
+             case GOMP_MAP_STRUCT:
+               first = i + 1;
+               last = i + sizes[i];
+               cur_node.host_start = (uintptr_t) hostaddrs[i];
+               cur_node.host_end = (uintptr_t) hostaddrs[last]
+                                   + sizes[last];
+               if (tgt->list[first].key != NULL)
+                 continue;
+               n = splay_tree_lookup (mem_map, &cur_node);
+               if (n == NULL)
+                 {
+                   size_t align = (size_t) 1 << (kind >> rshift);
+                   tgt_size -= (uintptr_t) hostaddrs[first]
+                               - (uintptr_t) hostaddrs[i];
+                   tgt_size = (tgt_size + align - 1) & ~(align - 1);
+                   tgt_size += (uintptr_t) hostaddrs[first]
+                               - (uintptr_t) hostaddrs[i];
+                   field_tgt_base = (uintptr_t) hostaddrs[first];
+                   field_tgt_offset = tgt_size;
+                   field_tgt_clear = last;
+                   tgt_size += cur_node.host_end
+                               - (uintptr_t) hostaddrs[first];
+                   continue;
+                 }
+               for (i = first; i <= last; i++)
+                 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+                                           sizes, kinds);
+               i--;
+               continue;
+             case GOMP_MAP_ALWAYS_POINTER:
+               cur_node.host_start = (uintptr_t) hostaddrs[i];
+               cur_node.host_end = cur_node.host_start + sizeof (void *);
+               n = splay_tree_lookup (mem_map, &cur_node);
+               if (n == NULL
+                   || n->host_start > cur_node.host_start
+                   || n->host_end < cur_node.host_end)
+                 {
+                   gomp_mutex_unlock (&devicep->lock);
+                   gomp_fatal ("always pointer not mapped");
+                 }
+               if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
+                   != GOMP_MAP_ALWAYS_POINTER)
+                 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
+               if (cur_node.tgt_offset)
+                 cur_node.tgt_offset -= sizes[i];
+               devicep->host2dev_func (devicep->target_id,
+                                       (void *) (n->tgt->tgt_start
+                                                 + n->tgt_offset
+                                                 + cur_node.host_start
+                                                 - n->host_start),
+                                       (void *) &cur_node.tgt_offset,
+                                       sizeof (void *));
+               cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+                                     + cur_node.host_start - n->host_start;
+               continue;
+             default:
+               break;
+             }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
-           splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
+           splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n)
-             {
-               tgt->list[i] = n;
-               gomp_map_vars_existing (n, k, kind & typemask);
-             }
+             gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+                                     kind & typemask);
            else
              {
                size_t align = (size_t) 1 << (kind >> rshift);
-               tgt->list[i] = k;
-               tgt_size = (tgt_size + align - 1) & ~(align - 1);
+               tgt->list[i].key = k;
                k->tgt = tgt;
-               k->tgt_offset = tgt_size;
-               tgt_size += k->host_end - k->host_start;
-               k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+               if (field_tgt_clear != ~(size_t) 0)
+                 {
+                   k->tgt_offset = k->host_start - field_tgt_base
+                                   + field_tgt_offset;
+                   if (i == field_tgt_clear)
+                     field_tgt_clear = ~(size_t) 0;
+                 }
+               else
+                 {
+                   tgt_size = (tgt_size + align - 1) & ~(align - 1);
+                   k->tgt_offset = tgt_size;
+                   tgt_size += k->host_end - k->host_start;
+                 }
+               tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+               tgt->list[i].always_copy_from
+                 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
+               tgt->list[i].offset = 0;
+               tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
-               splay_tree_insert (&mm->splay_tree, array);
+               splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
+                 case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
+                 case GOMP_MAP_ALWAYS_TO:
+                 case GOMP_MAP_ALWAYS_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
@@ -313,51 +673,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                                            k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
-                   cur_node.host_start
-                     = (uintptr_t) *(void **) k->host_start;
-                   if (cur_node.host_start == (uintptr_t) NULL)
-                     {
-                       cur_node.tgt_offset = (uintptr_t) NULL;
-                       /* FIXME: see above FIXME comment.  */
-                       devicep->host2dev_func (devicep->target_id,
-                                               (void *) (tgt->tgt_start
-                                                         + k->tgt_offset),
-                                               (void *) &cur_node.tgt_offset,
-                                               sizeof (void *));
-                       break;
-                     }
-                   /* Add bias to the pointer value.  */
-                   cur_node.host_start += sizes[i];
-                   cur_node.host_end = cur_node.host_start + 1;
-                   n = splay_tree_lookup (&mm->splay_tree, &cur_node);
-                   if (n == NULL)
-                     {
-                       /* Could be possibly zero size array section.  */
-                       cur_node.host_end--;
-                       n = splay_tree_lookup (&mm->splay_tree, &cur_node);
-                       if (n == NULL)
-                         {
-                           cur_node.host_start--;
-                           n = splay_tree_lookup (&mm->splay_tree, &cur_node);
-                           cur_node.host_start++;
-                         }
-                     }
-                   if (n == NULL)
-                     gomp_fatal ("Pointer target of array section "
-                                 "wasn't mapped");
-                   cur_node.host_start -= n->host_start;
-                   cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
-                                         + cur_node.host_start;
-                   /* At this point tgt_offset is target address of the
-                      array section.  Now subtract bias to get what we want
-                      to initialize the pointer with.  */
-                   cur_node.tgt_offset -= sizes[i];
-                   /* FIXME: see above FIXME comment.  */
-                   devicep->host2dev_func (devicep->target_id,
-                                           (void *) (tgt->tgt_start
-                                                     + k->tgt_offset),
-                                           (void *) &cur_node.tgt_offset,
-                                           sizeof (void *));
+                   gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+                                     k->tgt_offset, sizes[i]);
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
@@ -368,7 +685,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                                            k->host_end - k->host_start);
 
                    for (j = i + 1; j < mapnum; j++)
-                     if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+                     if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
+                                                        j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
@@ -377,59 +695,17 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                        break;
                      else
                        {
-                         tgt->list[j] = k;
-                         k->refcount++;
-                         cur_node.host_start
-                           = (uintptr_t) *(void **) hostaddrs[j];
-                         if (cur_node.host_start == (uintptr_t) NULL)
-                           {
-                             cur_node.tgt_offset = (uintptr_t) NULL;
-                             /* FIXME: see above FIXME comment.  */
-                             devicep->host2dev_func (devicep->target_id,
-                                (void *) (tgt->tgt_start + k->tgt_offset
-                                          + ((uintptr_t) hostaddrs[j]
-                                             - k->host_start)),
-                                (void *) &cur_node.tgt_offset,
-                                sizeof (void *));
-                             i++;
-                             continue;
-                           }
-                         /* Add bias to the pointer value.  */
-                         cur_node.host_start += sizes[j];
-                         cur_node.host_end = cur_node.host_start + 1;
-                         n = splay_tree_lookup (&mm->splay_tree, &cur_node);
-                         if (n == NULL)
-                           {
-                             /* Could be possibly zero size array section.  */
-                             cur_node.host_end--;
-                             n = splay_tree_lookup (&mm->splay_tree,
-                                                    &cur_node);
-                             if (n == NULL)
-                               {
-                                 cur_node.host_start--;
-                                 n = splay_tree_lookup (&mm->splay_tree,
-                                                        &cur_node);
-                                 cur_node.host_start++;
-                               }
-                           }
-                         if (n == NULL)
-                           gomp_fatal ("Pointer target of array section "
-                                       "wasn't mapped");
-                         cur_node.host_start -= n->host_start;
-                         cur_node.tgt_offset = n->tgt->tgt_start
-                                               + n->tgt_offset
-                                               + cur_node.host_start;
-                         /* At this point tgt_offset is target address of the
-                            array section.  Now subtract bias to get what we
-                            want to initialize the pointer with.  */
-                         cur_node.tgt_offset -= sizes[j];
-                         /* FIXME: see above FIXME comment.  */
-                         devicep->host2dev_func (devicep->target_id,
-                            (void *) (tgt->tgt_start + k->tgt_offset
-                                      + ((uintptr_t) hostaddrs[j]
-                                         - k->host_start)),
-                            (void *) &cur_node.tgt_offset,
-                            sizeof (void *));
+                         tgt->list[j].key = k;
+                         tgt->list[j].copy_from = false;
+                         tgt->list[j].always_copy_from = false;
+                         if (k->refcount != REFCOUNT_INFINITY)
+                           k->refcount++;
+                         gomp_map_pointer (tgt,
+                                           (uintptr_t) *(void **) hostaddrs[j],
+                                           k->tgt_offset
+                                           + ((uintptr_t) hostaddrs[j]
+                                              - k->host_start),
+                                           sizes[j]);
                          i++;
                        }
                    break;
@@ -438,9 +714,17 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
+                     gomp_mutex_unlock (&devicep->lock);
+#ifdef HAVE_INTTYPES_H
+                     gomp_fatal ("present clause: !acc_is_present (%p, "
+                                 "%"PRIu64" (0x%"PRIx64"))",
+                                 (void *) k->host_start,
+                                 (uint64_t) size, (uint64_t) size);
+#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
-                                 "%zd (0x%zx))", (void *) k->host_start,
-                                 size, size);
+                                 "%lu (0x%lx))", (void *) k->host_start,
+                                 (unsigned long) size, (unsigned long) size);
+#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
@@ -453,6 +737,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                                            sizeof (void *));
                    break;
                  default:
+                   gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }
@@ -461,15 +746,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
          }
     }
 
-  if (is_target)
+  if (pragma_kind == GOMP_MAP_VARS_TARGET)
     {
       for (i = 0; i < mapnum; i++)
        {
-         if (tgt->list[i] == NULL)
-           cur_node.tgt_offset = (uintptr_t) NULL;
-         else
-           cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
-                                 + tgt->list[i]->tgt_offset;
+         cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
@@ -479,7 +760,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
        }
     }
 
-  gomp_mutex_unlock (&mm->lock);
+  /* If the variable from "omp target enter data" map-list was already mapped,
+     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
+     gomp_exit_data.  */
+  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
+    {
+      free (tgt);
+      tgt = NULL;
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
   return tgt;
 }
 
@@ -504,29 +794,28 @@ attribute_hidden void
 gomp_copy_from_async (struct target_mem_desc *tgt)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
-  struct gomp_memory_mapping *mm = tgt->mem_map;
   size_t i;
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
 
   for (i = 0; i < tgt->list_count; i++)
-    if (tgt->list[i] == NULL)
+    if (tgt->list[i].key == NULL)
       ;
-    else if (tgt->list[i]->refcount > 1)
+    else if (tgt->list[i].key->refcount > 1)
       {
-       tgt->list[i]->refcount--;
-       tgt->list[i]->async_refcount++;
+       tgt->list[i].key->refcount--;
+       tgt->list[i].key->async_refcount++;
       }
     else
       {
-       splay_tree_key k = tgt->list[i];
-       if (k->copy_from)
+       splay_tree_key k = tgt->list[i].key;
+       if (tgt->list[i].copy_from)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
       }
 
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
 }
 
 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
@@ -537,7 +826,6 @@ attribute_hidden void
 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
-  struct gomp_memory_mapping *mm = tgt->mem_map;
 
   if (tgt->list_count == 0)
     {
@@ -545,46 +833,61 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
       return;
     }
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
 
   size_t i;
   for (i = 0; i < tgt->list_count; i++)
-    if (tgt->list[i] == NULL)
-      ;
-    else if (tgt->list[i]->refcount > 1)
-      tgt->list[i]->refcount--;
-    else if (tgt->list[i]->async_refcount > 0)
-      tgt->list[i]->async_refcount--;
-    else
-      {
-       splay_tree_key k = tgt->list[i];
-       if (k->copy_from && do_copyfrom)
-         devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
-                                 (void *) (k->tgt->tgt_start + k->tgt_offset),
-                                 k->host_end - k->host_start);
-       splay_tree_remove (&mm->splay_tree, k);
-       if (k->tgt->refcount > 1)
-         k->tgt->refcount--;
-       else
-         gomp_unmap_tgt (k->tgt);
-      }
+    {
+      splay_tree_key k = tgt->list[i].key;
+      if (k == NULL)
+       continue;
+
+      bool do_unmap = false;
+      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+       k->refcount--;
+      else if (k->refcount == 1)
+       {
+         if (k->async_refcount > 0)
+           k->async_refcount--;
+         else
+           {
+             k->refcount--;
+             do_unmap = true;
+           }
+       }
+
+      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
+         || tgt->list[i].always_copy_from)
+       devicep->dev2host_func (devicep->target_id,
+                               (void *) (k->host_start + tgt->list[i].offset),
+                               (void *) (k->tgt->tgt_start + k->tgt_offset
+                                         + tgt->list[i].offset),
+                               tgt->list[i].length);
+      if (do_unmap)
+       {
+         splay_tree_remove (&devicep->mem_map, k);
+         if (k->tgt->refcount > 1)
+           k->tgt->refcount--;
+         else
+           gomp_unmap_tgt (k->tgt);
+       }
+    }
 
   if (tgt->refcount > 1)
     tgt->refcount--;
   else
     gomp_unmap_tgt (tgt);
 
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
 }
 
 static void
-gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
-            size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
-            bool is_openacc)
+gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
+            size_t *sizes, void *kinds, bool short_mapkind)
 {
   size_t i;
   struct splay_tree_key_s cur_node;
-  const int typemask = is_openacc ? 0xff : 0x7;
+  const int typemask = short_mapkind ? 0xff : 0x7;
 
   if (!devicep)
     return;
@@ -592,25 +895,27 @@ gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
   if (mapnum == 0)
     return;
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
   for (i = 0; i < mapnum; i++)
     if (sizes[i])
       {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
-       splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
-                                             &cur_node);
+       splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
-           int kind = get_kind (is_openacc, kinds, i);
+           int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
-             gomp_fatal ("Trying to update [%p..%p) object when"
-                         "only [%p..%p) is mapped",
-                         (void *) cur_node.host_start,
-                         (void *) cur_node.host_end,
-                         (void *) n->host_start,
-                         (void *) n->host_end);
+             {
+               gomp_mutex_unlock (&devicep->lock);
+               gomp_fatal ("Trying to update [%p..%p) object when "
+                           "only [%p..%p) is mapped",
+                           (void *) cur_node.host_start,
+                           (void *) cur_node.host_end,
+                           (void *) n->host_start,
+                           (void *) n->host_end);
+             }
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              devicep->host2dev_func (devicep->target_id,
                                      (void *) (n->tgt->tgt_start
@@ -628,31 +933,245 @@ gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
                                                - n->host_start),
                                      cur_node.host_end - cur_node.host_start);
          }
-       else
-         gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
-                     (void *) cur_node.host_start,
-                     (void *) cur_node.host_end);
       }
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
+}
+
+/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
+   And insert to splay tree the mapping between addresses from HOST_TABLE and
+   from loaded target image.  We rely in the host and device compiler
+   emitting variable and functions in the same order.  */
+
+static void
+gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
+                          const void *host_table, const void *target_data,
+                          bool is_register_lock)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  /* Load image to device and get target addresses for the image.  */
+  struct addr_pair *target_table = NULL;
+  int i, num_target_entries;
+
+  num_target_entries
+    = devicep->load_image_func (devicep->target_id, version,
+                               target_data, &target_table);
+
+  if (num_target_entries != num_funcs + num_vars)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      if (is_register_lock)
+       gomp_mutex_unlock (&register_lock);
+      gomp_fatal ("Cannot map target functions or variables"
+                 " (expected %u, have %u)", num_funcs + num_vars,
+                 num_target_entries);
+    }
+
+  /* Insert host-target address mapping into splay tree.  */
+  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
+  tgt->refcount = REFCOUNT_INFINITY;
+  tgt->tgt_start = 0;
+  tgt->tgt_end = 0;
+  tgt->to_free = NULL;
+  tgt->prev = NULL;
+  tgt->list_count = 0;
+  tgt->device_descr = devicep;
+  splay_tree_node array = tgt->array;
+
+  for (i = 0; i < num_funcs; i++)
+    {
+      splay_tree_key k = &array->key;
+      k->host_start = (uintptr_t) host_func_table[i];
+      k->host_end = k->host_start + 1;
+      k->tgt = tgt;
+      k->tgt_offset = target_table[i].start;
+      k->refcount = REFCOUNT_INFINITY;
+      k->async_refcount = 0;
+      array->left = NULL;
+      array->right = NULL;
+      splay_tree_insert (&devicep->mem_map, array);
+      array++;
+    }
+
+  for (i = 0; i < num_vars; i++)
+    {
+      struct addr_pair *target_var = &target_table[num_funcs + i];
+      if (target_var->end - target_var->start
+         != (uintptr_t) host_var_table[i * 2 + 1])
+       {
+         gomp_mutex_unlock (&devicep->lock);
+         if (is_register_lock)
+           gomp_mutex_unlock (&register_lock);
+         gomp_fatal ("Can't map target variables (size mismatch)");
+       }
+
+      splay_tree_key k = &array->key;
+      k->host_start = (uintptr_t) host_var_table[i * 2];
+      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
+      k->tgt = tgt;
+      k->tgt_offset = target_var->start;
+      k->refcount = REFCOUNT_INFINITY;
+      k->async_refcount = 0;
+      array->left = NULL;
+      array->right = NULL;
+      splay_tree_insert (&devicep->mem_map, array);
+      array++;
+    }
+
+  free (target_table);
+}
+
+/* Unload the mappings described by target_data from device DEVICE_P.
+   The device must be locked.   */
+
+static void
+gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+                              unsigned version,
+                              const void *host_table, const void *target_data)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  unsigned j;
+  struct splay_tree_key_s k;
+  splay_tree_key node = NULL;
+
+  /* Find mapping at start of node array */
+  if (num_funcs || num_vars)
+    {
+      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
+                     : (uintptr_t) host_var_table[0]);
+      k.host_end = k.host_start + 1;
+      node = splay_tree_lookup (&devicep->mem_map, &k);
+    }
+
+  devicep->unload_image_func (devicep->target_id, version, target_data);
+
+  /* Remove mappings from splay tree.  */
+  for (j = 0; j < num_funcs; j++)
+    {
+      k.host_start = (uintptr_t) host_func_table[j];
+      k.host_end = k.host_start + 1;
+      splay_tree_remove (&devicep->mem_map, &k);
+    }
+
+  for (j = 0; j < num_vars; j++)
+    {
+      k.host_start = (uintptr_t) host_var_table[j * 2];
+      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+      splay_tree_remove (&devicep->mem_map, &k);
+    }
+
+  if (node)
+    {
+      free (node->tgt);
+      free (node);
+    }
 }
 
-/* This function should be called from every offload image.
+/* This function should be called from every offload image while loading.
    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
    the target, and TARGET_DATA needed by target plugin.  */
 
 void
-GOMP_offload_register (void *host_table, enum offload_target_type target_type,
-                      void *target_data)
+GOMP_offload_register_ver (unsigned version, const void *host_table,
+                          int target_type, const void *target_data)
 {
-  offload_images = gomp_realloc (offload_images,
-                                (num_offload_images + 1)
-                                * sizeof (struct offload_image_descr));
+  int i;
+
+  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
+    gomp_fatal ("Library too old for offload (version %u < %u)",
+               GOMP_VERSION, GOMP_VERSION_LIB (version));
+  
+  gomp_mutex_lock (&register_lock);
+
+  /* Load image to all initialized devices.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      struct gomp_device_descr *devicep = &devices[i];
+      gomp_mutex_lock (&devicep->lock);
+      if (devicep->type == target_type && devicep->is_initialized)
+       gomp_load_image_to_device (devicep, version,
+                                  host_table, target_data, true);
+      gomp_mutex_unlock (&devicep->lock);
+    }
 
+  /* Insert image to array of pending images.  */
+  offload_images
+    = gomp_realloc_unlock (offload_images,
+                          (num_offload_images + 1)
+                          * sizeof (struct offload_image_descr));
+  offload_images[num_offload_images].version = version;
   offload_images[num_offload_images].type = target_type;
   offload_images[num_offload_images].host_table = host_table;
   offload_images[num_offload_images].target_data = target_data;
 
   num_offload_images++;
+  gomp_mutex_unlock (&register_lock);
+}
+
+void
+GOMP_offload_register (const void *host_table, int target_type,
+                      const void *target_data)
+{
+  GOMP_offload_register_ver (0, host_table, target_type, target_data);
+}
+
+/* This function should be called from every offload image while unloading.
+   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
+   the target, and TARGET_DATA needed by target plugin.  */
+
+void
+GOMP_offload_unregister_ver (unsigned version, const void *host_table,
+                            int target_type, const void *target_data)
+{
+  int i;
+
+  gomp_mutex_lock (&register_lock);
+
+  /* Unload image from all initialized devices.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      struct gomp_device_descr *devicep = &devices[i];
+      gomp_mutex_lock (&devicep->lock);
+      if (devicep->type == target_type && devicep->is_initialized)
+       gomp_unload_image_from_device (devicep, version,
+                                      host_table, target_data);
+      gomp_mutex_unlock (&devicep->lock);
+    }
+
+  /* Remove image from array of pending images.  */
+  for (i = 0; i < num_offload_images; i++)
+    if (offload_images[i].target_data == target_data)
+      {
+       offload_images[i] = offload_images[--num_offload_images];
+       break;
+      }
+
+  gomp_mutex_unlock (&register_lock);
+}
+
+void
+GOMP_offload_unregister (const void *host_table, int target_type,
+                        const void *target_data)
+{
+  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
 }
 
 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
@@ -661,66 +1180,55 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type,
 attribute_hidden void
 gomp_init_device (struct gomp_device_descr *devicep)
 {
+  int i;
   devicep->init_device_func (devicep->target_id);
+
+  /* Load to device all images registered by the moment.  */
+  for (i = 0; i < num_offload_images; i++)
+    {
+      struct offload_image_descr *image = &offload_images[i];
+      if (image->type == devicep->type)
+       gomp_load_image_to_device (devicep, image->version,
+                                  image->host_table, image->target_data,
+                                  false);
+    }
+
   devicep->is_initialized = true;
 }
 
-/* Initialize address mapping tables.  MM must be locked on entry, and remains
-   locked on return.  */
-
 attribute_hidden void
-gomp_init_tables (struct gomp_device_descr *devicep,
-                 struct gomp_memory_mapping *mm)
+gomp_unload_device (struct gomp_device_descr *devicep)
 {
-  /* Get address mapping table for device.  */
-  struct mapping_table *table = NULL;
-  int num_entries = devicep->get_table_func (devicep->target_id, &table);
-
-  /* Insert host-target address mapping into dev_splay_tree.  */
-  int i;
-  for (i = 0; i < num_entries; i++)
+  if (devicep->is_initialized)
     {
-      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
-      tgt->refcount = 1;
-      tgt->array = gomp_malloc (sizeof (*tgt->array));
-      tgt->tgt_start = table[i].tgt_start;
-      tgt->tgt_end = table[i].tgt_end;
-      tgt->to_free = NULL;
-      tgt->list_count = 0;
-      tgt->device_descr = devicep;
-      splay_tree_node node = tgt->array;
-      splay_tree_key k = &node->key;
-      k->host_start = table[i].host_start;
-      k->host_end = table[i].host_end;
-      k->tgt_offset = 0;
-      k->refcount = 1;
-      k->copy_from = false;
-      k->tgt = tgt;
-      node->left = NULL;
-      node->right = NULL;
-      splay_tree_insert (&mm->splay_tree, node);
+      unsigned i;
+      
+      /* Unload from device all images registered at the moment.  */
+      for (i = 0; i < num_offload_images; i++)
+       {
+         struct offload_image_descr *image = &offload_images[i];
+         if (image->type == devicep->type)
+           gomp_unload_image_from_device (devicep, image->version,
+                                          image->host_table,
+                                          image->target_data);
+       }
     }
-
-  free (table);
-  mm->is_initialized = true;
 }
 
 /* Free address mapping tables.  MM must be locked on entry, and remains locked
    on return.  */
 
 attribute_hidden void
-gomp_free_memmap (struct gomp_memory_mapping *mm)
+gomp_free_memmap (struct splay_tree_s *mem_map)
 {
-  while (mm->splay_tree.root)
+  while (mem_map->root)
     {
-      struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
+      struct target_mem_desc *tgt = mem_map->root->key.tgt;
 
-      splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
+      splay_tree_remove (mem_map, &mem_map->root->key);
       free (tgt->array);
       free (tgt);
     }
-
-  mm->is_initialized = false;
 }
 
 /* This function de-initializes the target device, specified by DEVICEP.
@@ -735,126 +1243,260 @@ gomp_fini_device (struct gomp_device_descr *devicep)
   devicep->is_initialized = false;
 }
 
-/* Called when encountering a target directive.  If DEVICE
-   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
-   GOMP_DEVICE_HOST_FALLBACK (or any value
-   larger than last available hw device), use host fallback.
-   FN is address of host code, UNUSED is part of the current ABI, but
-   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
-   with MAPNUM entries, with addresses of the host objects,
-   sizes of the host objects (resp. for pointer kind pointer bias
-   and assumed sizeof (void *) size) and kinds.  */
+/* Host fallback for GOMP_target{,_ext} routines.  */
 
-void
-GOMP_target (int device, void (*fn) (void *), const void *unused,
-            size_t mapnum, void **hostaddrs, size_t *sizes,
-            unsigned char *kinds)
+static void
+gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
 {
-  struct gomp_device_descr *devicep = resolve_device (device);
-
-  if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+  struct gomp_thread old_thr, *thr = gomp_thread ();
+  old_thr = *thr;
+  memset (thr, '\0', sizeof (*thr));
+  if (gomp_places_list)
     {
-      /* Host fallback.  */
-      struct gomp_thread old_thr, *thr = gomp_thread ();
-      old_thr = *thr;
-      memset (thr, '\0', sizeof (*thr));
-      if (gomp_places_list)
-       {
-         thr->place = old_thr.place;
-         thr->ts.place_partition_len = gomp_places_list_len;
-       }
-      fn (hostaddrs);
-      gomp_free_thread (thr);
-      *thr = old_thr;
-      return;
+      thr->place = old_thr.place;
+      thr->ts.place_partition_len = gomp_places_list_len;
     }
+  fn (hostaddrs);
+  gomp_free_thread (thr);
+  *thr = old_thr;
+}
 
-  gomp_mutex_lock (&devicep->lock);
-  if (!devicep->is_initialized)
-    gomp_init_device (devicep);
-  gomp_mutex_unlock (&devicep->lock);
+/* Host fallback with firstprivate map-type handling.  */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+                                  void **hostaddrs, size_t *sizes,
+                                  unsigned short *kinds)
+{
+  size_t i, tgt_align = 0, tgt_size = 0;
+  char *tgt = NULL;
+  for (i = 0; i < mapnum; i++)
+    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+      {
+       size_t align = (size_t) 1 << (kinds[i] >> 8);
+       if (tgt_align < align)
+         tgt_align = align;
+       tgt_size = (tgt_size + align - 1) & ~(align - 1);
+       tgt_size += sizes[i];
+      }
+  if (tgt_align)
+    {
+      tgt = gomp_alloca (tgt_size + tgt_align - 1);
+      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+      if (al)
+       tgt += tgt_align - al;
+      tgt_size = 0;
+      for (i = 0; i < mapnum; i++)
+       if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+         {
+           size_t align = (size_t) 1 << (kinds[i] >> 8);
+           tgt_size = (tgt_size + align - 1) & ~(align - 1);
+           memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+           hostaddrs[i] = tgt + tgt_size;
+           tgt_size = tgt_size + sizes[i];
+         }
+    }
+  gomp_target_fallback (fn, hostaddrs);
+}
 
-  void *fn_addr;
+/* Helper function of GOMP_target{,_ext} routines.  */
 
+static void *
+gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
+                        void (*host_fn) (void *))
+{
   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
-    fn_addr = (void *) fn;
+    return (void *) host_fn;
   else
     {
-      struct gomp_memory_mapping *mm = &devicep->mem_map;
-      gomp_mutex_lock (&mm->lock);
-
-      if (!mm->is_initialized)
-       gomp_init_tables (devicep, mm);
-
+      gomp_mutex_lock (&devicep->lock);
       struct splay_tree_key_s k;
-      k.host_start = (uintptr_t) fn;
+      k.host_start = (uintptr_t) host_fn;
       k.host_end = k.host_start + 1;
-      splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
+      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
+      gomp_mutex_unlock (&devicep->lock);
       if (tgt_fn == NULL)
        gomp_fatal ("Target function wasn't mapped");
 
-      gomp_mutex_unlock (&mm->lock);
-
-      fn_addr = (void *) tgt_fn->tgt->tgt_start;
+      return (void *) tgt_fn->tgt_offset;
     }
+}
+
+/* Called when encountering a target directive.  If DEVICE
+   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
+   GOMP_DEVICE_HOST_FALLBACK (or any value
+   larger than last available hw device), use host fallback.
+   FN is address of host code, UNUSED is part of the current ABI, but
+   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
+   with MAPNUM entries, with addresses of the host objects,
+   sizes of the host objects (resp. for pointer kind pointer bias
+   and assumed sizeof (void *) size) and kinds.  */
+
+void
+GOMP_target (int device, void (*fn) (void *), const void *unused,
+            size_t mapnum, void **hostaddrs, size_t *sizes,
+            unsigned char *kinds)
+{
+  struct gomp_device_descr *devicep = resolve_device (device);
+
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return gomp_target_fallback (fn, hostaddrs);
+
+  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
 
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
-                    true);
-  struct gomp_thread old_thr, *thr = gomp_thread ();
-  old_thr = *thr;
-  memset (thr, '\0', sizeof (*thr));
-  if (gomp_places_list)
-    {
-      thr->place = old_thr.place;
-      thr->ts.place_partition_len = gomp_places_list_len;
-    }
+                    GOMP_MAP_VARS_TARGET);
   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
-  gomp_free_thread (thr);
-  *thr = old_thr;
   gomp_unmap_vars (tgt_vars, true);
 }
 
+/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
+   and several arguments have been added:
+   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
+   DEPEND is array of dependencies, see GOMP_task for details.
+   NUM_TEAMS is positive if GOMP_teams will be called in the body with
+   that value, or 1 if teams construct is not present, or 0, if
+   teams construct does not have num_teams clause and so the choice is
+   implementation defined, and -1 if it can't be determined on the host
+   what value will GOMP_teams have on the device.
+   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
+   body with that value, or 0, if teams construct does not have thread_limit
+   clause or the teams construct is not present, or -1 if it can't be
+   determined on the host what value will GOMP_teams have on the device.  */
+
 void
-GOMP_target_data (int device, const void *unused, size_t mapnum,
-                 void **hostaddrs, size_t *sizes, unsigned char *kinds)
+GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
+                void **hostaddrs, size_t *sizes, unsigned short *kinds,
+                unsigned int flags, void **depend, int num_teams,
+                int thread_limit)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+  (void) num_teams;
+  (void) thread_limit;
+
+  if (flags & GOMP_TARGET_FLAG_NOWAIT)
     {
-      /* Host fallback.  */
-      struct gomp_task_icv *icv = gomp_icv (false);
-      if (icv->target_data)
+      struct gomp_thread *thr = gomp_thread ();
+      /* Create a team if we don't have any around, as nowait
+        target tasks make sense to run asynchronously even when
+        outside of any parallel.  */
+      if (__builtin_expect (thr->ts.team == NULL, 0))
+       {
+         struct gomp_team *team = gomp_new_team (1);
+         struct gomp_task *task = thr->task;
+         struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
+         team->prev_ts = thr->ts;
+         thr->ts.team = team;
+         thr->ts.team_id = 0;
+         thr->ts.work_share = &team->work_shares[0];
+         thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+         thr->ts.single_count = 0;
+#endif
+         thr->ts.static_trip = 0;
+         thr->task = &team->implicit_task[0];
+         gomp_init_task (thr->task, NULL, icv);
+         if (task)
+           {
+             thr->task = task;
+             gomp_end_task ();
+             free (task);
+             thr->task = &team->implicit_task[0];
+           }
+         else
+           pthread_setspecific (gomp_thread_destructor, thr);
+       }
+      if (thr->ts.team
+         && !thr->task->final_task)
        {
-         /* Even when doing a host fallback, if there are any active
-            #pragma omp target data constructs, need to remember the
-            new #pragma omp target data, otherwise GOMP_target_end_data
-            would get out of sync.  */
-         struct target_mem_desc *tgt
-           = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
-         tgt->prev = icv->target_data;
-         icv->target_data = tgt;
+         gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
+                                  sizes, kinds, flags, depend,
+                                  GOMP_TARGET_TASK_BEFORE_MAP);
+         return;
        }
+    }
+
+  /* If there are depend clauses, but nowait is not present
+     (or we are in a final task), block the parent task until the
+     dependencies are resolved and then just continue with the rest
+     of the function as if it is a merged task.  */
+  if (depend != NULL)
+    {
+      struct gomp_thread *thr = gomp_thread ();
+      if (thr->task && thr->task->depend_hash)
+       gomp_task_maybe_wait_for_dependencies (depend);
+    }
+
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    {
+      gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
       return;
     }
 
-  gomp_mutex_lock (&devicep->lock);
-  if (!devicep->is_initialized)
-    gomp_init_device (devicep);
-  gomp_mutex_unlock (&devicep->lock);
+  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
+
+  struct target_mem_desc *tgt_vars
+    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
+                    GOMP_MAP_VARS_TARGET);
+  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+  gomp_unmap_vars (tgt_vars, true);
+}
 
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
-  gomp_mutex_lock (&mm->lock);
-  if (!mm->is_initialized)
-    gomp_init_tables (devicep, mm);
-  gomp_mutex_unlock (&mm->lock);
+/* Host fallback for GOMP_target_data{,_ext} routines.  */
+
+static void
+gomp_target_data_fallback (void)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  if (icv->target_data)
+    {
+      /* Even when doing a host fallback, if there are any active
+         #pragma omp target data constructs, need to remember the
+         new #pragma omp target data, otherwise GOMP_target_end_data
+         would get out of sync.  */
+      struct target_mem_desc *tgt
+       = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
+                        GOMP_MAP_VARS_DATA);
+      tgt->prev = icv->target_data;
+      icv->target_data = tgt;
+    }
+}
+
+void
+GOMP_target_data (int device, const void *unused, size_t mapnum,
+                 void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+  struct gomp_device_descr *devicep = resolve_device (device);
+
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return gomp_target_data_fallback ();
 
   struct target_mem_desc *tgt
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
-                    false);
+                    GOMP_MAP_VARS_DATA);
+  struct gomp_task_icv *icv = gomp_icv (true);
+  tgt->prev = icv->target_data;
+  icv->target_data = tgt;
+}
+
+void
+GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
+                     size_t *sizes, unsigned short *kinds)
+{
+  struct gomp_device_descr *devicep = resolve_device (device);
+
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return gomp_target_data_fallback ();
+
+  struct target_mem_desc *tgt
+    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
+                    GOMP_MAP_VARS_DATA);
   struct gomp_task_icv *icv = gomp_icv (true);
   tgt->prev = icv->target_data;
   icv->target_data = tgt;
@@ -882,18 +1524,262 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     return;
 
+  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
+}
+
+void
+GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
+                       size_t *sizes, unsigned short *kinds,
+                       unsigned int flags, void **depend)
+{
+  struct gomp_device_descr *devicep = resolve_device (device);
+
+  /* If there are depend clauses, but nowait is not present,
+     block the parent task until the dependencies are resolved
+     and then just continue with the rest of the function as if it
+     is a merged task.  Until we are able to schedule task during
+     variable mapping or unmapping, ignore nowait if depend clauses
+     are not present.  */
+  if (depend != NULL)
+    {
+      struct gomp_thread *thr = gomp_thread ();
+      if (thr->task && thr->task->depend_hash)
+       {
+         if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+             && thr->ts.team
+             && !thr->task->final_task)
+           {
+             if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+                                          mapnum, hostaddrs, sizes, kinds,
+                                          flags | GOMP_TARGET_FLAG_UPDATE,
+                                          depend, GOMP_TARGET_TASK_DATA))
+               return;
+           }
+         else
+           {
+             struct gomp_team *team = thr->ts.team;
+             /* If parallel or taskgroup has been cancelled, don't start new
+                tasks.  */
+             if (team
+                 && (gomp_team_barrier_cancelled (&team->barrier)
+                     || (thr->task->taskgroup
+                         && thr->task->taskgroup->cancelled)))
+               return;
+
+             gomp_task_maybe_wait_for_dependencies (depend);
+           }
+       }
+    }
+
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return;
+
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team
+      && (gomp_team_barrier_cancelled (&team->barrier)
+         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+    return;
+
+  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
+}
+
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+               void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  const int typemask = 0xff;
+  size_t i;
   gomp_mutex_lock (&devicep->lock);
-  if (!devicep->is_initialized)
-    gomp_init_device (devicep);
+  for (i = 0; i < mapnum; i++)
+    {
+      struct splay_tree_key_s cur_node;
+      unsigned char kind = kinds[i] & typemask;
+      switch (kind)
+       {
+       case GOMP_MAP_FROM:
+       case GOMP_MAP_ALWAYS_FROM:
+       case GOMP_MAP_DELETE:
+       case GOMP_MAP_RELEASE:
+       case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+       case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
+         cur_node.host_start = (uintptr_t) hostaddrs[i];
+         cur_node.host_end = cur_node.host_start + sizes[i];
+         splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
+                             || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
+           ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
+           : splay_tree_lookup (&devicep->mem_map, &cur_node);
+         if (!k)
+           continue;
+
+         if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+           k->refcount--;
+         if ((kind == GOMP_MAP_DELETE
+              || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
+             && k->refcount != REFCOUNT_INFINITY)
+           k->refcount = 0;
+
+         if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+             || kind == GOMP_MAP_ALWAYS_FROM)
+           devicep->dev2host_func (devicep->target_id,
+                                   (void *) cur_node.host_start,
+                                   (void *) (k->tgt->tgt_start + k->tgt_offset
+                                             + cur_node.host_start
+                                             - k->host_start),
+                                   cur_node.host_end - cur_node.host_start);
+         if (k->refcount == 0)
+           {
+             splay_tree_remove (&devicep->mem_map, k);
+             if (k->tgt->refcount > 1)
+               k->tgt->refcount--;
+             else
+               gomp_unmap_tgt (k->tgt);
+           }
+
+         break;
+       default:
+         gomp_mutex_unlock (&devicep->lock);
+         gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+                     kind);
+       }
+    }
+
   gomp_mutex_unlock (&devicep->lock);
+}
+
+void
+GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
+                            size_t *sizes, unsigned short *kinds,
+                            unsigned int flags, void **depend)
+{
+  struct gomp_device_descr *devicep = resolve_device (device);
+
+  /* If there are depend clauses, but nowait is not present,
+     block the parent task until the dependencies are resolved
+     and then just continue with the rest of the function as if it
+     is a merged task.  Until we are able to schedule task during
+     variable mapping or unmapping, ignore nowait if depend clauses
+     are not present.  */
+  if (depend != NULL)
+    {
+      struct gomp_thread *thr = gomp_thread ();
+      if (thr->task && thr->task->depend_hash)
+       {
+         if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+             && thr->ts.team
+             && !thr->task->final_task)
+           {
+             if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+                                          mapnum, hostaddrs, sizes, kinds,
+                                          flags, depend,
+                                          GOMP_TARGET_TASK_DATA))
+               return;
+           }
+         else
+           {
+             struct gomp_team *team = thr->ts.team;
+             /* If parallel or taskgroup has been cancelled, don't start new
+                tasks.  */
+             if (team
+                 && (gomp_team_barrier_cancelled (&team->barrier)
+                     || (thr->task->taskgroup
+                         && thr->task->taskgroup->cancelled)))
+               return;
+
+             gomp_task_maybe_wait_for_dependencies (depend);
+           }
+       }
+    }
 
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
-  gomp_mutex_lock (&mm->lock);
-  if (!mm->is_initialized)
-    gomp_init_tables (devicep, mm);
-  gomp_mutex_unlock (&mm->lock);
+  if (devicep == NULL
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return;
 
-  gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team
+      && (gomp_team_barrier_cancelled (&team->barrier)
+         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+    return;
+
+  size_t i;
+  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+    for (i = 0; i < mapnum; i++)
+      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+       {
+         gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
+                        &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+         i += sizes[i];
+       }
+      else
+       gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
+                      true, GOMP_MAP_VARS_ENTER_DATA);
+  else
+    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
+}
+
+bool
+gomp_target_task_fn (void *data)
+{
+  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+  struct gomp_device_descr *devicep = ttask->devicep;
+
+  if (ttask->fn != NULL)
+    {
+      if (devicep == NULL
+         || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       {
+         ttask->state = GOMP_TARGET_TASK_FALLBACK;
+         gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
+                                            ttask->hostaddrs, ttask->sizes,
+                                            ttask->kinds);
+         return false;
+       }
+
+      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
+       {
+         gomp_unmap_vars (ttask->tgt, true);
+         return false;
+       }
+
+      void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
+      ttask->tgt
+       = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
+                        ttask->sizes, ttask->kinds, true,
+                        GOMP_MAP_VARS_TARGET);
+      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
+
+      devicep->async_run_func (devicep->target_id, fn_addr,
+                              (void *) ttask->tgt->tgt_start, (void *) ttask);
+      return true;
+    }
+  else if (devicep == NULL
+          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return false;
+
+  size_t i;
+  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
+    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+                ttask->kinds, true);
+  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+    for (i = 0; i < ttask->mapnum; i++)
+      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+       {
+         gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
+                        NULL, &ttask->sizes[i], &ttask->kinds[i], true,
+                        GOMP_MAP_VARS_ENTER_DATA);
+         i += ttask->sizes[i];
+       }
+      else
+       gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
+                      &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+  else
+    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+                   ttask->kinds);
+  return false;
 }
 
 void
@@ -908,6 +1794,384 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   (void) num_teams;
 }
 
+void *
+omp_target_alloc (size_t size, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return malloc (size);
+
+  if (device_num < 0)
+    return NULL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return NULL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return malloc (size);
+
+  gomp_mutex_lock (&devicep->lock);
+  void *ret = devicep->alloc_func (devicep->target_id, size);
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+void
+omp_target_free (void *device_ptr, int device_num)
+{
+  if (device_ptr == NULL)
+    return;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    {
+      free (device_ptr);
+      return;
+    }
+
+  if (device_num < 0)
+    return;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    {
+      free (device_ptr);
+      return;
+    }
+
+  gomp_mutex_lock (&devicep->lock);
+  devicep->free_func (devicep->target_id, device_ptr);
+  gomp_mutex_unlock (&devicep->lock);
+}
+
+int
+omp_target_is_present (void *ptr, int device_num)
+{
+  if (ptr == NULL)
+    return 1;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return 1;
+
+  if (device_num < 0)
+    return 0;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return 0;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return 1;
+
+  gomp_mutex_lock (&devicep->lock);
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+
+  cur_node.host_start = (uintptr_t) ptr;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
+  int ret = n != NULL;
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+int
+omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+                  size_t src_offset, int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+       return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+       return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+       return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+       return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       src_devicep = NULL;
+    }
+  if (src_devicep == NULL && dst_devicep == NULL)
+    {
+      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
+      return 0;
+    }
+  if (src_devicep == NULL)
+    {
+      gomp_mutex_lock (&dst_devicep->lock);
+      dst_devicep->host2dev_func (dst_devicep->target_id,
+                                 (char *) dst + dst_offset,
+                                 (char *) src + src_offset, length);
+      gomp_mutex_unlock (&dst_devicep->lock);
+      return 0;
+    }
+  if (dst_devicep == NULL)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2host_func (src_devicep->target_id,
+                                 (char *) dst + dst_offset,
+                                 (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
+  if (src_devicep == dst_devicep)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2dev_func (src_devicep->target_id,
+                                (char *) dst + dst_offset,
+                                (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
+  return EINVAL;
+}
+
+static int
+omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
+                              int num_dims, const size_t *volume,
+                              const size_t *dst_offsets,
+                              const size_t *src_offsets,
+                              const size_t *dst_dimensions,
+                              const size_t *src_dimensions,
+                              struct gomp_device_descr *dst_devicep,
+                              struct gomp_device_descr *src_devicep)
+{
+  size_t dst_slice = element_size;
+  size_t src_slice = element_size;
+  size_t j, dst_off, src_off, length;
+  int i, ret;
+
+  if (num_dims == 1)
+    {
+      if (__builtin_mul_overflow (element_size, volume[0], &length)
+         || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
+         || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
+       return EINVAL;
+      if (dst_devicep == NULL && src_devicep == NULL)
+       memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+      else if (src_devicep == NULL)
+       dst_devicep->host2dev_func (dst_devicep->target_id,
+                                   (char *) dst + dst_off,
+                                   (char *) src + src_off, length);
+      else if (dst_devicep == NULL)
+       src_devicep->dev2host_func (src_devicep->target_id,
+                                   (char *) dst + dst_off,
+                                   (char *) src + src_off, length);
+      else if (src_devicep == dst_devicep)
+       src_devicep->dev2dev_func (src_devicep->target_id,
+                                  (char *) dst + dst_off,
+                                  (char *) src + src_off, length);
+      else
+       return EINVAL;
+      return 0;
+    }
+
+  /* FIXME: it would be nice to have some plugin function to handle
+     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
+     be handled in the generic recursion below, and for host-host it
+     should be used even for any num_dims >= 2.  */
+
+  for (i = 1; i < num_dims; i++)
+    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
+       || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
+      return EINVAL;
+  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
+      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
+    return EINVAL;
+  for (j = 0; j < volume[0]; j++)
+    {
+      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
+                                          (char *) src + src_off,
+                                          element_size, num_dims - 1,
+                                          volume + 1, dst_offsets + 1,
+                                          src_offsets + 1, dst_dimensions + 1,
+                                          src_dimensions + 1, dst_devicep,
+                                          src_devicep);
+      if (ret)
+       return ret;
+      dst_off += dst_slice;
+      src_off += src_slice;
+    }
+  return 0;
+}
+
+int
+omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
+                       int num_dims, const size_t *volume,
+                       const size_t *dst_offsets,
+                       const size_t *src_offsets,
+                       const size_t *dst_dimensions,
+                       const size_t *src_dimensions,
+                       int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  if (!dst && !src)
+    return INT_MAX;
+
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+       return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+       return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+       return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+       return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+       src_devicep = NULL;
+    }
+
+  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
+    return EINVAL;
+
+  if (src_devicep)
+    gomp_mutex_lock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_lock (&dst_devicep->lock);
+  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
+                                          volume, dst_offsets, src_offsets,
+                                          dst_dimensions, src_dimensions,
+                                          dst_devicep, src_devicep);
+  if (src_devicep)
+    gomp_mutex_unlock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_unlock (&dst_devicep->lock);
+  return ret;
+}
+
+int
+omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+                         size_t device_offset, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) host_ptr;
+  cur_node.host_end = cur_node.host_start + size;
+  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+  if (n)
+    {
+      if (n->tgt->tgt_start + n->tgt_offset
+         == (uintptr_t) device_ptr + device_offset
+         && n->host_start <= cur_node.host_start
+         && n->host_end >= cur_node.host_end)
+       ret = 0;
+    }
+  else
+    {
+      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+      tgt->array = gomp_malloc (sizeof (*tgt->array));
+      tgt->refcount = 1;
+      tgt->tgt_start = 0;
+      tgt->tgt_end = 0;
+      tgt->to_free = NULL;
+      tgt->prev = NULL;
+      tgt->list_count = 0;
+      tgt->device_descr = devicep;
+      splay_tree_node array = tgt->array;
+      splay_tree_key k = &array->key;
+      k->host_start = cur_node.host_start;
+      k->host_end = cur_node.host_end;
+      k->tgt = tgt;
+      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
+      k->refcount = REFCOUNT_INFINITY;
+      k->async_refcount = 0;
+      array->left = NULL;
+      array->right = NULL;
+      splay_tree_insert (&devicep->mem_map, array);
+      ret = 0;
+    }
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+int
+omp_target_disassociate_ptr (void *ptr, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) ptr;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+  if (n
+      && n->host_start == cur_node.host_start
+      && n->refcount == REFCOUNT_INFINITY
+      && n->tgt->tgt_start == 0
+      && n->tgt->to_free == NULL
+      && n->tgt->refcount == 1
+      && n->tgt->list_count == 0)
+    {
+      splay_tree_remove (&devicep->mem_map, n);
+      gomp_unmap_tgt (n->tgt);
+      ret = 0;
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
 #ifdef PLUGIN_SUPPORT
 
 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
@@ -920,128 +2184,102 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
 {
   const char *err = NULL, *last_missing = NULL;
-  int optional_present, optional_total;
-
-  /* Clear any existing error.  */
-  dlerror ();
 
   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
   if (!plugin_handle)
-    {
-      err = dlerror ();
-      goto out;
-    }
+    goto dl_fail;
 
   /* Check if all required functions are available in the plugin and store
-     their handlers.  */
+     their handlers.  None of the symbols can legitimately be NULL,
+     so we don't need to check dlerror all the time.  */
 #define DLSYM(f)                                                       \
-  do                                                                   \
-    {                                                                  \
-      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f);    \
-      err = dlerror ();                                                        \
-      if (err != NULL)                                                 \
-       goto out;                                                       \
-    }                                                                  \
-  while (0)
-  /* Similar, but missing functions are not an error.  */
-#define DLSYM_OPT(f, n)                                                \
-  do                                                                   \
-    {                                                                  \
-      const char *tmp_err;                                                     \
-      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n);    \
-      tmp_err = dlerror ();                                            \
-      if (tmp_err == NULL)                                             \
-        optional_present++;                                            \
-      else                                                             \
-        last_missing = #n;                                             \
-      optional_total++;                                                        \
-    }                                                                  \
-  while (0)
+  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
+    goto dl_fail
+  /* Similar, but missing functions are not an error.  Return false if
+     failed, true otherwise.  */
+#define DLSYM_OPT(f, n)                                                        \
+  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))      \
+   || (last_missing = #n, 0))
+
+  DLSYM (version);
+  if (device->version_func () != GOMP_VERSION)
+    {
+      err = "plugin version mismatch";
+      goto fail;
+    }
 
   DLSYM (get_name);
   DLSYM (get_caps);
   DLSYM (get_type);
   DLSYM (get_num_devices);
-  DLSYM (register_image);
   DLSYM (init_device);
   DLSYM (fini_device);
-  DLSYM (get_table);
+  DLSYM (load_image);
+  DLSYM (unload_image);
   DLSYM (alloc);
   DLSYM (free);
   DLSYM (dev2host);
   DLSYM (host2dev);
   device->capabilities = device->get_caps_func ();
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
-    DLSYM (run);
+    {
+      DLSYM (run);
+      DLSYM (async_run);
+      DLSYM (dev2dev);
+    }
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
-      optional_present = optional_total = 0;
-      DLSYM_OPT (openacc.exec, openacc_parallel);
-      DLSYM_OPT (openacc.open_device, openacc_open_device);
-      DLSYM_OPT (openacc.close_device, openacc_close_device);
-      DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
-      DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
-      DLSYM_OPT (openacc.register_async_cleanup,
-                openacc_register_async_cleanup);
-      DLSYM_OPT (openacc.async_test, openacc_async_test);
-      DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
-      DLSYM_OPT (openacc.async_wait, openacc_async_wait);
-      DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
-      DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
-      DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
-      DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
-      DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
-      DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
-      /* Require all the OpenACC handlers if we have
-        GOMP_OFFLOAD_CAP_OPENACC_200.  */
-      if (optional_present != optional_total)
+      if (!DLSYM_OPT (openacc.exec, openacc_parallel)
+         || !DLSYM_OPT (openacc.register_async_cleanup,
+                        openacc_register_async_cleanup)
+         || !DLSYM_OPT (openacc.async_test, openacc_async_test)
+         || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
+         || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
+         || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
+         || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
+         || !DLSYM_OPT (openacc.async_wait_all_async,
+                        openacc_async_wait_all_async)
+         || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
+         || !DLSYM_OPT (openacc.create_thread_data,
+                        openacc_create_thread_data)
+         || !DLSYM_OPT (openacc.destroy_thread_data,
+                        openacc_destroy_thread_data))
        {
+         /* Require all the OpenACC handlers if we have
+            GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
-         goto out;
+         goto fail;
        }
-      optional_present = optional_total = 0;
-      DLSYM_OPT (openacc.cuda.get_current_device,
-                openacc_get_current_cuda_device);
-      DLSYM_OPT (openacc.cuda.get_current_context,
-                openacc_get_current_cuda_context);
-      DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
-      DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
-      /* Make sure all the CUDA functions are there if any of them are.  */
-      if (optional_present && optional_present != optional_total)
+
+      unsigned cuda = 0;
+      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
+                        openacc_get_current_cuda_device);
+      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
+                        openacc_get_current_cuda_context);
+      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
+      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
+      if (cuda && cuda != 4)
        {
+         /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
-         goto out;
+         goto fail;
        }
     }
 #undef DLSYM
 #undef DLSYM_OPT
 
- out:
-  if (err != NULL)
-    {
-      gomp_error ("while loading %s: %s", plugin_name, err);
-      if (last_missing)
-        gomp_error ("missing function was %s", last_missing);
-      if (plugin_handle)
-       dlclose (plugin_handle);
-    }
-  return err == NULL;
-}
+  return 1;
 
-/* This function adds a compatible offload image IMAGE to an accelerator device
-   DEVICE.  DEVICE must be locked on entry, and remains locked on return.  */
+ dl_fail:
+  err = dlerror ();
+ fail:
+  gomp_error ("while loading %s: %s", plugin_name, err);
+  if (last_missing)
+    gomp_error ("missing function was %s", last_missing);
+  if (plugin_handle)
+    dlclose (plugin_handle);
 
-static void
-gomp_register_image_for_device (struct gomp_device_descr *device,
-                               struct offload_image_descr *image)
-{
-  if (!device->offload_regions_registered
-      && (device->type == image->type
-         || device->type == OFFLOAD_TARGET_TYPE_HOST))
-    {
-      device->register_image_func (image->host_table, image->target_data);
-      device->offload_regions_registered = true;
-    }
+  return 0;
 }
 
 /* This function initializes the runtime needed for offloading.
@@ -1102,17 +2340,13 @@ gomp_target_init (void)
                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
-               current_device.mem_map.is_initialized = false;
-               current_device.mem_map.splay_tree.root = NULL;
+               current_device.mem_map.root = NULL;
                current_device.is_initialized = false;
-               current_device.offload_regions_registered = false;
                current_device.openacc.data_environ = NULL;
-               current_device.openacc.target_data = NULL;
                for (i = 0; i < new_num_devices; i++)
                  {
                    current_device.target_id = i;
                    devices[num_devices] = current_device;
-                   gomp_mutex_init (&devices[num_devices].mem_map.lock);
                    gomp_mutex_init (&devices[num_devices].lock);
                    num_devices++;
                  }
@@ -1147,21 +2381,12 @@ gomp_target_init (void)
 
   for (i = 0; i < num_devices; i++)
     {
-      int j;
-
-      for (j = 0; j < num_offload_images; j++)
-       gomp_register_image_for_device (&devices[i], &offload_images[j]);
-
       /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
     }
-
-  free (offload_images);
-  offload_images = NULL;
-  num_offload_images = 0;
 }
 
 #else /* PLUGIN_SUPPORT */