#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h> /* For PRIu64. */
+#endif
#include <string.h>
#include <assert.h>
+#include <errno.h>
#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
/* The whole initialization code for offloading plugins is only run once. */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
+/* Mutex for offload image registration. */
+static gomp_mutex_t register_lock;
+
/* This structure describes an offload image.
It contains type of the target device, pointer to host table descriptor, and
pointer to target data. */
struct offload_image_descr {
+ unsigned version;
enum offload_target_type type;
- void *host_table;
- void *target_data;
+ const void *host_table;
+ const void *target_data;
};
/* Array of descriptors of offload images. */
/* Array of descriptors for all available devices. */
static struct gomp_device_descr *devices;
-#ifdef PLUGIN_SUPPORT
/* Total number of available devices. */
static int num_devices;
-#endif
/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
static int num_devices_openmp;
-/* The comparison function. */
+/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
-attribute_hidden int
-splay_compare (splay_tree_key x, splay_tree_key y)
+static void *
+gomp_realloc_unlock (void *old, size_t size)
{
- if (x->host_start == x->host_end
- && y->host_start == y->host_end)
- return 0;
- if (x->host_end <= y->host_start)
- return -1;
- if (x->host_start >= y->host_end)
- return 1;
- return 0;
+ void *ret = realloc (old, size);
+ if (ret == NULL)
+ {
+ gomp_mutex_unlock (&register_lock);
+ gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
+ }
+ return ret;
}
-#include "splay-tree.h"
-
attribute_hidden void
gomp_init_targets_once (void)
{
if (device_id < 0 || device_id >= gomp_get_num_devices ())
return NULL;
+ gomp_mutex_lock (&devices[device_id].lock);
+ if (!devices[device_id].is_initialized)
+ gomp_init_device (&devices[device_id]);
+ gomp_mutex_unlock (&devices[device_id].lock);
+
return &devices[device_id];
}
-/* Handle the case where splay_tree_lookup found oldn for newn.
+static inline splay_tree_key
+gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
+{
+ if (key->host_start != key->host_end)
+ return splay_tree_lookup (mem_map, key);
+
+ key->host_end++;
+ splay_tree_key n = splay_tree_lookup (mem_map, key);
+ key->host_end--;
+ if (n)
+ return n;
+ key->host_start--;
+ n = splay_tree_lookup (mem_map, key);
+ key->host_start++;
+ if (n)
+ return n;
+ return splay_tree_lookup (mem_map, key);
+}
+
+static inline splay_tree_key
+gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
+{
+ if (key->host_start != key->host_end)
+ return splay_tree_lookup (mem_map, key);
+
+ key->host_end++;
+ splay_tree_key n = splay_tree_lookup (mem_map, key);
+ key->host_end--;
+ return n;
+}
+
+/* Handle the case where gomp_map_lookup, splay_tree_lookup or
+ gomp_map_0len_lookup found oldn for newn.
Helper function of gomp_map_vars. */
static inline void
-gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+ splay_tree_key newn, struct target_var_desc *tgt_var,
unsigned char kind)
{
+ tgt_var->key = oldn;
+ tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
+ tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
+ tgt_var->offset = newn->host_start - oldn->host_start;
+ tgt_var->length = newn->host_end - newn->host_start;
+
if ((kind & GOMP_MAP_FLAG_FORCE)
|| oldn->host_start > newn->host_start
|| oldn->host_end < newn->host_end)
- gomp_fatal ("Trying to map into device [%p..%p) object when "
- "[%p..%p) is already mapped",
- (void *) newn->host_start, (void *) newn->host_end,
- (void *) oldn->host_start, (void *) oldn->host_end);
- oldn->refcount++;
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Trying to map into device [%p..%p) object when "
+ "[%p..%p) is already mapped",
+ (void *) newn->host_start, (void *) newn->host_end,
+ (void *) oldn->host_start, (void *) oldn->host_end);
+ }
+
+ if (GOMP_MAP_ALWAYS_TO_P (kind))
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ + newn->host_start - oldn->host_start),
+ (void *) newn->host_start,
+ newn->host_end - newn->host_start);
+ if (oldn->refcount != REFCOUNT_INFINITY)
+ oldn->refcount++;
}
static int
-get_kind (bool is_openacc, void *kinds, int idx)
+get_kind (bool short_mapkind, void *kinds, int idx)
+{
+ return short_mapkind ? ((unsigned short *) kinds)[idx]
+ : ((unsigned char *) kinds)[idx];
+}
+
+static void
+gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
+ uintptr_t target_offset, uintptr_t bias)
+{
+ struct gomp_device_descr *devicep = tgt->device_descr;
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+
+ cur_node.host_start = host_ptr;
+ if (cur_node.host_start == (uintptr_t) NULL)
+ {
+ cur_node.tgt_offset = (uintptr_t) NULL;
+ /* FIXME: see comment about coalescing host/dev transfers below. */
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *));
+ return;
+ }
+ /* Add bias to the pointer value. */
+ cur_node.host_start += bias;
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Pointer target of array section wasn't mapped");
+ }
+ cur_node.host_start -= n->host_start;
+ cur_node.tgt_offset
+ = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
+ /* At this point tgt_offset is target address of the
+ array section. Now subtract bias to get what we want
+ to initialize the pointer with. */
+ cur_node.tgt_offset -= bias;
+ /* FIXME: see comment about coalescing host/dev transfers below. */
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *));
+}
+
+static void
+gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+ size_t first, size_t i, void **hostaddrs,
+ size_t *sizes, void *kinds)
+{
+ struct gomp_device_descr *devicep = tgt->device_descr;
+ struct splay_tree_s *mem_map = &devicep->mem_map;
+ struct splay_tree_key_s cur_node;
+ int kind;
+ const bool short_mapkind = true;
+ const int typemask = short_mapkind ? 0xff : 0x7;
+
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + sizes[i];
+ splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
+ kind = get_kind (short_mapkind, kinds, i);
+ if (n2
+ && n2->tgt == n->tgt
+ && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_map_vars_existing (devicep, n2, &cur_node,
+ &tgt->list[i], kind & typemask);
+ return;
+ }
+ if (sizes[i] == 0)
+ {
+ if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
+ {
+ cur_node.host_start--;
+ n2 = splay_tree_lookup (mem_map, &cur_node);
+ cur_node.host_start++;
+ if (n2
+ && n2->tgt == n->tgt
+ && n2->host_start - n->host_start
+ == n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+ kind & typemask);
+ return;
+ }
+ }
+ cur_node.host_end++;
+ n2 = splay_tree_lookup (mem_map, &cur_node);
+ cur_node.host_end--;
+ if (n2
+ && n2->tgt == n->tgt
+ && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+ kind & typemask);
+ return;
+ }
+ }
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Trying to map into device [%p..%p) structure element when "
+ "other mapped elements from the same structure weren't mapped "
+ "together with it", (void *) cur_node.host_start,
+ (void *) cur_node.host_end);
+}
+
+static inline uintptr_t
+gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
- return is_openacc ? ((unsigned short *) kinds)[idx]
- : ((unsigned char *) kinds)[idx];
+ if (tgt->list[i].key != NULL)
+ return tgt->list[i].key->tgt->tgt_start
+ + tgt->list[i].key->tgt_offset
+ + tgt->list[i].offset;
+ if (tgt->list[i].offset == ~(uintptr_t) 0)
+ return (uintptr_t) hostaddrs[i];
+ if (tgt->list[i].offset == ~(uintptr_t) 1)
+ return 0;
+ if (tgt->list[i].offset == ~(uintptr_t) 2)
+ return tgt->list[i + 1].key->tgt->tgt_start
+ + tgt->list[i + 1].key->tgt_offset
+ + tgt->list[i + 1].offset
+ + (uintptr_t) hostaddrs[i]
+ - (uintptr_t) hostaddrs[i + 1];
+ return tgt->tgt_start + tgt->list[i].offset;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
- bool is_openacc, bool is_target)
+ bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
- const int rshift = is_openacc ? 8 : 3;
- const int typemask = is_openacc ? 0xff : 0x7;
- struct gomp_memory_mapping *mm = &devicep->mem_map;
+ bool has_firstprivate = false;
+ const int rshift = short_mapkind ? 8 : 3;
+ const int typemask = short_mapkind ? 0xff : 0x7;
+ struct splay_tree_s *mem_map = &devicep->mem_map;
struct splay_tree_key_s cur_node;
struct target_mem_desc *tgt
= gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
tgt->list_count = mapnum;
- tgt->refcount = 1;
+ tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
tgt->device_descr = devicep;
- tgt->mem_map = mm;
if (mapnum == 0)
- return tgt;
+ {
+ tgt->tgt_start = 0;
+ tgt->tgt_end = 0;
+ return tgt;
+ }
tgt_align = sizeof (void *);
tgt_size = 0;
- if (is_target)
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
{
size_t align = 4 * sizeof (void *);
tgt_align = align;
tgt_size = mapnum * sizeof (void *);
}
- gomp_mutex_lock (&mm->lock);
+ gomp_mutex_lock (&devicep->lock);
for (i = 0; i < mapnum; i++)
{
- int kind = get_kind (is_openacc, kinds, i);
- if (hostaddrs[i] == NULL)
+ int kind = get_kind (short_mapkind, kinds, i);
+ if (hostaddrs[i] == NULL
+ || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
+ {
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 0;
+ continue;
+ }
+ else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
+ {
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start;
+ splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("use_device_ptr pointer wasn't mapped");
+ }
+ cur_node.host_start -= n->host_start;
+ hostaddrs[i]
+ = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start);
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 0;
+ continue;
+ }
+ else if ((kind & typemask) == GOMP_MAP_STRUCT)
+ {
+ size_t first = i + 1;
+ size_t last = i + sizes[i];
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = (uintptr_t) hostaddrs[last]
+ + sizes[last];
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 2;
+ splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ size_t align = (size_t) 1 << (kind >> rshift);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size -= (uintptr_t) hostaddrs[first]
+ - (uintptr_t) hostaddrs[i];
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
+ not_found_cnt += last - i;
+ for (i = first; i <= last; i++)
+ tgt->list[i].key = NULL;
+ i--;
+ continue;
+ }
+ for (i = first; i <= last; i++)
+ gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+ sizes, kinds);
+ i--;
+ continue;
+ }
+ else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
{
- tgt->list[i] = NULL;
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 1;
+ has_firstprivate = true;
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
else
cur_node.host_end = cur_node.host_start + sizeof (void *);
- splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
- if (n)
+ if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ tgt->list[i].key = NULL;
+
+ size_t align = (size_t) 1 << (kind >> rshift);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += cur_node.host_end - cur_node.host_start;
+ has_firstprivate = true;
+ continue;
+ }
+ splay_tree_key n;
+ if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
{
- tgt->list[i] = n;
- gomp_map_vars_existing (n, &cur_node, kind & typemask);
+ n = gomp_map_0len_lookup (mem_map, &cur_node);
+ if (!n)
+ {
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = ~(uintptr_t) 1;
+ continue;
+ }
}
+ else
+ n = splay_tree_lookup (mem_map, &cur_node);
+ if (n)
+ gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+ kind & typemask);
else
{
- tgt->list[i] = NULL;
+ tgt->list[i].key = NULL;
size_t align = (size_t) 1 << (kind >> rshift);
not_found_cnt++;
{
size_t j;
for (j = i + 1; j < mapnum; j++)
- if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+ if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
& typemask))
break;
else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
break;
else
{
- tgt->list[j] = NULL;
+ tgt->list[j].key = NULL;
i++;
}
}
if (devaddrs)
{
if (mapnum != 1)
- gomp_fatal ("unexpected aggregation");
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("unexpected aggregation");
+ }
tgt->to_free = devaddrs[0];
tgt->tgt_start = (uintptr_t) tgt->to_free;
tgt->tgt_end = tgt->tgt_start + sizes[0];
}
- else if (not_found_cnt || is_target)
+ else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
{
/* Allocate tgt_align aligned tgt_size block of memory. */
/* FIXME: Perhaps change interface to allocate properly aligned
}
tgt_size = 0;
- if (is_target)
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
tgt_size = mapnum * sizeof (void *);
tgt->array = NULL;
- if (not_found_cnt)
+ if (not_found_cnt || has_firstprivate)
{
- tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
+ if (not_found_cnt)
+ tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
splay_tree_node array = tgt->array;
- size_t j;
+ size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
+ uintptr_t field_tgt_base = 0;
for (i = 0; i < mapnum; i++)
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
{
- int kind = get_kind (is_openacc, kinds, i);
+ int kind = get_kind (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
continue;
+ switch (kind & typemask)
+ {
+ size_t align, len, first, last;
+ splay_tree_key n;
+ case GOMP_MAP_FIRSTPRIVATE:
+ align = (size_t) 1 << (kind >> rshift);
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt->list[i].offset = tgt_size;
+ len = sizes[i];
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (tgt->tgt_start + tgt_size),
+ (void *) hostaddrs[i], len);
+ tgt_size += len;
+ continue;
+ case GOMP_MAP_FIRSTPRIVATE_INT:
+ case GOMP_MAP_USE_DEVICE_PTR:
+ case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+ continue;
+ case GOMP_MAP_STRUCT:
+ first = i + 1;
+ last = i + sizes[i];
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = (uintptr_t) hostaddrs[last]
+ + sizes[last];
+ if (tgt->list[first].key != NULL)
+ continue;
+ n = splay_tree_lookup (mem_map, &cur_node);
+ if (n == NULL)
+ {
+ size_t align = (size_t) 1 << (kind >> rshift);
+ tgt_size -= (uintptr_t) hostaddrs[first]
+ - (uintptr_t) hostaddrs[i];
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += (uintptr_t) hostaddrs[first]
+ - (uintptr_t) hostaddrs[i];
+ field_tgt_base = (uintptr_t) hostaddrs[first];
+ field_tgt_offset = tgt_size;
+ field_tgt_clear = last;
+ tgt_size += cur_node.host_end
+ - (uintptr_t) hostaddrs[first];
+ continue;
+ }
+ for (i = first; i <= last; i++)
+ gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+ sizes, kinds);
+ i--;
+ continue;
+ case GOMP_MAP_ALWAYS_POINTER:
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (mem_map, &cur_node);
+ if (n == NULL
+ || n->host_start > cur_node.host_start
+ || n->host_end < cur_node.host_end)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("always pointer not mapped");
+ }
+ if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
+ != GOMP_MAP_ALWAYS_POINTER)
+ cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
+ if (cur_node.tgt_offset)
+ cur_node.tgt_offset -= sizes[i];
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (n->tgt->tgt_start
+ + n->tgt_offset
+ + cur_node.host_start
+ - n->host_start),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *));
+ cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start - n->host_start;
+ continue;
+ default:
+ break;
+ }
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) hostaddrs[i];
if (!GOMP_MAP_POINTER_P (kind & typemask))
k->host_end = k->host_start + sizes[i];
else
k->host_end = k->host_start + sizeof (void *);
- splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
+ splay_tree_key n = splay_tree_lookup (mem_map, k);
if (n)
- {
- tgt->list[i] = n;
- gomp_map_vars_existing (n, k, kind & typemask);
- }
+ gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+ kind & typemask);
else
{
size_t align = (size_t) 1 << (kind >> rshift);
- tgt->list[i] = k;
- tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt->list[i].key = k;
k->tgt = tgt;
- k->tgt_offset = tgt_size;
- tgt_size += k->host_end - k->host_start;
- k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ if (field_tgt_clear != ~(size_t) 0)
+ {
+ k->tgt_offset = k->host_start - field_tgt_base
+ + field_tgt_offset;
+ if (i == field_tgt_clear)
+ field_tgt_clear = ~(size_t) 0;
+ }
+ else
+ {
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ k->tgt_offset = tgt_size;
+ tgt_size += k->host_end - k->host_start;
+ }
+ tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ tgt->list[i].always_copy_from
+ = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
+ tgt->list[i].offset = 0;
+ tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1;
k->async_refcount = 0;
tgt->refcount++;
array->left = NULL;
array->right = NULL;
- splay_tree_insert (&mm->splay_tree, array);
+ splay_tree_insert (mem_map, array);
switch (kind & typemask)
{
case GOMP_MAP_ALLOC:
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_ALLOC:
case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
break;
case GOMP_MAP_TO:
case GOMP_MAP_TOFROM:
case GOMP_MAP_FORCE_TO:
case GOMP_MAP_FORCE_TOFROM:
+ case GOMP_MAP_ALWAYS_TO:
+ case GOMP_MAP_ALWAYS_TOFROM:
/* FIXME: Perhaps add some smarts, like if copying
several adjacent fields from host to target, use some
host buffer to avoid sending each var individually. */
k->host_end - k->host_start);
break;
case GOMP_MAP_POINTER:
- cur_node.host_start
- = (uintptr_t) *(void **) k->host_start;
- if (cur_node.host_start == (uintptr_t) NULL)
- {
- cur_node.tgt_offset = (uintptr_t) NULL;
- /* FIXME: see above FIXME comment. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start
- + k->tgt_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
- break;
- }
- /* Add bias to the pointer value. */
- cur_node.host_start += sizes[i];
- cur_node.host_end = cur_node.host_start + 1;
- n = splay_tree_lookup (&mm->splay_tree, &cur_node);
- if (n == NULL)
- {
- /* Could be possibly zero size array section. */
- cur_node.host_end--;
- n = splay_tree_lookup (&mm->splay_tree, &cur_node);
- if (n == NULL)
- {
- cur_node.host_start--;
- n = splay_tree_lookup (&mm->splay_tree, &cur_node);
- cur_node.host_start++;
- }
- }
- if (n == NULL)
- gomp_fatal ("Pointer target of array section "
- "wasn't mapped");
- cur_node.host_start -= n->host_start;
- cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
- + cur_node.host_start;
- /* At this point tgt_offset is target address of the
- array section. Now subtract bias to get what we want
- to initialize the pointer with. */
- cur_node.tgt_offset -= sizes[i];
- /* FIXME: see above FIXME comment. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start
- + k->tgt_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
+ gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+ k->tgt_offset, sizes[i]);
break;
case GOMP_MAP_TO_PSET:
/* FIXME: see above FIXME comment. */
k->host_end - k->host_start);
for (j = i + 1; j < mapnum; j++)
- if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+ if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
+ j)
& typemask))
break;
else if ((uintptr_t) hostaddrs[j] < k->host_start
break;
else
{
- tgt->list[j] = k;
- k->refcount++;
- cur_node.host_start
- = (uintptr_t) *(void **) hostaddrs[j];
- if (cur_node.host_start == (uintptr_t) NULL)
- {
- cur_node.tgt_offset = (uintptr_t) NULL;
- /* FIXME: see above FIXME comment. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start + k->tgt_offset
- + ((uintptr_t) hostaddrs[j]
- - k->host_start)),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
- i++;
- continue;
- }
- /* Add bias to the pointer value. */
- cur_node.host_start += sizes[j];
- cur_node.host_end = cur_node.host_start + 1;
- n = splay_tree_lookup (&mm->splay_tree, &cur_node);
- if (n == NULL)
- {
- /* Could be possibly zero size array section. */
- cur_node.host_end--;
- n = splay_tree_lookup (&mm->splay_tree,
- &cur_node);
- if (n == NULL)
- {
- cur_node.host_start--;
- n = splay_tree_lookup (&mm->splay_tree,
- &cur_node);
- cur_node.host_start++;
- }
- }
- if (n == NULL)
- gomp_fatal ("Pointer target of array section "
- "wasn't mapped");
- cur_node.host_start -= n->host_start;
- cur_node.tgt_offset = n->tgt->tgt_start
- + n->tgt_offset
- + cur_node.host_start;
- /* At this point tgt_offset is target address of the
- array section. Now subtract bias to get what we
- want to initialize the pointer with. */
- cur_node.tgt_offset -= sizes[j];
- /* FIXME: see above FIXME comment. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start + k->tgt_offset
- + ((uintptr_t) hostaddrs[j]
- - k->host_start)),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
+ tgt->list[j].key = k;
+ tgt->list[j].copy_from = false;
+ tgt->list[j].always_copy_from = false;
+ if (k->refcount != REFCOUNT_INFINITY)
+ k->refcount++;
+ gomp_map_pointer (tgt,
+ (uintptr_t) *(void **) hostaddrs[j],
+ k->tgt_offset
+ + ((uintptr_t) hostaddrs[j]
+ - k->host_start),
+ sizes[j]);
i++;
}
break;
/* We already looked up the memory region above and it
was missing. */
size_t size = k->host_end - k->host_start;
+ gomp_mutex_unlock (&devicep->lock);
+#ifdef HAVE_INTTYPES_H
+ gomp_fatal ("present clause: !acc_is_present (%p, "
+ "%"PRIu64" (0x%"PRIx64"))",
+ (void *) k->host_start,
+ (uint64_t) size, (uint64_t) size);
+#else
gomp_fatal ("present clause: !acc_is_present (%p, "
- "%zd (0x%zx))", (void *) k->host_start,
- size, size);
+ "%lu (0x%lx))", (void *) k->host_start,
+ (unsigned long) size, (unsigned long) size);
+#endif
}
break;
case GOMP_MAP_FORCE_DEVICEPTR:
sizeof (void *));
break;
default:
+ gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
kind);
}
}
}
- if (is_target)
+ if (pragma_kind == GOMP_MAP_VARS_TARGET)
{
for (i = 0; i < mapnum; i++)
{
- if (tgt->list[i] == NULL)
- cur_node.tgt_offset = (uintptr_t) NULL;
- else
- cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
- + tgt->list[i]->tgt_offset;
+ cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
/* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
}
}
- gomp_mutex_unlock (&mm->lock);
+ /* If the variable from "omp target enter data" map-list was already mapped,
+ tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
+ gomp_exit_data. */
+ if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
+ {
+ free (tgt);
+ tgt = NULL;
+ }
+
+ gomp_mutex_unlock (&devicep->lock);
return tgt;
}
gomp_copy_from_async (struct target_mem_desc *tgt)
{
struct gomp_device_descr *devicep = tgt->device_descr;
- struct gomp_memory_mapping *mm = tgt->mem_map;
size_t i;
- gomp_mutex_lock (&mm->lock);
+ gomp_mutex_lock (&devicep->lock);
for (i = 0; i < tgt->list_count; i++)
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
;
- else if (tgt->list[i]->refcount > 1)
+ else if (tgt->list[i].key->refcount > 1)
{
- tgt->list[i]->refcount--;
- tgt->list[i]->async_refcount++;
+ tgt->list[i].key->refcount--;
+ tgt->list[i].key->async_refcount++;
}
else
{
- splay_tree_key k = tgt->list[i];
- if (k->copy_from)
+ splay_tree_key k = tgt->list[i].key;
+ if (tgt->list[i].copy_from)
devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
(void *) (k->tgt->tgt_start + k->tgt_offset),
k->host_end - k->host_start);
}
- gomp_mutex_unlock (&mm->lock);
+ gomp_mutex_unlock (&devicep->lock);
}
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
struct gomp_device_descr *devicep = tgt->device_descr;
- struct gomp_memory_mapping *mm = tgt->mem_map;
if (tgt->list_count == 0)
{
return;
}
- gomp_mutex_lock (&mm->lock);
+ gomp_mutex_lock (&devicep->lock);
size_t i;
for (i = 0; i < tgt->list_count; i++)
- if (tgt->list[i] == NULL)
- ;
- else if (tgt->list[i]->refcount > 1)
- tgt->list[i]->refcount--;
- else if (tgt->list[i]->async_refcount > 0)
- tgt->list[i]->async_refcount--;
- else
- {
- splay_tree_key k = tgt->list[i];
- if (k->copy_from && do_copyfrom)
- devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
- (void *) (k->tgt->tgt_start + k->tgt_offset),
- k->host_end - k->host_start);
- splay_tree_remove (&mm->splay_tree, k);
- if (k->tgt->refcount > 1)
- k->tgt->refcount--;
- else
- gomp_unmap_tgt (k->tgt);
- }
+ {
+ splay_tree_key k = tgt->list[i].key;
+ if (k == NULL)
+ continue;
+
+ bool do_unmap = false;
+ if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+ k->refcount--;
+ else if (k->refcount == 1)
+ {
+ if (k->async_refcount > 0)
+ k->async_refcount--;
+ else
+ {
+ k->refcount--;
+ do_unmap = true;
+ }
+ }
+
+ if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
+ || tgt->list[i].always_copy_from)
+ devicep->dev2host_func (devicep->target_id,
+ (void *) (k->host_start + tgt->list[i].offset),
+ (void *) (k->tgt->tgt_start + k->tgt_offset
+ + tgt->list[i].offset),
+ tgt->list[i].length);
+ if (do_unmap)
+ {
+ splay_tree_remove (&devicep->mem_map, k);
+ if (k->tgt->refcount > 1)
+ k->tgt->refcount--;
+ else
+ gomp_unmap_tgt (k->tgt);
+ }
+ }
if (tgt->refcount > 1)
tgt->refcount--;
else
gomp_unmap_tgt (tgt);
- gomp_mutex_unlock (&mm->lock);
+ gomp_mutex_unlock (&devicep->lock);
}
static void
-gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
- size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
- bool is_openacc)
+gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
+ size_t *sizes, void *kinds, bool short_mapkind)
{
size_t i;
struct splay_tree_key_s cur_node;
- const int typemask = is_openacc ? 0xff : 0x7;
+ const int typemask = short_mapkind ? 0xff : 0x7;
if (!devicep)
return;
if (mapnum == 0)
return;
- gomp_mutex_lock (&mm->lock);
+ gomp_mutex_lock (&devicep->lock);
for (i = 0; i < mapnum; i++)
if (sizes[i])
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
- splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
- &cur_node);
+ splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
if (n)
{
- int kind = get_kind (is_openacc, kinds, i);
+ int kind = get_kind (short_mapkind, kinds, i);
if (n->host_start > cur_node.host_start
|| n->host_end < cur_node.host_end)
- gomp_fatal ("Trying to update [%p..%p) object when"
- "only [%p..%p) is mapped",
- (void *) cur_node.host_start,
- (void *) cur_node.host_end,
- (void *) n->host_start,
- (void *) n->host_end);
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Trying to update [%p..%p) object when "
+ "only [%p..%p) is mapped",
+ (void *) cur_node.host_start,
+ (void *) cur_node.host_end,
+ (void *) n->host_start,
+ (void *) n->host_end);
+ }
if (GOMP_MAP_COPY_TO_P (kind & typemask))
devicep->host2dev_func (devicep->target_id,
(void *) (n->tgt->tgt_start
- n->host_start),
cur_node.host_end - cur_node.host_start);
}
- else
- gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
- (void *) cur_node.host_start,
- (void *) cur_node.host_end);
}
- gomp_mutex_unlock (&mm->lock);
+ gomp_mutex_unlock (&devicep->lock);
+}
+
+/* Load the image pointed to by TARGET_DATA to the device specified by DEVICEP,
+ and insert into the splay tree the mappings between addresses from HOST_TABLE
+ and from the loaded target image. We rely on the host and device compilers
+ emitting variables and functions in the same order. */
+
+static void
+gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
+ const void *host_table, const void *target_data,
+ bool is_register_lock)
+{
+ void **host_func_table = ((void ***) host_table)[0];
+ void **host_funcs_end = ((void ***) host_table)[1];
+ void **host_var_table = ((void ***) host_table)[2];
+ void **host_vars_end = ((void ***) host_table)[3];
+
+ /* The func table contains only addresses, the var table contains addresses
+ and corresponding sizes. */
+ int num_funcs = host_funcs_end - host_func_table;
+ int num_vars = (host_vars_end - host_var_table) / 2;
+
+ /* Load image to device and get target addresses for the image. */
+ struct addr_pair *target_table = NULL;
+ int i, num_target_entries;
+
+ num_target_entries
+ = devicep->load_image_func (devicep->target_id, version,
+ target_data, &target_table);
+
+ if (num_target_entries != num_funcs + num_vars)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ if (is_register_lock)
+ gomp_mutex_unlock (&register_lock);
+ gomp_fatal ("Cannot map target functions or variables"
+ " (expected %u, have %u)", num_funcs + num_vars,
+ num_target_entries);
+ }
+
+ /* Insert host-target address mapping into splay tree. */
+ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+ tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
+ tgt->refcount = REFCOUNT_INFINITY;
+ tgt->tgt_start = 0;
+ tgt->tgt_end = 0;
+ tgt->to_free = NULL;
+ tgt->prev = NULL;
+ tgt->list_count = 0;
+ tgt->device_descr = devicep;
+ splay_tree_node array = tgt->array;
+
+ for (i = 0; i < num_funcs; i++)
+ {
+ splay_tree_key k = &array->key;
+ k->host_start = (uintptr_t) host_func_table[i];
+ k->host_end = k->host_start + 1;
+ k->tgt = tgt;
+ k->tgt_offset = target_table[i].start;
+ k->refcount = REFCOUNT_INFINITY;
+ k->async_refcount = 0;
+ array->left = NULL;
+ array->right = NULL;
+ splay_tree_insert (&devicep->mem_map, array);
+ array++;
+ }
+
+ for (i = 0; i < num_vars; i++)
+ {
+ struct addr_pair *target_var = &target_table[num_funcs + i];
+ if (target_var->end - target_var->start
+ != (uintptr_t) host_var_table[i * 2 + 1])
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ if (is_register_lock)
+ gomp_mutex_unlock (&register_lock);
+ gomp_fatal ("Can't map target variables (size mismatch)");
+ }
+
+ splay_tree_key k = &array->key;
+ k->host_start = (uintptr_t) host_var_table[i * 2];
+ k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
+ k->tgt = tgt;
+ k->tgt_offset = target_var->start;
+ k->refcount = REFCOUNT_INFINITY;
+ k->async_refcount = 0;
+ array->left = NULL;
+ array->right = NULL;
+ splay_tree_insert (&devicep->mem_map, array);
+ array++;
+ }
+
+ free (target_table);
+}
+
+/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
+ The device must be locked. */
+
+static void
+gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+ unsigned version,
+ const void *host_table, const void *target_data)
+{
+ void **host_func_table = ((void ***) host_table)[0];
+ void **host_funcs_end = ((void ***) host_table)[1];
+ void **host_var_table = ((void ***) host_table)[2];
+ void **host_vars_end = ((void ***) host_table)[3];
+
+ /* The func table contains only addresses, the var table contains addresses
+ and corresponding sizes. */
+ int num_funcs = host_funcs_end - host_func_table;
+ int num_vars = (host_vars_end - host_var_table) / 2;
+
+ unsigned j;
+ struct splay_tree_key_s k;
+ splay_tree_key node = NULL;
+
+ /* Find mapping at start of node array. */
+ if (num_funcs || num_vars)
+ {
+ k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
+ : (uintptr_t) host_var_table[0]);
+ k.host_end = k.host_start + 1;
+ node = splay_tree_lookup (&devicep->mem_map, &k);
+ }
+
+ devicep->unload_image_func (devicep->target_id, version, target_data);
+
+ /* Remove mappings from splay tree. */
+ for (j = 0; j < num_funcs; j++)
+ {
+ k.host_start = (uintptr_t) host_func_table[j];
+ k.host_end = k.host_start + 1;
+ splay_tree_remove (&devicep->mem_map, &k);
+ }
+
+ for (j = 0; j < num_vars; j++)
+ {
+ k.host_start = (uintptr_t) host_var_table[j * 2];
+ k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+ splay_tree_remove (&devicep->mem_map, &k);
+ }
+
+ if (node)
+ {
+ free (node->tgt);
+ free (node);
+ }
}
-/* This function should be called from every offload image.
+/* This function should be called from every offload image while loading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin. */
void
-GOMP_offload_register (void *host_table, enum offload_target_type target_type,
- void *target_data)
+GOMP_offload_register_ver (unsigned version, const void *host_table,
+ int target_type, const void *target_data)
{
- offload_images = gomp_realloc (offload_images,
- (num_offload_images + 1)
- * sizeof (struct offload_image_descr));
+ int i;
+
+ if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
+ gomp_fatal ("Library too old for offload (version %u < %u)",
+ GOMP_VERSION, GOMP_VERSION_LIB (version));
+
+ gomp_mutex_lock (&register_lock);
+
+ /* Load image to all initialized devices. */
+ for (i = 0; i < num_devices; i++)
+ {
+ struct gomp_device_descr *devicep = &devices[i];
+ gomp_mutex_lock (&devicep->lock);
+ if (devicep->type == target_type && devicep->is_initialized)
+ gomp_load_image_to_device (devicep, version,
+ host_table, target_data, true);
+ gomp_mutex_unlock (&devicep->lock);
+ }
+ /* Insert image to array of pending images. */
+ offload_images
+ = gomp_realloc_unlock (offload_images,
+ (num_offload_images + 1)
+ * sizeof (struct offload_image_descr));
+ offload_images[num_offload_images].version = version;
offload_images[num_offload_images].type = target_type;
offload_images[num_offload_images].host_table = host_table;
offload_images[num_offload_images].target_data = target_data;
num_offload_images++;
+ gomp_mutex_unlock (&register_lock);
+}
+
+void
+GOMP_offload_register (const void *host_table, int target_type,
+ const void *target_data)
+{
+ GOMP_offload_register_ver (0, host_table, target_type, target_data);
+}
+
+/* This function should be called from every offload image while unloading.
+ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
+ the target, and TARGET_DATA needed by target plugin. */
+
+void
+GOMP_offload_unregister_ver (unsigned version, const void *host_table,
+ int target_type, const void *target_data)
+{
+ int i;
+
+ gomp_mutex_lock (&register_lock);
+
+ /* Unload image from all initialized devices. */
+ for (i = 0; i < num_devices; i++)
+ {
+ struct gomp_device_descr *devicep = &devices[i];
+ gomp_mutex_lock (&devicep->lock);
+ if (devicep->type == target_type && devicep->is_initialized)
+ gomp_unload_image_from_device (devicep, version,
+ host_table, target_data);
+ gomp_mutex_unlock (&devicep->lock);
+ }
+
+ /* Remove image from array of pending images. */
+ for (i = 0; i < num_offload_images; i++)
+ if (offload_images[i].target_data == target_data)
+ {
+ offload_images[i] = offload_images[--num_offload_images];
+ break;
+ }
+
+ gomp_mutex_unlock (&register_lock);
+}
+
+void
+GOMP_offload_unregister (const void *host_table, int target_type,
+ const void *target_data)
+{
+ GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP. DEVICEP
attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
+ int i;
devicep->init_device_func (devicep->target_id);
+
+ /* Load to device all images registered up to this moment. */
+ for (i = 0; i < num_offload_images; i++)
+ {
+ struct offload_image_descr *image = &offload_images[i];
+ if (image->type == devicep->type)
+ gomp_load_image_to_device (devicep, image->version,
+ image->host_table, image->target_data,
+ false);
+ }
+
devicep->is_initialized = true;
}
-/* Initialize address mapping tables. MM must be locked on entry, and remains
- locked on return. */
-
attribute_hidden void
-gomp_init_tables (struct gomp_device_descr *devicep,
- struct gomp_memory_mapping *mm)
+gomp_unload_device (struct gomp_device_descr *devicep)
{
- /* Get address mapping table for device. */
- struct mapping_table *table = NULL;
- int num_entries = devicep->get_table_func (devicep->target_id, &table);
-
- /* Insert host-target address mapping into dev_splay_tree. */
- int i;
- for (i = 0; i < num_entries; i++)
+ if (devicep->is_initialized)
{
- struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
- tgt->refcount = 1;
- tgt->array = gomp_malloc (sizeof (*tgt->array));
- tgt->tgt_start = table[i].tgt_start;
- tgt->tgt_end = table[i].tgt_end;
- tgt->to_free = NULL;
- tgt->list_count = 0;
- tgt->device_descr = devicep;
- splay_tree_node node = tgt->array;
- splay_tree_key k = &node->key;
- k->host_start = table[i].host_start;
- k->host_end = table[i].host_end;
- k->tgt_offset = 0;
- k->refcount = 1;
- k->copy_from = false;
- k->tgt = tgt;
- node->left = NULL;
- node->right = NULL;
- splay_tree_insert (&mm->splay_tree, node);
+ unsigned i;
+
+ /* Unload from device all images registered at the moment. */
+ for (i = 0; i < num_offload_images; i++)
+ {
+ struct offload_image_descr *image = &offload_images[i];
+ if (image->type == devicep->type)
+ gomp_unload_image_from_device (devicep, image->version,
+ image->host_table,
+ image->target_data);
+ }
}
-
- free (table);
- mm->is_initialized = true;
}
/* Free address mapping tables. MM must be locked on entry, and remains locked
on return. */
attribute_hidden void
-gomp_free_memmap (struct gomp_memory_mapping *mm)
+gomp_free_memmap (struct splay_tree_s *mem_map)
{
- while (mm->splay_tree.root)
+ while (mem_map->root)
{
- struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
+ struct target_mem_desc *tgt = mem_map->root->key.tgt;
- splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
+ splay_tree_remove (mem_map, &mem_map->root->key);
free (tgt->array);
free (tgt);
}
-
- mm->is_initialized = false;
}
/* This function de-initializes the target device, specified by DEVICEP.
devicep->is_initialized = false;
}
-/* Called when encountering a target directive. If DEVICE
- is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
- GOMP_DEVICE_HOST_FALLBACK (or any value
- larger than last available hw device), use host fallback.
- FN is address of host code, UNUSED is part of the current ABI, but
- we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
- with MAPNUM entries, with addresses of the host objects,
- sizes of the host objects (resp. for pointer kind pointer bias
- and assumed sizeof (void *) size) and kinds. */
+/* Host fallback for GOMP_target{,_ext} routines. */
-void
-GOMP_target (int device, void (*fn) (void *), const void *unused,
- size_t mapnum, void **hostaddrs, size_t *sizes,
- unsigned char *kinds)
+static void
+gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
- struct gomp_device_descr *devicep = resolve_device (device);
-
- if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ struct gomp_thread old_thr, *thr = gomp_thread ();
+ old_thr = *thr;
+ memset (thr, '\0', sizeof (*thr));
+ if (gomp_places_list)
{
- /* Host fallback. */
- struct gomp_thread old_thr, *thr = gomp_thread ();
- old_thr = *thr;
- memset (thr, '\0', sizeof (*thr));
- if (gomp_places_list)
- {
- thr->place = old_thr.place;
- thr->ts.place_partition_len = gomp_places_list_len;
- }
- fn (hostaddrs);
- gomp_free_thread (thr);
- *thr = old_thr;
- return;
+ thr->place = old_thr.place;
+ thr->ts.place_partition_len = gomp_places_list_len;
}
+ fn (hostaddrs);
+ gomp_free_thread (thr);
+ *thr = old_thr;
+}
- gomp_mutex_lock (&devicep->lock);
- if (!devicep->is_initialized)
- gomp_init_device (devicep);
- gomp_mutex_unlock (&devicep->lock);
+/* Host fallback with firstprivate map-type handling. */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds)
+{
+ size_t i, tgt_align = 0, tgt_size = 0;
+ char *tgt = NULL;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ if (tgt_align < align)
+ tgt_align = align;
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ tgt_size += sizes[i];
+ }
+ if (tgt_align)
+ {
+ tgt = gomp_alloca (tgt_size + tgt_align - 1);
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+ if (al)
+ tgt += tgt_align - al;
+ tgt_size = 0;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ tgt_size = (tgt_size + align - 1) & ~(align - 1);
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+ hostaddrs[i] = tgt + tgt_size;
+ tgt_size = tgt_size + sizes[i];
+ }
+ }
+ gomp_target_fallback (fn, hostaddrs);
+}
- void *fn_addr;
+/* Helper function of GOMP_target{,_ext} routines. */
+static void *
+gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
+ void (*host_fn) (void *))
+{
if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
- fn_addr = (void *) fn;
+ return (void *) host_fn;
else
{
- struct gomp_memory_mapping *mm = &devicep->mem_map;
- gomp_mutex_lock (&mm->lock);
-
- if (!mm->is_initialized)
- gomp_init_tables (devicep, mm);
-
+ gomp_mutex_lock (&devicep->lock);
struct splay_tree_key_s k;
- k.host_start = (uintptr_t) fn;
+ k.host_start = (uintptr_t) host_fn;
k.host_end = k.host_start + 1;
- splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
+ splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
+ gomp_mutex_unlock (&devicep->lock);
if (tgt_fn == NULL)
gomp_fatal ("Target function wasn't mapped");
- gomp_mutex_unlock (&mm->lock);
-
- fn_addr = (void *) tgt_fn->tgt->tgt_start;
+ return (void *) tgt_fn->tgt_offset;
}
+}
+
+/* Called when encountering a target directive. If DEVICE
+ is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
+ GOMP_DEVICE_HOST_FALLBACK (or any value
+ larger than last available hw device), use host fallback.
+ FN is address of host code, UNUSED is part of the current ABI, but
+ we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
+ with MAPNUM entries, with addresses of the host objects,
+ sizes of the host objects (resp. for pointer kind pointer bias
+ and assumed sizeof (void *) size) and kinds. */
+
+void
+GOMP_target (int device, void (*fn) (void *), const void *unused,
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned char *kinds)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return gomp_target_fallback (fn, hostaddrs);
+
+ void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
- true);
- struct gomp_thread old_thr, *thr = gomp_thread ();
- old_thr = *thr;
- memset (thr, '\0', sizeof (*thr));
- if (gomp_places_list)
- {
- thr->place = old_thr.place;
- thr->ts.place_partition_len = gomp_places_list_len;
- }
+ GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
- gomp_free_thread (thr);
- *thr = old_thr;
gomp_unmap_vars (tgt_vars, true);
}
+/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
+ and several arguments have been added:
+ FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
+ DEPEND is array of dependencies, see GOMP_task for details.
+ NUM_TEAMS is positive if GOMP_teams will be called in the body with
+ that value, or 1 if teams construct is not present, or 0, if
+ teams construct does not have num_teams clause and so the choice is
+ implementation defined, and -1 if it can't be determined on the host
+ what value will GOMP_teams have on the device.
+ THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
+ body with that value, or 0, if teams construct does not have thread_limit
+ clause or the teams construct is not present, or -1 if it can't be
+ determined on the host what value will GOMP_teams have on the device. */
+
void
-GOMP_target_data (int device, const void *unused, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned char *kinds)
+GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend, int num_teams,
+ int thread_limit)
{
struct gomp_device_descr *devicep = resolve_device (device);
- if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ (void) num_teams;
+ (void) thread_limit;
+
+ if (flags & GOMP_TARGET_FLAG_NOWAIT)
{
- /* Host fallback. */
- struct gomp_task_icv *icv = gomp_icv (false);
- if (icv->target_data)
+ struct gomp_thread *thr = gomp_thread ();
+ /* Create a team if we don't have any around, as nowait
+ target tasks make sense to run asynchronously even when
+ outside of any parallel. */
+ if (__builtin_expect (thr->ts.team == NULL, 0))
+ {
+ struct gomp_team *team = gomp_new_team (1);
+ struct gomp_task *task = thr->task;
+ struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
+ team->prev_ts = thr->ts;
+ thr->ts.team = team;
+ thr->ts.team_id = 0;
+ thr->ts.work_share = &team->work_shares[0];
+ thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+ thr->ts.single_count = 0;
+#endif
+ thr->ts.static_trip = 0;
+ thr->task = &team->implicit_task[0];
+ gomp_init_task (thr->task, NULL, icv);
+ if (task)
+ {
+ thr->task = task;
+ gomp_end_task ();
+ free (task);
+ thr->task = &team->implicit_task[0];
+ }
+ else
+ pthread_setspecific (gomp_thread_destructor, thr);
+ }
+ if (thr->ts.team
+ && !thr->task->final_task)
{
- /* Even when doing a host fallback, if there are any active
- #pragma omp target data constructs, need to remember the
- new #pragma omp target data, otherwise GOMP_target_end_data
- would get out of sync. */
- struct target_mem_desc *tgt
- = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
- tgt->prev = icv->target_data;
- icv->target_data = tgt;
+ gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
+ sizes, kinds, flags, depend,
+ GOMP_TARGET_TASK_BEFORE_MAP);
+ return;
}
+ }
+
+ /* If there are depend clauses, but nowait is not present
+ (or we are in a final task), block the parent task until the
+ dependencies are resolved and then just continue with the rest
+ of the function as if it is a merged task. */
+ if (depend != NULL)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->task && thr->task->depend_hash)
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ {
+ gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
return;
}
- gomp_mutex_lock (&devicep->lock);
- if (!devicep->is_initialized)
- gomp_init_device (devicep);
- gomp_mutex_unlock (&devicep->lock);
+ void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
+
+ struct target_mem_desc *tgt_vars
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_TARGET);
+ devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+ gomp_unmap_vars (tgt_vars, true);
+}
- struct gomp_memory_mapping *mm = &devicep->mem_map;
- gomp_mutex_lock (&mm->lock);
- if (!mm->is_initialized)
- gomp_init_tables (devicep, mm);
- gomp_mutex_unlock (&mm->lock);
+/* Host fallback for GOMP_target_data{,_ext} routines. */
+
+static void
+gomp_target_data_fallback (void)
+{
+ struct gomp_task_icv *icv = gomp_icv (false);
+ if (icv->target_data)
+ {
+ /* Even when doing a host fallback, if there are any active
+ #pragma omp target data constructs, need to remember the
+ new #pragma omp target data, otherwise GOMP_target_end_data
+ would get out of sync. */
+ struct target_mem_desc *tgt
+ = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
+ GOMP_MAP_VARS_DATA);
+ tgt->prev = icv->target_data;
+ icv->target_data = tgt;
+ }
+}
+
+void
+GOMP_target_data (int device, const void *unused, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned char *kinds)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return gomp_target_data_fallback ();
struct target_mem_desc *tgt
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
- false);
+ GOMP_MAP_VARS_DATA);
+ struct gomp_task_icv *icv = gomp_icv (true);
+ tgt->prev = icv->target_data;
+ icv->target_data = tgt;
+}
+
+void
+GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return gomp_target_data_fallback ();
+
+ struct target_mem_desc *tgt
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_DATA);
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
return;
+ gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
+}
+
+void
+GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ /* If there are depend clauses, but nowait is not present,
+ block the parent task until the dependencies are resolved
+ and then just continue with the rest of the function as if it
+ is a merged task. Until we are able to schedule task during
+ variable mapping or unmapping, ignore nowait if depend clauses
+ are not present. */
+ if (depend != NULL)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->task && thr->task->depend_hash)
+ {
+ if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+ && thr->ts.team
+ && !thr->task->final_task)
+ {
+ if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+ mapnum, hostaddrs, sizes, kinds,
+ flags | GOMP_TARGET_FLAG_UPDATE,
+ depend, GOMP_TARGET_TASK_DATA))
+ return;
+ }
+ else
+ {
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup
+ && thr->task->taskgroup->cancelled)))
+ return;
+
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+ }
+ }
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return;
+
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+ return;
+
+ gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
+}
+
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+ const int typemask = 0xff;
+ size_t i;
gomp_mutex_lock (&devicep->lock);
- if (!devicep->is_initialized)
- gomp_init_device (devicep);
+ for (i = 0; i < mapnum; i++)
+ {
+ struct splay_tree_key_s cur_node;
+ unsigned char kind = kinds[i] & typemask;
+ switch (kind)
+ {
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
+ case GOMP_MAP_DELETE:
+ case GOMP_MAP_RELEASE:
+ case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+ case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + sizes[i];
+ splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
+ || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
+ ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
+ : splay_tree_lookup (&devicep->mem_map, &cur_node);
+ if (!k)
+ continue;
+
+ if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+ k->refcount--;
+ if ((kind == GOMP_MAP_DELETE
+ || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
+ && k->refcount != REFCOUNT_INFINITY)
+ k->refcount = 0;
+
+ if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+ || kind == GOMP_MAP_ALWAYS_FROM)
+ devicep->dev2host_func (devicep->target_id,
+ (void *) cur_node.host_start,
+ (void *) (k->tgt->tgt_start + k->tgt_offset
+ + cur_node.host_start
+ - k->host_start),
+ cur_node.host_end - cur_node.host_start);
+ if (k->refcount == 0)
+ {
+ splay_tree_remove (&devicep->mem_map, k);
+ if (k->tgt->refcount > 1)
+ k->tgt->refcount--;
+ else
+ gomp_unmap_tgt (k->tgt);
+ }
+
+ break;
+ default:
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+ kind);
+ }
+ }
+
gomp_mutex_unlock (&devicep->lock);
+}
+
+void
+GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds,
+ unsigned int flags, void **depend)
+{
+ struct gomp_device_descr *devicep = resolve_device (device);
+
+ /* If there are depend clauses, but nowait is not present,
+ block the parent task until the dependencies are resolved
+ and then just continue with the rest of the function as if it
+ is a merged task. Until we are able to schedule task during
+ variable mapping or unmapping, ignore nowait if depend clauses
+ are not present. */
+ if (depend != NULL)
+ {
+ struct gomp_thread *thr = gomp_thread ();
+ if (thr->task && thr->task->depend_hash)
+ {
+ if ((flags & GOMP_TARGET_FLAG_NOWAIT)
+ && thr->ts.team
+ && !thr->task->final_task)
+ {
+ if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
+ mapnum, hostaddrs, sizes, kinds,
+ flags, depend,
+ GOMP_TARGET_TASK_DATA))
+ return;
+ }
+ else
+ {
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new
+ tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup
+ && thr->task->taskgroup->cancelled)))
+ return;
+
+ gomp_task_maybe_wait_for_dependencies (depend);
+ }
+ }
+ }
- struct gomp_memory_mapping *mm = &devicep->mem_map;
- gomp_mutex_lock (&mm->lock);
- if (!mm->is_initialized)
- gomp_init_tables (devicep, mm);
- gomp_mutex_unlock (&mm->lock);
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return;
- gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ /* If parallel or taskgroup has been cancelled, don't start new tasks. */
+ if (team
+ && (gomp_team_barrier_cancelled (&team->barrier)
+ || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+ return;
+
+ size_t i;
+ if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+ {
+ gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
+ &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ i += sizes[i];
+ }
+ else
+ gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
+ true, GOMP_MAP_VARS_ENTER_DATA);
+ else
+ gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
+}
+
+bool
+gomp_target_task_fn (void *data)
+{
+ struct gomp_target_task *ttask = (struct gomp_target_task *) data;
+ struct gomp_device_descr *devicep = ttask->devicep;
+
+ if (ttask->fn != NULL)
+ {
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ {
+ ttask->state = GOMP_TARGET_TASK_FALLBACK;
+ gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
+ ttask->hostaddrs, ttask->sizes,
+ ttask->kinds);
+ return false;
+ }
+
+ if (ttask->state == GOMP_TARGET_TASK_FINISHED)
+ {
+ gomp_unmap_vars (ttask->tgt, true);
+ return false;
+ }
+
+ void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
+ ttask->tgt
+ = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
+ ttask->sizes, ttask->kinds, true,
+ GOMP_MAP_VARS_TARGET);
+ ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
+
+ devicep->async_run_func (devicep->target_id, fn_addr,
+ (void *) ttask->tgt->tgt_start, (void *) ttask);
+ return true;
+ }
+ else if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return false;
+
+ size_t i;
+ if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
+ gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ ttask->kinds, true);
+ else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
+ for (i = 0; i < ttask->mapnum; i++)
+ if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
+ {
+ gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
+ NULL, &ttask->sizes[i], &ttask->kinds[i], true,
+ GOMP_MAP_VARS_ENTER_DATA);
+ i += ttask->sizes[i];
+ }
+ else
+ gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
+ &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
+ else
+ gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
+ ttask->kinds);
+ return false;
}
void
(void) num_teams;
}
+void *
+omp_target_alloc (size_t size, int device_num)
+{
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ return malloc (size);
+
+ if (device_num < 0)
+ return NULL;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return NULL;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ return malloc (size);
+
+ gomp_mutex_lock (&devicep->lock);
+ void *ret = devicep->alloc_func (devicep->target_id, size);
+ gomp_mutex_unlock (&devicep->lock);
+ return ret;
+}
+
+void
+omp_target_free (void *device_ptr, int device_num)
+{
+ if (device_ptr == NULL)
+ return;
+
+ if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+ {
+ free (device_ptr);
+ return;
+ }
+
+ if (device_num < 0)
+ return;
+
+ struct gomp_device_descr *devicep = resolve_device (device_num);
+ if (devicep == NULL)
+ return;
+
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ {
+ free (device_ptr);
+ return;
+ }
+
+ gomp_mutex_lock (&devicep->lock);
+ devicep->free_func (devicep->target_id, device_ptr);
+ gomp_mutex_unlock (&devicep->lock);
+}
+
+/* OpenMP 4.5: return nonzero if the storage pointed to by PTR is mapped in
+   the data environment of device DEVICE_NUM.  NULL and the host fallback
+   device always report present; an invalid device number reports absent.  */
+int
+omp_target_is_present (void *ptr, int device_num)
+{
+  if (ptr == NULL)
+    return 1;
+
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return 1;
+
+  if (device_num < 0)
+    return 0;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return 0;
+
+  /* Non-offloading device shares the host address space, so any host
+     pointer is trivially present.  */
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return 1;
+
+  gomp_mutex_lock (&devicep->lock);
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+
+  /* Probe the mapping splay tree with an empty [ptr, ptr) range; the
+     0len lookup also matches zero-length mappings at that address.  */
+  cur_node.host_start = (uintptr_t) ptr;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
+  int ret = n != NULL;
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+/* OpenMP 4.5: copy LENGTH bytes from SRC + SRC_OFFSET on device
+   SRC_DEVICE_NUM to DST + DST_OFFSET on device DST_DEVICE_NUM.
+   Returns 0 on success, EINVAL for an invalid device number or for a
+   copy between two distinct offload devices (no inter-device path).  */
+int
+omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
+		   size_t src_offset, int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  /* Resolve each side; a NULL devicep afterwards means "host memory"
+     (either explicit host fallback or a non-offloading device).  */
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+	return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+	return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+	return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+	return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	src_devicep = NULL;
+    }
+  /* Host-to-host copy.  */
+  if (src_devicep == NULL && dst_devicep == NULL)
+    {
+      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
+      return 0;
+    }
+  /* Host-to-device copy.  */
+  if (src_devicep == NULL)
+    {
+      gomp_mutex_lock (&dst_devicep->lock);
+      dst_devicep->host2dev_func (dst_devicep->target_id,
+				  (char *) dst + dst_offset,
+				  (char *) src + src_offset, length);
+      gomp_mutex_unlock (&dst_devicep->lock);
+      return 0;
+    }
+  /* Device-to-host copy.  */
+  if (dst_devicep == NULL)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2host_func (src_devicep->target_id,
+				  (char *) dst + dst_offset,
+				  (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
+  /* Copy within a single device.  */
+  if (src_devicep == dst_devicep)
+    {
+      gomp_mutex_lock (&src_devicep->lock);
+      src_devicep->dev2dev_func (src_devicep->target_id,
+				 (char *) dst + dst_offset,
+				 (char *) src + src_offset, length);
+      gomp_mutex_unlock (&src_devicep->lock);
+      return 0;
+    }
+  /* Copies between two different offload devices are not supported.  */
+  return EINVAL;
+}
+
+/* Recursive helper for omp_target_memcpy_rect: copy an NUM_DIMS-dimensional
+   rectangular subvolume (extents in VOLUME, per-side offsets in DST_OFFSETS/
+   SRC_OFFSETS, full array extents in DST_DIMENSIONS/SRC_DIMENSIONS) of
+   ELEMENT_SIZE-byte elements.  A NULL devicep means the corresponding side
+   is host memory.  Returns 0 on success, EINVAL on size-computation
+   overflow or an unsupported device combination.  Callers hold the device
+   lock; this routine does no locking itself.  */
+static int
+omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
+			       int num_dims, const size_t *volume,
+			       const size_t *dst_offsets,
+			       const size_t *src_offsets,
+			       const size_t *dst_dimensions,
+			       const size_t *src_dimensions,
+			       struct gomp_device_descr *dst_devicep,
+			       struct gomp_device_descr *src_devicep)
+{
+  size_t dst_slice = element_size;
+  size_t src_slice = element_size;
+  size_t j, dst_off, src_off, length;
+  int i, ret;
+
+  /* Base case: a 1-D copy is a single contiguous transfer.  All byte
+     counts are computed with overflow checking.  */
+  if (num_dims == 1)
+    {
+      if (__builtin_mul_overflow (element_size, volume[0], &length)
+	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
+	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
+	return EINVAL;
+      if (dst_devicep == NULL && src_devicep == NULL)
+	memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+      else if (src_devicep == NULL)
+	dst_devicep->host2dev_func (dst_devicep->target_id,
+				    (char *) dst + dst_off,
+				    (char *) src + src_off, length);
+      else if (dst_devicep == NULL)
+	src_devicep->dev2host_func (src_devicep->target_id,
+				    (char *) dst + dst_off,
+				    (char *) src + src_off, length);
+      else if (src_devicep == dst_devicep)
+	src_devicep->dev2dev_func (src_devicep->target_id,
+				   (char *) dst + dst_off,
+				   (char *) src + src_off, length);
+      else
+	return EINVAL;
+      return 0;
+    }
+
+  /* FIXME: it would be nice to have some plugin function to handle
+     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
+     be handled in the generic recursion below, and for host-host it
+     should be used even for any num_dims >= 2.  */
+
+  /* Compute the byte size of one slice along the outermost dimension
+     (product of all inner extents times the element size).  */
+  for (i = 1; i < num_dims; i++)
+    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
+	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
+      return EINVAL;
+  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
+      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
+    return EINVAL;
+  /* Recurse over the outermost dimension, advancing each side by one
+     slice per iteration.  */
+  for (j = 0; j < volume[0]; j++)
+    {
+      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
+					   (char *) src + src_off,
+					   element_size, num_dims - 1,
+					   volume + 1, dst_offsets + 1,
+					   src_offsets + 1, dst_dimensions + 1,
+					   src_dimensions + 1, dst_devicep,
+					   src_devicep);
+      if (ret)
+	return ret;
+      dst_off += dst_slice;
+      src_off += src_slice;
+    }
+  return 0;
+}
+
+/* OpenMP 4.5: copy a rectangular NUM_DIMS-dimensional subvolume between
+   DST on device DST_DEVICE_NUM and SRC on device SRC_DEVICE_NUM.  When
+   both DST and SRC are NULL, returns the maximum number of dimensions
+   supported (per the OpenMP spec this is how callers query the limit;
+   the generic recursion here supports any depth, hence INT_MAX).
+   Otherwise returns 0 on success or EINVAL on error.  */
+int
+omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
+			int num_dims, const size_t *volume,
+			const size_t *dst_offsets,
+			const size_t *src_offsets,
+			const size_t *dst_dimensions,
+			const size_t *src_dimensions,
+			int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  /* NULL/NULL is the "how many dimensions are supported" query.  */
+  if (!dst && !src)
+    return INT_MAX;
+
+  /* Resolve each side; a NULL devicep afterwards means host memory.  */
+  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (dst_device_num < 0)
+	return EINVAL;
+
+      dst_devicep = resolve_device (dst_device_num);
+      if (dst_devicep == NULL)
+	return EINVAL;
+
+      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	dst_devicep = NULL;
+    }
+  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
+    {
+      if (src_device_num < 0)
+	return EINVAL;
+
+      src_devicep = resolve_device (src_device_num);
+      if (src_devicep == NULL)
+	return EINVAL;
+
+      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	src_devicep = NULL;
+    }
+
+  /* Copies between two different offload devices are not supported.  */
+  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
+    return EINVAL;
+
+  /* At most one device is involved here (checked above), so locking
+     one lock suffices for the whole recursive copy.  */
+  if (src_devicep)
+    gomp_mutex_lock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_lock (&dst_devicep->lock);
+  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
+					   volume, dst_offsets, src_offsets,
+					   dst_dimensions, src_dimensions,
+					   dst_devicep, src_devicep);
+  if (src_devicep)
+    gomp_mutex_unlock (&src_devicep->lock);
+  else if (dst_devicep)
+    gomp_mutex_unlock (&dst_devicep->lock);
+  return ret;
+}
+
+/* OpenMP 4.5: associate the SIZE bytes of host storage at HOST_PTR with
+   device storage at DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM.
+   Returns 0 on success (including when an identical mapping already
+   exists), EINVAL otherwise — including when HOST_PTR overlaps an
+   existing mapping to a different device address.  */
+int
+omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
+			  size_t device_offset, int device_num)
+{
+  /* Associating pointers makes no sense on the host fallback device.  */
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) host_ptr;
+  cur_node.host_end = cur_node.host_start + size;
+  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+  if (n)
+    {
+      /* An overlapping mapping exists: succeed only if it covers the
+	 requested range and already points at the same device address.  */
+      if (n->tgt->tgt_start + n->tgt_offset
+	  == (uintptr_t) device_ptr + device_offset
+	  && n->host_start <= cur_node.host_start
+	  && n->host_end >= cur_node.host_end)
+	ret = 0;
+    }
+  else
+    {
+      /* Create a synthetic target_mem_desc: tgt_start/tgt_end of 0 and a
+	 NULL to_free mark it as user-associated (omp_target_disassociate_ptr
+	 keys on exactly these fields), and the device address is carried
+	 entirely in tgt_offset.  */
+      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+      tgt->array = gomp_malloc (sizeof (*tgt->array));
+      tgt->refcount = 1;
+      tgt->tgt_start = 0;
+      tgt->tgt_end = 0;
+      tgt->to_free = NULL;
+      tgt->prev = NULL;
+      tgt->list_count = 0;
+      tgt->device_descr = devicep;
+      splay_tree_node array = tgt->array;
+      splay_tree_key k = &array->key;
+      k->host_start = cur_node.host_start;
+      k->host_end = cur_node.host_end;
+      k->tgt = tgt;
+      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
+      /* REFCOUNT_INFINITY keeps ordinary unmapping from ever removing a
+	 user-associated pointer.  */
+      k->refcount = REFCOUNT_INFINITY;
+      k->async_refcount = 0;
+      array->left = NULL;
+      array->right = NULL;
+      splay_tree_insert (&devicep->mem_map, array);
+      ret = 0;
+    }
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
+/* OpenMP 4.5: remove the association for host pointer PTR on device
+   DEVICE_NUM that was previously created by omp_target_associate_ptr.
+   Returns 0 on success, EINVAL if there is no such association or the
+   mapping at PTR was not created by omp_target_associate_ptr.  */
+int
+omp_target_disassociate_ptr (void *ptr, int device_num)
+{
+  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
+    return EINVAL;
+
+  if (device_num < 0)
+    return EINVAL;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+    return EINVAL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+    return EINVAL;
+
+  gomp_mutex_lock (&devicep->lock);
+
+  struct splay_tree_s *mem_map = &devicep->mem_map;
+  struct splay_tree_key_s cur_node;
+  int ret = EINVAL;
+
+  cur_node.host_start = (uintptr_t) ptr;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
+  /* Only remove mappings that look exactly like the synthetic ones
+     omp_target_associate_ptr creates (REFCOUNT_INFINITY, zero tgt_start,
+     NULL to_free, sole reference, empty list); anything else was created
+     by regular mapping and must not be torn down here.  */
+  if (n
+      && n->host_start == cur_node.host_start
+      && n->refcount == REFCOUNT_INFINITY
+      && n->tgt->tgt_start == 0
+      && n->tgt->to_free == NULL
+      && n->tgt->refcount == 1
+      && n->tgt->list_count == 0)
+    {
+      splay_tree_remove (&devicep->mem_map, n);
+      gomp_unmap_tgt (n->tgt);
+      ret = 0;
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+  return ret;
+}
+
#ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE. Name of plugin is passed
const char *plugin_name)
{
const char *err = NULL, *last_missing = NULL;
- int optional_present, optional_total;
-
- /* Clear any existing error. */
- dlerror ();
void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
if (!plugin_handle)
- {
- err = dlerror ();
- goto out;
- }
+ goto dl_fail;
/* Check if all required functions are available in the plugin and store
- their handlers. */
+ their handlers. None of the symbols can legitimately be NULL,
+ so we don't need to check dlerror all the time. */
#define DLSYM(f) \
- do \
- { \
- device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
- err = dlerror (); \
- if (err != NULL) \
- goto out; \
- } \
- while (0)
- /* Similar, but missing functions are not an error. */
-#define DLSYM_OPT(f, n) \
- do \
- { \
- const char *tmp_err; \
- device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
- tmp_err = dlerror (); \
- if (tmp_err == NULL) \
- optional_present++; \
- else \
- last_missing = #n; \
- optional_total++; \
- } \
- while (0)
+ if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
+ goto dl_fail
+ /* Similar, but missing functions are not an error. Return false if
+ failed, true otherwise. */
+#define DLSYM_OPT(f, n) \
+ ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
+ || (last_missing = #n, 0))
+
+ DLSYM (version);
+ if (device->version_func () != GOMP_VERSION)
+ {
+ err = "plugin version mismatch";
+ goto fail;
+ }
DLSYM (get_name);
DLSYM (get_caps);
DLSYM (get_type);
DLSYM (get_num_devices);
- DLSYM (register_image);
DLSYM (init_device);
DLSYM (fini_device);
- DLSYM (get_table);
+ DLSYM (load_image);
+ DLSYM (unload_image);
DLSYM (alloc);
DLSYM (free);
DLSYM (dev2host);
DLSYM (host2dev);
device->capabilities = device->get_caps_func ();
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
- DLSYM (run);
+ {
+ DLSYM (run);
+ DLSYM (async_run);
+ DLSYM (dev2dev);
+ }
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
{
- optional_present = optional_total = 0;
- DLSYM_OPT (openacc.exec, openacc_parallel);
- DLSYM_OPT (openacc.open_device, openacc_open_device);
- DLSYM_OPT (openacc.close_device, openacc_close_device);
- DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
- DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
- DLSYM_OPT (openacc.register_async_cleanup,
- openacc_register_async_cleanup);
- DLSYM_OPT (openacc.async_test, openacc_async_test);
- DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
- DLSYM_OPT (openacc.async_wait, openacc_async_wait);
- DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
- DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
- DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
- DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
- DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
- DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
- /* Require all the OpenACC handlers if we have
- GOMP_OFFLOAD_CAP_OPENACC_200. */
- if (optional_present != optional_total)
+ if (!DLSYM_OPT (openacc.exec, openacc_parallel)
+ || !DLSYM_OPT (openacc.register_async_cleanup,
+ openacc_register_async_cleanup)
+ || !DLSYM_OPT (openacc.async_test, openacc_async_test)
+ || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
+ || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
+ || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
+ || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
+ || !DLSYM_OPT (openacc.async_wait_all_async,
+ openacc_async_wait_all_async)
+ || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
+ || !DLSYM_OPT (openacc.create_thread_data,
+ openacc_create_thread_data)
+ || !DLSYM_OPT (openacc.destroy_thread_data,
+ openacc_destroy_thread_data))
{
+ /* Require all the OpenACC handlers if we have
+ GOMP_OFFLOAD_CAP_OPENACC_200. */
err = "plugin missing OpenACC handler function";
- goto out;
+ goto fail;
}
- optional_present = optional_total = 0;
- DLSYM_OPT (openacc.cuda.get_current_device,
- openacc_get_current_cuda_device);
- DLSYM_OPT (openacc.cuda.get_current_context,
- openacc_get_current_cuda_context);
- DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
- DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
- /* Make sure all the CUDA functions are there if any of them are. */
- if (optional_present && optional_present != optional_total)
+
+ unsigned cuda = 0;
+ cuda += DLSYM_OPT (openacc.cuda.get_current_device,
+ openacc_get_current_cuda_device);
+ cuda += DLSYM_OPT (openacc.cuda.get_current_context,
+ openacc_get_current_cuda_context);
+ cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
+ cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
+ if (cuda && cuda != 4)
{
+ /* Make sure all the CUDA functions are there if any of them are. */
err = "plugin missing OpenACC CUDA handler function";
- goto out;
+ goto fail;
}
}
#undef DLSYM
#undef DLSYM_OPT
- out:
- if (err != NULL)
- {
- gomp_error ("while loading %s: %s", plugin_name, err);
- if (last_missing)
- gomp_error ("missing function was %s", last_missing);
- if (plugin_handle)
- dlclose (plugin_handle);
- }
- return err == NULL;
-}
+ return 1;
-/* This function adds a compatible offload image IMAGE to an accelerator device
- DEVICE. DEVICE must be locked on entry, and remains locked on return. */
+ dl_fail:
+ err = dlerror ();
+ fail:
+ gomp_error ("while loading %s: %s", plugin_name, err);
+ if (last_missing)
+ gomp_error ("missing function was %s", last_missing);
+ if (plugin_handle)
+ dlclose (plugin_handle);
-static void
-gomp_register_image_for_device (struct gomp_device_descr *device,
- struct offload_image_descr *image)
-{
- if (!device->offload_regions_registered
- && (device->type == image->type
- || device->type == OFFLOAD_TARGET_TYPE_HOST))
- {
- device->register_image_func (image->host_table, image->target_data);
- device->offload_regions_registered = true;
- }
+ return 0;
}
/* This function initializes the runtime needed for offloading.
current_device.name = current_device.get_name_func ();
/* current_device.capabilities has already been set. */
current_device.type = current_device.get_type_func ();
- current_device.mem_map.is_initialized = false;
- current_device.mem_map.splay_tree.root = NULL;
+ current_device.mem_map.root = NULL;
current_device.is_initialized = false;
- current_device.offload_regions_registered = false;
current_device.openacc.data_environ = NULL;
- current_device.openacc.target_data = NULL;
for (i = 0; i < new_num_devices; i++)
{
current_device.target_id = i;
devices[num_devices] = current_device;
- gomp_mutex_init (&devices[num_devices].mem_map.lock);
gomp_mutex_init (&devices[num_devices].lock);
num_devices++;
}
for (i = 0; i < num_devices; i++)
{
- int j;
-
- for (j = 0; j < num_offload_images; j++)
- gomp_register_image_for_device (&devices[i], &offload_images[j]);
-
/* The 'devices' array can be moved (by the realloc call) until we have
found all the plugins, so registering with the OpenACC runtime (which
takes a copy of the pointer argument) must be delayed until now. */
if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
goacc_register (&devices[i]);
}
-
- free (offload_images);
- offload_images = NULL;
- num_offload_images = 0;
}
#else /* PLUGIN_SUPPORT */