-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2013-2017 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
return kind == GOMP_MAP_TO_PSET;
}
+static void goacc_wait (int async, int num_waits, va_list *ap);
-/* Ensure that the target device for DEVICE_TYPE is initialised (and that
- plugins have been loaded if appropriate). The ACC_dev variable for the
- current thread will be set appropriately for the given device type on
- return. */
-attribute_hidden void
-select_acc_device (int device_type)
-{
- goacc_lazy_initialize ();
-
- if (device_type == GOMP_DEVICE_HOST_FALLBACK)
- return;
-
- if (device_type == acc_device_none)
- device_type = acc_device_host;
-
- if (device_type >= 0)
- {
- /* NOTE: this will go badly if the surrounding data environment is set up
- to use a different device type. We'll just have to trust that users
- know what they're doing... */
- acc_set_device_type (device_type);
- }
-}
-
-static void goacc_wait (int async, int num_waits, va_list ap);
+/* Launch a possibly offloaded function on DEVICE. FN is the host fn
+ address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
+ blocks to be copied to/from the device. Variadic arguments are
+ keyed optional parameters terminated with a zero. */
void
-GOACC_parallel (int device, void (*fn) (void *),
- size_t mapnum, void **hostaddrs, size_t *sizes,
- unsigned short *kinds,
- int num_gangs, int num_workers, int vector_length,
- int async, int num_waits, ...)
+GOACC_parallel_keyed (int device, void (*fn) (void *),
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, ...)
{
bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
va_list ap;
struct splay_tree_key_s k;
splay_tree_key tgt_fn_key;
void (*tgt_fn);
-
- if (num_gangs != 1)
- gomp_fatal ("num_gangs (%d) different from one is not yet supported",
- num_gangs);
- if (num_workers != 1)
- gomp_fatal ("num_workers (%d) different from one is not yet supported",
- num_workers);
+ int async = GOMP_ASYNC_SYNC;
+ unsigned dims[GOMP_DIM_MAX];
+ unsigned tag;
#ifdef HAVE_INTTYPES_H
- gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
- "async = %d\n",
- __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
+ gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
+ __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
#else
- gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
- __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
- async);
+ gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
+ __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
#endif
- select_acc_device (device);
+ goacc_lazy_initialize ();
thr = goacc_thread ();
acc_dev = thr->dev;
return;
}
- va_start (ap, num_waits);
-
- if (num_waits > 0)
- goacc_wait (async, num_waits, ap);
+ /* Default: let the runtime choose. */
+ for (i = 0; i != GOMP_DIM_MAX; i++)
+ dims[i] = 0;
- va_end (ap);
+ va_start (ap, kinds);
+ /* TODO: This will need amending when device_type is implemented. */
+ while ((tag = va_arg (ap, unsigned)) != 0)
+ {
+ if (GOMP_LAUNCH_DEVICE (tag))
+ gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
+ GOMP_LAUNCH_DEVICE (tag));
+
+ switch (GOMP_LAUNCH_CODE (tag))
+ {
+ case GOMP_LAUNCH_DIM:
+ {
+ unsigned mask = GOMP_LAUNCH_OP (tag);
+ for (i = 0; i != GOMP_DIM_MAX; i++)
+ if (mask & GOMP_DIM_MASK (i))
+ dims[i] = va_arg (ap, unsigned);
+ }
+ break;
+
+ case GOMP_LAUNCH_ASYNC:
+ {
+ /* Small constant values are encoded in the operand. */
+ async = GOMP_LAUNCH_OP (tag);
+
+ if (async == GOMP_LAUNCH_OP_MAX)
+ async = va_arg (ap, unsigned);
+ break;
+ }
+
+ case GOMP_LAUNCH_WAIT:
+ {
+ unsigned num_waits = GOMP_LAUNCH_OP (tag);
+
+ if (num_waits)
+ goacc_wait (async, num_waits, &ap);
+ break;
+ }
+
+ default:
+ gomp_fatal ("unrecognized offload code '%d',"
+ " libgomp is too old", GOMP_LAUNCH_CODE (tag));
+ }
+ }
+ va_end (ap);
+
acc_dev->openacc.async_set_async_func (async);
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
if (tgt_fn_key == NULL)
gomp_fatal ("target function wasn't mapped");
- tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
+ tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
}
else
tgt_fn = (void (*)) fn;
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- false);
+ GOMP_MAP_VARS_OPENACC);
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
- devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
- + tgt->list[i]->tgt_offset);
+ devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+ + tgt->list[i].key->tgt_offset);
- acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
- num_gangs, num_workers, vector_length, async,
- tgt);
+ acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ async, dims, tgt);
/* If running synchronously, unmap immediately. */
if (async < acc_async_noval)
gomp_unmap_vars (tgt, true);
else
- {
- gomp_copy_from_async (tgt);
- acc_dev->openacc.register_async_cleanup_func (tgt);
- }
+ tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
acc_dev->openacc.async_set_async_func (acc_async_sync);
}
+/* Legacy entry point, only provide host execution.  FN is called
+   directly with HOSTADDRS as its single argument, bracketed by
+   goacc_save_and_set_bind (acc_device_host) and goacc_restore_bind.
+   DEVICE, MAPNUM, SIZES, KINDS, NUM_GANGS, NUM_WORKERS, VECTOR_LENGTH,
+   ASYNC, NUM_WAITS and the variadic arguments are accepted but never
+   read.  */
+
+void
+GOACC_parallel (int device, void (*fn) (void *),
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds,
+ int num_gangs, int num_workers, int vector_length,
+ int async, int num_waits, ...)
+{
+ goacc_save_and_set_bind (acc_device_host);
+ fn (hostaddrs);
+ goacc_restore_bind ();
+}
+
void
GOACC_data_start (int device, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds)
__FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
#endif
- select_acc_device (device);
+ goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|| host_fallback)
{
- tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+ tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+ GOMP_MAP_VARS_OPENACC);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- false);
+ GOMP_MAP_VARS_OPENACC);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
bool data_enter = false;
size_t i;
- select_acc_device (device);
+ goacc_lazy_initialize ();
thr = goacc_thread ();
acc_dev = thr->dev;
|| host_fallback)
return;
- if (num_waits > 0)
+ if (num_waits)
{
va_list ap;
va_start (ap, num_waits);
-
- goacc_wait (async, num_waits, ap);
-
+ goacc_wait (async, num_waits, &ap);
va_end (ap);
}
break;
}
- if (kind == GOMP_MAP_FORCE_DEALLOC
+ if (kind == GOMP_MAP_DELETE
|| kind == GOMP_MAP_FORCE_FROM)
break;
== GOMP_MAP_FORCE_FROM,
async, 1);
break;
- case GOMP_MAP_FORCE_DEALLOC:
+ case GOMP_MAP_DELETE:
acc_delete (hostaddrs[i], sizes[i]);
break;
case GOMP_MAP_FORCE_FROM:
}
static void
-goacc_wait (int async, int num_waits, va_list ap)
+goacc_wait (int async, int num_waits, va_list *ap)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
- int i;
-
- assert (num_waits >= 0);
-
- if (async == acc_async_sync && num_waits == 0)
- {
- acc_wait_all ();
- return;
- }
-
- if (async == acc_async_sync && num_waits)
- {
- for (i = 0; i < num_waits; i++)
- {
- int qid = va_arg (ap, int);
-
- if (acc_async_test (qid))
- continue;
- acc_wait (qid);
- }
- return;
- }
-
- if (async == acc_async_noval && num_waits == 0)
+ while (num_waits--) /* Handle NUM_WAITS queue ids, one per iteration.
+ The count is only tested against zero, so it is expected to be
+ positive; the visible callers all check for a nonzero NUM_WAITS
+ before calling. */
{
- acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
- return;
- }
-
- for (i = 0; i < num_waits; i++)
- {
- int qid = va_arg (ap, int);
-
+ int qid = va_arg (*ap, int); /* Read through AP, so the caller's
+ va_list itself advances past every id consumed here. */
+ /* Nothing more to do for QID when acc_async_test returns nonzero. */
if (acc_async_test (qid))
continue;
- /* If we're waiting on the same asynchronous queue as we're launching on,
- the queue itself will order work as required, so there's no need to
- wait explicitly. */
- if (qid != async)
+ if (async == acc_async_sync) /* Synchronous launch: wait on QID
+ right here through acc_wait. */
+ acc_wait (qid);
+ else if (qid == async)
+ ;/* If we're waiting on the same asynchronous queue as we're
+ launching on, the queue itself will order work as
+ required, so there's no need to wait explicitly. */
+ else /* A different queue: hand the (QID, ASYNC) pair to the
+ device's async_wait_async_func hook. */
acc_dev->openacc.async_wait_async_func (qid, async);
}
}
bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
size_t i;
- select_acc_device (device);
+ goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
|| host_fallback)
return;
- if (num_waits > 0)
+ if (num_waits)
{
va_list ap;
va_start (ap, num_waits);
-
- goacc_wait (async, num_waits, ap);
-
+ goacc_wait (async, num_waits, &ap);
va_end (ap);
}
+/* Wait entry point.  When NUM_WAITS is nonzero the variadic arguments
+   are NUM_WAITS int queue ids, which are handed to goacc_wait together
+   with ASYNC.  When there are none, ASYNC alone selects the action:
+   acc_async_sync calls acc_wait_all; acc_async_noval or an explicit
+   queue passes ASYNC to the device's async_wait_all_async_func hook.
+   Any other ASYNC value is ignored.  */
void
GOACC_wait (int async, int num_waits, ...)
{
- va_list ap;
-
- va_start (ap, num_waits);
-
- goacc_wait (async, num_waits, ap);
+ if (num_waits)
+ {
+ va_list ap;
- va_end (ap);
+ va_start (ap, num_waits);
+ goacc_wait (async, num_waits, &ap);
+ va_end (ap);
+ }
+ else if (async == acc_async_sync)
+ acc_wait_all ();
+ else if (async >= acc_async_noval)
+ /* Covers acc_async_noval and explicit queues alike.  Testing only
+ for acc_async_noval made a wait with an explicit async queue and
+ no wait arguments a silent no-op. */
+ goacc_thread ()->dev->openacc.async_wait_all_async_func (async);
}
int
{
return 0;
}
+
+/* Apply MAPNUM data mappings, described by the parallel arrays
+   HOSTADDRS, SIZES and KINDS, one entry at a time.  Only the low eight
+   bits of each KINDS element select the action:
+
+   - GOMP_MAP_POINTER, GOMP_MAP_TO_PSET: skipped.
+   - GOMP_MAP_FORCE_ALLOC, GOMP_MAP_FORCE_FROM, GOMP_MAP_FORCE_TO,
+     GOMP_MAP_DELETE, GOMP_MAP_TO: forwarded as a one-element request
+     to GOACC_enter_exit_data.
+   - GOMP_MAP_ALLOC: forwarded likewise, but only when acc_is_present
+     reports that the block is not mapped yet.
+   - GOMP_MAP_FROM: KINDS[i] is overwritten with GOMP_MAP_FORCE_FROM
+     (the caller's array is modified) and the entry is then forwarded.
+   - GOMP_MAP_FORCE_DEVICEPTR: nothing to do.
+   - GOMP_MAP_FORCE_PRESENT: fatal error unless acc_is_present reports
+     the block as mapped.
+
+   Any other kind trips an assertion.  DEVICE is passed through to
+   GOACC_enter_exit_data unchanged.  Every forwarded request uses
+   GOMP_ASYNC_SYNC and no wait arguments: a literal 0 in the async
+   position would name async queue 0 instead of asking for synchronous
+   execution.  */
+
+void
+GOACC_declare (int device, size_t mapnum,
+               void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  size_t i;
+
+  for (i = 0; i < mapnum; i++)
+    {
+      unsigned char kind = kinds[i] & 0xff;
+
+      switch (kind)
+        {
+        case GOMP_MAP_POINTER:
+        case GOMP_MAP_TO_PSET:
+          /* Not handled here.  */
+          break;
+
+        case GOMP_MAP_FORCE_ALLOC:
+        case GOMP_MAP_FORCE_FROM:
+        case GOMP_MAP_FORCE_TO:
+        case GOMP_MAP_DELETE:
+        case GOMP_MAP_TO:
+          GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+                                 &kinds[i], GOMP_ASYNC_SYNC, 0);
+          break;
+
+        case GOMP_MAP_FORCE_DEVICEPTR:
+          break;
+
+        case GOMP_MAP_ALLOC:
+          if (!acc_is_present (hostaddrs[i], sizes[i]))
+            GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+                                   &kinds[i], GOMP_ASYNC_SYNC, 0);
+          break;
+
+        case GOMP_MAP_FROM:
+          /* Rewrites the caller's KINDS entry before forwarding it.  */
+          kinds[i] = GOMP_MAP_FORCE_FROM;
+          GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+                                 &kinds[i], GOMP_ASYNC_SYNC, 0);
+          break;
+
+        case GOMP_MAP_FORCE_PRESENT:
+          if (!acc_is_present (hostaddrs[i], sizes[i]))
+            gomp_fatal ("[%p,%lu] is not mapped", hostaddrs[i],
+                        (unsigned long) sizes[i]);
+          break;
+
+        default:
+          assert (0);
+          break;
+        }
+    }
+}