+2019-12-19 Julian Brown <julian@codesourcery.com>
+ Thomas Schwinge <thomas@codesourcery.com>
+
+ * libgomp.h (struct splay_tree_aux): Add attach_count field.
+ (gomp_attach_pointer, gomp_detach_pointer): Add prototypes.
+ * libgomp.map (OACC_2.6): New section. Add acc_attach,
+ acc_attach_async, acc_detach, acc_detach_async, acc_detach_finalize,
+ acc_detach_finalize_async.
+ * oacc-mem.c (acc_attach_async, acc_attach, goacc_detach_internal,
+ acc_detach, acc_detach_async, acc_detach_finalize,
+ acc_detach_finalize_async): New functions.
+ * openacc.h (acc_attach, acc_attach_async, acc_detach,
+ (acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add
+ prototypes.
+ * target.c (gomp_attach_pointer, gomp_detach_pointer): New functions.
+ (gomp_remove_var_internal): Free attachment counts if present.
+ * testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test.
+ * testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test.
+
2019-12-19 Julian Brown <julian@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
struct splay_tree_aux {
/* Pointer to the original mapping of "omp declare target link" object. */
splay_tree_key link_key;
+ /* For a block with attached pointers, the attachment counters for each.
+ Only used for OpenACC. */
+ uintptr_t *attach_count;
};
struct splay_tree_key_s {
struct goacc_asyncqueue *, void *, const void *,
size_t);
extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
+extern void gomp_attach_pointer (struct gomp_device_descr *,
+ struct goacc_asyncqueue *, splay_tree,
+ splay_tree_key, uintptr_t, size_t,
+ struct gomp_coalesce_buf *);
+extern void gomp_detach_pointer (struct gomp_device_descr *,
+ struct goacc_asyncqueue *, splay_tree_key,
+ uintptr_t, bool, struct gomp_coalesce_buf *);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
acc_register_library;
} OACC_2.5;
+OACC_2.6 {
+ global:
+ acc_attach;
+ acc_attach_async;
+ acc_detach;
+ acc_detach_async;
+ acc_detach_finalize;
+ acc_detach_finalize_async;
+} OACC_2.5.1;
+
GOACC_2.0 {
global:
GOACC_data_end;
update_dev_host (0, h, s, async);
}
+void
+acc_attach_async (void **hostaddr, int async)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+ goacc_aq aq = get_goacc_asyncqueue (async);
+
+ struct splay_tree_key_s cur_node;
+ splay_tree_key n;
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ cur_node.host_start = (uintptr_t) hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ gomp_fatal ("struct not mapped for acc_attach");
+
+ gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
+ 0, NULL);
+
+ gomp_mutex_unlock (&acc_dev->lock);
+}
+
+void
+acc_attach (void **hostaddr)
+{
+ acc_attach_async (hostaddr, acc_async_sync);
+}
+
+static void
+goacc_detach_internal (void **hostaddr, int async, bool finalize)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+ struct splay_tree_key_s cur_node;
+ splay_tree_key n;
+ struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ cur_node.host_start = (uintptr_t) hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ gomp_fatal ("struct not mapped for acc_detach");
+
+ gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
+
+ gomp_mutex_unlock (&acc_dev->lock);
+}
+
+void
+acc_detach (void **hostaddr)
+{
+ goacc_detach_internal (hostaddr, acc_async_sync, false);
+}
+
+void
+acc_detach_async (void **hostaddr, int async)
+{
+ goacc_detach_internal (hostaddr, async, false);
+}
+
+void
+acc_detach_finalize (void **hostaddr)
+{
+ goacc_detach_internal (hostaddr, acc_async_sync, true);
+}
+
+void
+acc_detach_finalize_async (void **hostaddr, int async)
+{
+ goacc_detach_internal (hostaddr, async, true);
+}
+
/* Some types of (pointer) variables use several consecutive mappings, which
must be treated as a group for enter/exit data directives. This function
returns the last mapping in such a group (inclusive), or POS for singleton
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_attach (void **) __GOACC_NOTHROW;
+void acc_attach_async (void **, int) __GOACC_NOTHROW;
+void acc_detach (void **) __GOACC_NOTHROW;
+void acc_detach_async (void **, int) __GOACC_NOTHROW;
/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */
void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_detach_finalize (void **) __GOACC_NOTHROW;
+void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
/* Async functions, specified in OpenACC 2.5. */
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
(void *) cur_node.host_end);
}
+attribute_hidden void
+gomp_attach_pointer (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, splay_tree mem_map,
+ splay_tree_key n, uintptr_t attach_to, size_t bias,
+ struct gomp_coalesce_buf *cbufp)
+{
+ struct splay_tree_key_s s;
+ size_t size, idx;
+
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("enclosing struct not mapped for attach");
+ }
+
+ size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
+ /* We might have a pointer in a packed struct: however we cannot have more
+ than one such pointer in each pointer-sized portion of the struct, so
+ this is safe. */
+ idx = (attach_to - n->host_start) / sizeof (void *);
+
+ if (!n->aux)
+ n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
+
+ if (!n->aux->attach_count)
+ n->aux->attach_count
+ = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
+
+ if (n->aux->attach_count[idx] < UINTPTR_MAX)
+ n->aux->attach_count[idx]++;
+ else
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("attach count overflow");
+ }
+
+ if (n->aux->attach_count[idx] == 1)
+ {
+ uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
+ - n->host_start;
+ uintptr_t target = (uintptr_t) *(void **) attach_to;
+ splay_tree_key tn;
+ uintptr_t data;
+
+ if ((void *) target == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("attempt to attach null pointer");
+ }
+
+ s.host_start = target + bias;
+ s.host_end = s.host_start + 1;
+ tn = splay_tree_lookup (mem_map, &s);
+
+ if (!tn)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("pointer target not mapped for attach");
+ }
+
+ data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
+
+ gomp_debug (1,
+ "%s: attaching host %p, target %p (struct base %p) to %p\n",
+ __FUNCTION__, (void *) attach_to, (void *) devptr,
+ (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
+
+ gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
+ sizeof (void *), cbufp);
+ }
+ else
+ gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+ (void *) attach_to, (int) n->aux->attach_count[idx]);
+}
+
+attribute_hidden void
+gomp_detach_pointer (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, splay_tree_key n,
+ uintptr_t detach_from, bool finalize,
+ struct gomp_coalesce_buf *cbufp)
+{
+ size_t idx;
+
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("enclosing struct not mapped for detach");
+ }
+
+ idx = (detach_from - n->host_start) / sizeof (void *);
+
+ if (!n->aux || !n->aux->attach_count)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("no attachment counters for struct");
+ }
+
+ if (finalize)
+ n->aux->attach_count[idx] = 1;
+
+ if (n->aux->attach_count[idx] == 0)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("attach count underflow");
+ }
+ else
+ n->aux->attach_count[idx]--;
+
+ if (n->aux->attach_count[idx] == 0)
+ {
+ uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
+ - n->host_start;
+ uintptr_t target = (uintptr_t) *(void **) detach_from;
+
+ gomp_debug (1,
+ "%s: detaching host %p, target %p (struct base %p) to %p\n",
+ __FUNCTION__, (void *) detach_from, (void *) devptr,
+ (void *) (n->tgt->tgt_start + n->tgt_offset),
+ (void *) target);
+
+ gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
+ sizeof (void *), cbufp);
+ }
+ else
+ gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+ (void *) detach_from, (int) n->aux->attach_count[idx]);
+}
+
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
if (k->aux->link_key)
splay_tree_insert (&devicep->mem_map,
(splay_tree_node) k->aux->link_key);
+ if (k->aux->attach_count)
+ free (k->aux->attach_count);
free (k->aux);
k->aux = NULL;
}
--- /dev/null
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main ()
+{
+ int n = 100, i;
+ int *a = (int *) malloc (sizeof (int) * n);
+ int *b;
+
+ for (i = 0; i < n; i++)
+ a[i] = i+1;
+
+#pragma acc enter data copyin(a[:n]) create(b)
+
+ b = a;
+ acc_attach ((void **)&b);
+
+#pragma acc parallel loop present (b[:n])
+ for (i = 0; i < n; i++)
+ b[i] = i+1;
+
+ acc_detach ((void **)&b);
+
+#pragma acc exit data copyout(a[:n], b)
+
+ for (i = 0; i < 10; i++)
+ assert (a[i] == b[i]);
+
+ free (a);
+
+ return 0;
+}
--- /dev/null
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+struct node
+{
+ struct node *next;
+ int val;
+};
+
+int
+sum_nodes (struct node *head)
+{
+ int i = 0, sum = 0;
+
+#pragma acc parallel reduction(+:sum) present(head[:1])
+ {
+ for (; head != NULL; head = head->next)
+ sum += head->val;
+ }
+
+ return sum;
+}
+
+void
+insert (struct node *head, int val)
+{
+ struct node *n = (struct node *) malloc (sizeof (struct node));
+
+ if (head->next)
+ acc_detach ((void **) &head->next);
+
+ n->val = val;
+ n->next = head->next;
+ head->next = n;
+
+ acc_copyin (n, sizeof (struct node));
+ acc_attach((void **) &head->next);
+
+ if (n->next)
+ acc_attach ((void **) &n->next);
+}
+
+void
+destroy (struct node *head)
+{
+ while (head->next != NULL)
+ {
+ acc_detach ((void **) &head->next);
+ struct node * n = head->next;
+ head->next = n->next;
+ if (n->next)
+ acc_detach ((void **) &n->next);
+
+ acc_delete (n, sizeof (struct node));
+ if (head->next)
+ acc_attach((void **) &head->next);
+
+ free (n);
+ }
+}
+
+int
+main ()
+{
+ struct node list = { .next = NULL, .val = 0 };
+ int i;
+
+ acc_copyin (&list, sizeof (struct node));
+
+ for (i = 0; i < 10; i++)
+ insert (&list, 2);
+
+ assert (sum_nodes (&list) == 10 * 2);
+
+ destroy (&list);
+
+ acc_delete (&list, sizeof (struct node));
+
+ return 0;
+}