OpenACC 2.6 deep copy: attach/detach API routines
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return block containing [D->S), or NULL if not contained.
72
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
88
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
91
92 void *
93 acc_malloc (size_t s)
94 {
95 if (!s)
96 return NULL;
97
98 goacc_lazy_initialize ();
99
100 struct goacc_thread *thr = goacc_thread ();
101
102 assert (thr->dev);
103
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
106
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112
113 if (profiling_p)
114 {
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
117 }
118
119 return res;
120 }
121
122 void
123 acc_free (void *d)
124 {
125 splay_tree_key k;
126
127 if (!d)
128 return;
129
130 struct goacc_thread *thr = goacc_thread ();
131
132 assert (thr && thr->dev);
133
134 struct gomp_device_descr *acc_dev = thr->dev;
135
136 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
137 return free (d);
138
139 acc_prof_info prof_info;
140 acc_api_info api_info;
141 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
142
143 gomp_mutex_lock (&acc_dev->lock);
144
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
149 {
150 void *offset = d - k->tgt->tgt_start + k->tgt_offset;
151 void *h = k->host_start + offset;
152 size_t h_size = k->host_end - k->host_start;
153 gomp_mutex_unlock (&acc_dev->lock);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
158 d, h, (int) h_size);
159 }
160 else
161 gomp_mutex_unlock (&acc_dev->lock);
162
163 if (!acc_dev->free_func (acc_dev->target_id, d))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
165
166 if (profiling_p)
167 {
168 thr->prof_info = NULL;
169 thr->api_info = NULL;
170 }
171 }
172
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
176 {
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
180
181 assert (thr && thr->dev);
182
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184 {
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
190 }
191
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
196 {
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
199 }
200
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
206
207 if (profiling_p)
208 {
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
211 }
212 }
213
214 void
215 acc_memcpy_to_device (void *d, void *h, size_t s)
216 {
217 memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
218 }
219
/* Copy S bytes from host address H to device address D on the queue selected
   by ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  const bool from_device = false;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
225
226 void
227 acc_memcpy_from_device (void *h, void *d, size_t s)
228 {
229 memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
230 }
231
/* Copy S bytes from device address D to host address H on the queue selected
   by ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  const bool from_device = true;

  memcpy_tofrom_device (from_device, d, h, s, async, __FUNCTION__);
}
237
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
318 /* Return 1 if host data [H,+S] is present on the device. */
319
320 int
321 acc_is_present (void *h, size_t s)
322 {
323 splay_tree_key n;
324
325 if (!s || !h)
326 return 0;
327
328 goacc_lazy_initialize ();
329
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
332
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
335
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
338
339 gomp_mutex_lock (&acc_dev->lock);
340
341 n = lookup_host (acc_dev, h, s);
342
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
347
348 gomp_mutex_unlock (&acc_dev->lock);
349
350 return n != NULL;
351 }
352
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
364
365 goacc_lazy_initialize ();
366
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
369
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 {
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
374 }
375 else
376 {
377 struct goacc_thread *thr = goacc_thread ();
378
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
382
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386
387 gomp_mutex_lock (&acc_dev->lock);
388
389 if (lookup_host (acc_dev, h, s))
390 {
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
394 }
395
396 if (lookup_dev (&thr->dev->mem_map, d, s))
397 {
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
401 }
402
403 gomp_mutex_unlock (&acc_dev->lock);
404
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 splay_tree_key n = tgt->list[0].key;
409 assert (n->refcount == 1);
410 assert (n->virtual_refcount == 0);
411 /* Special reference counting behavior. */
412 n->refcount = REFCOUNT_INFINITY;
413
414 if (profiling_p)
415 {
416 thr->prof_info = NULL;
417 thr->api_info = NULL;
418 }
419 }
420 }
421
422 void
423 acc_unmap_data (void *h)
424 {
425 struct goacc_thread *thr = goacc_thread ();
426 struct gomp_device_descr *acc_dev = thr->dev;
427
428 /* No need to call lazy open, as the address must have been mapped. */
429
430 /* This is a no-op on shared-memory targets. */
431 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
432 return;
433
434 acc_prof_info prof_info;
435 acc_api_info api_info;
436 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
437
438 gomp_mutex_lock (&acc_dev->lock);
439
440 splay_tree_key n = lookup_host (acc_dev, h, 1);
441
442 if (!n)
443 {
444 gomp_mutex_unlock (&acc_dev->lock);
445 gomp_fatal ("%p is not a mapped block", (void *)h);
446 }
447
448 size_t host_size = n->host_end - n->host_start;
449
450 if (n->host_start != (uintptr_t) h)
451 {
452 gomp_mutex_unlock (&acc_dev->lock);
453 gomp_fatal ("[%p,%d] surrounds %p",
454 (void *) n->host_start, (int) host_size, (void *) h);
455 }
456 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
457 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating
458 the different 'REFCOUNT_INFINITY' cases, or simply separate
459 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
460 etc.)? */
461 else if (n->refcount != REFCOUNT_INFINITY)
462 {
463 gomp_mutex_unlock (&acc_dev->lock);
464 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
465 " by 'acc_map_data'",
466 (void *) h, (int) host_size);
467 }
468
469 splay_tree_remove (&acc_dev->mem_map, n);
470
471 struct target_mem_desc *tgt = n->tgt;
472
473 if (tgt->refcount == REFCOUNT_INFINITY)
474 {
475 gomp_mutex_unlock (&acc_dev->lock);
476 gomp_fatal ("cannot unmap target block");
477 }
478 else if (tgt->refcount > 1)
479 tgt->refcount--;
480 else
481 {
482 free (tgt->array);
483 free (tgt);
484 }
485
486 gomp_mutex_unlock (&acc_dev->lock);
487
488 if (profiling_p)
489 {
490 thr->prof_info = NULL;
491 thr->api_info = NULL;
492 }
493 }
494
495
496 /* Enter dynamic mapping for a single datum. Return the device pointer. */
497
498 static void *
499 goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
500 {
501 void *d;
502 splay_tree_key n;
503
504 if (!hostaddrs[0] || !sizes[0])
505 gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
506
507 goacc_lazy_initialize ();
508
509 struct goacc_thread *thr = goacc_thread ();
510 struct gomp_device_descr *acc_dev = thr->dev;
511
512 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
513 return hostaddrs[0];
514
515 acc_prof_info prof_info;
516 acc_api_info api_info;
517 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
518 if (profiling_p)
519 {
520 prof_info.async = async;
521 prof_info.async_queue = prof_info.async;
522 }
523
524 gomp_mutex_lock (&acc_dev->lock);
525
526 n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
527 if (n)
528 {
529 void *h = hostaddrs[0];
530 size_t s = sizes[0];
531
532 /* Present. */
533 d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);
534
535 if ((h + s) > (void *)n->host_end)
536 {
537 gomp_mutex_unlock (&acc_dev->lock);
538 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
539 }
540
541 assert (n->refcount != REFCOUNT_LINK);
542 if (n->refcount != REFCOUNT_INFINITY)
543 {
544 n->refcount++;
545 n->virtual_refcount++;
546 }
547
548 gomp_mutex_unlock (&acc_dev->lock);
549 }
550 else
551 {
552 const size_t mapnum = 1;
553
554 gomp_mutex_unlock (&acc_dev->lock);
555
556 goacc_aq aq = get_goacc_asyncqueue (async);
557
558 gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
559 true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
560
561 gomp_mutex_lock (&acc_dev->lock);
562 n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
563 assert (n != NULL);
564 assert (n->tgt_offset == 0);
565 assert ((uintptr_t) hostaddrs[0] == n->host_start);
566 d = (void *) n->tgt->tgt_start;
567 gomp_mutex_unlock (&acc_dev->lock);
568 }
569
570 if (profiling_p)
571 {
572 thr->prof_info = NULL;
573 thr->api_info = NULL;
574 }
575
576 return d;
577 }
578
579 void *
580 acc_create (void *h, size_t s)
581 {
582 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
583 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
584 }
585
586 void
587 acc_create_async (void *h, size_t s, int async)
588 {
589 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
590 goacc_enter_datum (&h, &s, &kinds, async);
591 }
592
/* acc_present_or_create used to be what acc_create is now, and acc_pcreate is
   acc_present_or_create by a different name.  Both are provided as strong
   aliases of acc_create where available, and as forwarding functions
   otherwise.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  void *dev_ptr = acc_create (h, s);

  return dev_ptr;
}

void *
acc_pcreate (void *h, size_t s)
{
  void *dev_ptr = acc_create (h, s);

  return dev_ptr;
}
#endif
611
612 void *
613 acc_copyin (void *h, size_t s)
614 {
615 unsigned short kinds[1] = { GOMP_MAP_TO };
616 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
617 }
618
619 void
620 acc_copyin_async (void *h, size_t s, int async)
621 {
622 unsigned short kinds[1] = { GOMP_MAP_TO };
623 goacc_enter_datum (&h, &s, &kinds, async);
624 }
625
/* acc_present_or_copyin used to be what acc_copyin is now, and acc_pcopyin is
   acc_present_or_copyin by a different name.  Both are provided as strong
   aliases of acc_copyin where available, and as forwarding functions
   otherwise.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  void *dev_ptr = acc_copyin (h, s);

  return dev_ptr;
}

void *
acc_pcopyin (void *h, size_t s)
{
  void *dev_ptr = acc_copyin (h, s);

  return dev_ptr;
}
#endif
644
645
646 /* Exit a dynamic mapping for a single variable. */
647
648 static void
649 goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
650 {
651 /* No need to call lazy open, as the data must already have been
652 mapped. */
653
654 kind &= 0xff;
655
656 struct goacc_thread *thr = goacc_thread ();
657 struct gomp_device_descr *acc_dev = thr->dev;
658
659 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
660 return;
661
662 acc_prof_info prof_info;
663 acc_api_info api_info;
664 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
665 if (profiling_p)
666 {
667 prof_info.async = async;
668 prof_info.async_queue = prof_info.async;
669 }
670
671 gomp_mutex_lock (&acc_dev->lock);
672
673 splay_tree_key n = lookup_host (acc_dev, h, s);
674 if (!n)
675 /* PR92726, RP92970, PR92984: no-op. */
676 goto out;
677
678 if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
679 {
680 size_t host_size = n->host_end - n->host_start;
681 gomp_mutex_unlock (&acc_dev->lock);
682 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
683 (void *) h, (int) s, (void *) n->host_start, (int) host_size);
684 }
685
686 bool finalize = (kind == GOMP_MAP_DELETE
687 || kind == GOMP_MAP_FORCE_FROM);
688 if (finalize)
689 {
690 if (n->refcount != REFCOUNT_INFINITY)
691 n->refcount -= n->virtual_refcount;
692 n->virtual_refcount = 0;
693 }
694
695 if (n->virtual_refcount > 0)
696 {
697 if (n->refcount != REFCOUNT_INFINITY)
698 n->refcount--;
699 n->virtual_refcount--;
700 }
701 else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
702 n->refcount--;
703
704 if (n->refcount == 0)
705 {
706 goacc_aq aq = get_goacc_asyncqueue (async);
707
708 bool copyout = (kind == GOMP_MAP_FROM
709 || kind == GOMP_MAP_FORCE_FROM);
710 if (copyout)
711 {
712 void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
713 + (uintptr_t) h - n->host_start);
714 gomp_copy_dev2host (acc_dev, aq, h, d, s);
715 }
716
717 if (aq)
718 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
719 'gomp_unref_tgt' comment in
720 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
721 PR92881. */
722 gomp_remove_var_async (acc_dev, n, aq);
723 else
724 {
725 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
726 assert (is_tgt_unmapped);
727 }
728 }
729
730 out:
731 gomp_mutex_unlock (&acc_dev->lock);
732
733 if (profiling_p)
734 {
735 thr->prof_info = NULL;
736 thr->api_info = NULL;
737 }
738 }
739
740 void
741 acc_delete (void *h , size_t s)
742 {
743 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
744 }
745
746 void
747 acc_delete_async (void *h , size_t s, int async)
748 {
749 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
750 }
751
752 void
753 acc_delete_finalize (void *h , size_t s)
754 {
755 goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
756 }
757
758 void
759 acc_delete_finalize_async (void *h , size_t s, int async)
760 {
761 goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
762 }
763
764 void
765 acc_copyout (void *h, size_t s)
766 {
767 goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
768 }
769
770 void
771 acc_copyout_async (void *h, size_t s, int async)
772 {
773 goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
774 }
775
776 void
777 acc_copyout_finalize (void *h, size_t s)
778 {
779 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
780 }
781
782 void
783 acc_copyout_finalize_async (void *h, size_t s, int async)
784 {
785 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
786 }
787
788 static void
789 update_dev_host (int is_dev, void *h, size_t s, int async)
790 {
791 splay_tree_key n;
792 void *d;
793
794 goacc_lazy_initialize ();
795
796 struct goacc_thread *thr = goacc_thread ();
797 struct gomp_device_descr *acc_dev = thr->dev;
798
799 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
800 return;
801
802 /* Fortran optional arguments that are non-present result in a
803 NULL host address here. This can safely be ignored as it is
804 not possible to 'update' a non-present optional argument. */
805 if (h == NULL)
806 return;
807
808 acc_prof_info prof_info;
809 acc_api_info api_info;
810 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
811 if (profiling_p)
812 {
813 prof_info.async = async;
814 prof_info.async_queue = prof_info.async;
815 }
816
817 gomp_mutex_lock (&acc_dev->lock);
818
819 n = lookup_host (acc_dev, h, s);
820
821 if (!n)
822 {
823 gomp_mutex_unlock (&acc_dev->lock);
824 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
825 }
826
827 d = (void *) (n->tgt->tgt_start + n->tgt_offset
828 + (uintptr_t) h - n->host_start);
829
830 goacc_aq aq = get_goacc_asyncqueue (async);
831
832 if (is_dev)
833 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
834 else
835 gomp_copy_dev2host (acc_dev, aq, h, d, s);
836
837 gomp_mutex_unlock (&acc_dev->lock);
838
839 if (profiling_p)
840 {
841 thr->prof_info = NULL;
842 thr->api_info = NULL;
843 }
844 }
845
846 void
847 acc_update_device (void *h, size_t s)
848 {
849 update_dev_host (1, h, s, acc_async_sync);
850 }
851
/* Copy host range [H,+S] to its device copy on the queue selected by
   ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s, async);
}
857
858 void
859 acc_update_self (void *h, size_t s)
860 {
861 update_dev_host (0, h, s, acc_async_sync);
862 }
863
/* Copy the device copy of host range [H,+S] back to the host on the queue
   selected by ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s, async);
}
869
870 void
871 acc_attach_async (void **hostaddr, int async)
872 {
873 struct goacc_thread *thr = goacc_thread ();
874 struct gomp_device_descr *acc_dev = thr->dev;
875 goacc_aq aq = get_goacc_asyncqueue (async);
876
877 struct splay_tree_key_s cur_node;
878 splay_tree_key n;
879
880 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
881 return;
882
883 gomp_mutex_lock (&acc_dev->lock);
884
885 cur_node.host_start = (uintptr_t) hostaddr;
886 cur_node.host_end = cur_node.host_start + sizeof (void *);
887 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
888
889 if (n == NULL)
890 gomp_fatal ("struct not mapped for acc_attach");
891
892 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
893 0, NULL);
894
895 gomp_mutex_unlock (&acc_dev->lock);
896 }
897
898 void
899 acc_attach (void **hostaddr)
900 {
901 acc_attach_async (hostaddr, acc_async_sync);
902 }
903
904 static void
905 goacc_detach_internal (void **hostaddr, int async, bool finalize)
906 {
907 struct goacc_thread *thr = goacc_thread ();
908 struct gomp_device_descr *acc_dev = thr->dev;
909 struct splay_tree_key_s cur_node;
910 splay_tree_key n;
911 struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
912
913 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
914 return;
915
916 gomp_mutex_lock (&acc_dev->lock);
917
918 cur_node.host_start = (uintptr_t) hostaddr;
919 cur_node.host_end = cur_node.host_start + sizeof (void *);
920 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
921
922 if (n == NULL)
923 gomp_fatal ("struct not mapped for acc_detach");
924
925 gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
926
927 gomp_mutex_unlock (&acc_dev->lock);
928 }
929
930 void
931 acc_detach (void **hostaddr)
932 {
933 goacc_detach_internal (hostaddr, acc_async_sync, false);
934 }
935
/* Detach the pointer at HOSTADDR without finalizing, on the queue selected by
   ASYNC.  */

void
acc_detach_async (void **hostaddr, int async)
{
  const bool finalize = false;

  goacc_detach_internal (hostaddr, async, finalize);
}
941
942 void
943 acc_detach_finalize (void **hostaddr)
944 {
945 goacc_detach_internal (hostaddr, acc_async_sync, true);
946 }
947
/* Detach the pointer at HOSTADDR with finalization requested, on the queue
   selected by ASYNC.  */

void
acc_detach_finalize_async (void **hostaddr, int async)
{
  const bool finalize = true;

  goacc_detach_internal (hostaddr, async, finalize);
}
953
954 /* Some types of (pointer) variables use several consecutive mappings, which
955 must be treated as a group for enter/exit data directives. This function
956 returns the last mapping in such a group (inclusive), or POS for singleton
957 mappings. */
958
959 static int
960 find_group_last (int pos, size_t mapnum, unsigned short *kinds)
961 {
962 unsigned char kind0 = kinds[pos] & 0xff;
963 int first_pos = pos, last_pos = pos;
964
965 if (kind0 == GOMP_MAP_TO_PSET)
966 {
967 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
968 last_pos = ++pos;
969 /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */
970 assert (last_pos > first_pos);
971 }
972 else
973 {
974 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
975 mapping. */
976 if (pos + 1 < mapnum
977 && (kinds[pos + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER)
978 return pos + 1;
979
980 /* We can have one or several GOMP_MAP_POINTER mappings after a to/from
981 (etc.) mapping. */
982 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
983 last_pos = ++pos;
984 }
985
986 return last_pos;
987 }
988
989 /* Map variables for OpenACC "enter data". We can't just call
990 gomp_map_vars_async once, because individual mapped variables might have
991 "exit data" called for them at different times. */
992
993 static void
994 goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
995 void **hostaddrs, size_t *sizes,
996 unsigned short *kinds, goacc_aq aq)
997 {
998 for (size_t i = 0; i < mapnum; i++)
999 {
1000 int group_last = find_group_last (i, mapnum, kinds);
1001
1002 gomp_map_vars_async (acc_dev, aq,
1003 (group_last - i) + 1,
1004 &hostaddrs[i], NULL,
1005 &sizes[i], &kinds[i], true,
1006 GOMP_MAP_VARS_OPENACC_ENTER_DATA);
1007
1008 i = group_last;
1009 }
1010 }
1011
1012 /* Unmap variables for OpenACC "exit data". */
1013
1014 static void
1015 goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1016 void **hostaddrs, size_t *sizes,
1017 unsigned short *kinds, goacc_aq aq)
1018 {
1019 gomp_mutex_lock (&acc_dev->lock);
1020
1021 for (size_t i = 0; i < mapnum; ++i)
1022 {
1023 unsigned char kind = kinds[i] & 0xff;
1024 bool copyfrom = false;
1025 bool finalize = false;
1026
1027 if (kind == GOMP_MAP_FORCE_FROM
1028 || kind == GOMP_MAP_DELETE)
1029 finalize = true;
1030
1031 switch (kind)
1032 {
1033 case GOMP_MAP_FROM:
1034 case GOMP_MAP_FORCE_FROM:
1035 case GOMP_MAP_ALWAYS_FROM:
1036 copyfrom = true;
1037 /* Fallthrough. */
1038
1039 case GOMP_MAP_TO_PSET:
1040 case GOMP_MAP_POINTER:
1041 case GOMP_MAP_DELETE:
1042 case GOMP_MAP_RELEASE:
1043 {
1044 struct splay_tree_key_s cur_node;
1045 size_t size;
1046 if (kind == GOMP_MAP_POINTER)
1047 size = sizeof (void *);
1048 else
1049 size = sizes[i];
1050 cur_node.host_start = (uintptr_t) hostaddrs[i];
1051 cur_node.host_end = cur_node.host_start + size;
1052 splay_tree_key n
1053 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1054
1055 if (n == NULL)
1056 continue;
1057
1058 if (finalize)
1059 {
1060 if (n->refcount != REFCOUNT_INFINITY)
1061 n->refcount -= n->virtual_refcount;
1062 n->virtual_refcount = 0;
1063 }
1064
1065 if (n->virtual_refcount > 0)
1066 {
1067 if (n->refcount != REFCOUNT_INFINITY)
1068 n->refcount--;
1069 n->virtual_refcount--;
1070 }
1071 else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
1072 n->refcount--;
1073
1074 if (copyfrom
1075 && (kind != GOMP_MAP_FROM || n->refcount == 0))
1076 gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start,
1077 (void *) (n->tgt->tgt_start + n->tgt_offset
1078 + cur_node.host_start
1079 - n->host_start),
1080 cur_node.host_end - cur_node.host_start);
1081
1082 if (n->refcount == 0)
1083 gomp_remove_var_async (acc_dev, n, aq);
1084 }
1085 break;
1086 default:
1087 gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1088 kind);
1089 }
1090 }
1091
1092 gomp_mutex_unlock (&acc_dev->lock);
1093 }
1094
1095 void
1096 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1097 size_t *sizes, unsigned short *kinds, int async,
1098 int num_waits, ...)
1099 {
1100 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
1101
1102 struct goacc_thread *thr;
1103 struct gomp_device_descr *acc_dev;
1104 bool data_enter = false;
1105 size_t i;
1106
1107 goacc_lazy_initialize ();
1108
1109 thr = goacc_thread ();
1110 acc_dev = thr->dev;
1111
1112 /* Determine if this is an "acc enter data". */
1113 for (i = 0; i < mapnum; ++i)
1114 {
1115 unsigned char kind = kinds[i] & 0xff;
1116
1117 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1118 continue;
1119
1120 if (kind == GOMP_MAP_FORCE_ALLOC
1121 || kind == GOMP_MAP_FORCE_PRESENT
1122 || kind == GOMP_MAP_FORCE_TO
1123 || kind == GOMP_MAP_TO
1124 || kind == GOMP_MAP_ALLOC)
1125 {
1126 data_enter = true;
1127 break;
1128 }
1129
1130 if (kind == GOMP_MAP_RELEASE
1131 || kind == GOMP_MAP_DELETE
1132 || kind == GOMP_MAP_FROM
1133 || kind == GOMP_MAP_FORCE_FROM)
1134 break;
1135
1136 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1137 kind);
1138 }
1139
1140 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
1141
1142 acc_prof_info prof_info;
1143 if (profiling_p)
1144 {
1145 thr->prof_info = &prof_info;
1146
1147 prof_info.event_type
1148 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
1149 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
1150 prof_info.version = _ACC_PROF_INFO_VERSION;
1151 prof_info.device_type = acc_device_type (acc_dev->type);
1152 prof_info.device_number = acc_dev->target_id;
1153 prof_info.thread_id = -1;
1154 prof_info.async = async;
1155 prof_info.async_queue = prof_info.async;
1156 prof_info.src_file = NULL;
1157 prof_info.func_name = NULL;
1158 prof_info.line_no = -1;
1159 prof_info.end_line_no = -1;
1160 prof_info.func_line_no = -1;
1161 prof_info.func_end_line_no = -1;
1162 }
1163 acc_event_info enter_exit_data_event_info;
1164 if (profiling_p)
1165 {
1166 enter_exit_data_event_info.other_event.event_type
1167 = prof_info.event_type;
1168 enter_exit_data_event_info.other_event.valid_bytes
1169 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
1170 enter_exit_data_event_info.other_event.parent_construct
1171 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
1172 enter_exit_data_event_info.other_event.implicit = 0;
1173 enter_exit_data_event_info.other_event.tool_info = NULL;
1174 }
1175 acc_api_info api_info;
1176 if (profiling_p)
1177 {
1178 thr->api_info = &api_info;
1179
1180 api_info.device_api = acc_device_api_none;
1181 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
1182 api_info.device_type = prof_info.device_type;
1183 api_info.vendor = -1;
1184 api_info.device_handle = NULL;
1185 api_info.context_handle = NULL;
1186 api_info.async_handle = NULL;
1187 }
1188
1189 if (profiling_p)
1190 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1191 &api_info);
1192
1193 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1194 || (flags & GOACC_FLAG_HOST_FALLBACK))
1195 {
1196 prof_info.device_type = acc_device_host;
1197 api_info.device_type = prof_info.device_type;
1198
1199 goto out_prof;
1200 }
1201
1202 if (num_waits)
1203 {
1204 va_list ap;
1205
1206 va_start (ap, num_waits);
1207 goacc_wait (async, num_waits, &ap);
1208 va_end (ap);
1209 }
1210
1211 goacc_aq aq = get_goacc_asyncqueue (async);
1212
1213 if (data_enter)
1214 goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1215 else
1216 goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1217
1218 out_prof:
1219 if (profiling_p)
1220 {
1221 prof_info.event_type
1222 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
1223 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
1224 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1225 &api_info);
1226
1227 thr->prof_info = NULL;
1228 thr->api_info = NULL;
1229 }
1230 }