[PR92116, PR92877] [OpenACC] Replace 'openacc.data_environ' by standard libgomp mechanics
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return block containing [D->S), or NULL if not contained.
72
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
88
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
91
92 void *
93 acc_malloc (size_t s)
94 {
95 if (!s)
96 return NULL;
97
98 goacc_lazy_initialize ();
99
100 struct goacc_thread *thr = goacc_thread ();
101
102 assert (thr->dev);
103
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
106
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112
113 if (profiling_p)
114 {
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
117 }
118
119 return res;
120 }
121
/* Free device memory D previously obtained from acc_malloc.  NULL is a
   no-op; on shared-memory devices this is a plain free.  Aborts if D still
   lies inside an active mapping (PR92503).  */

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: the pointer came from malloc in acc_malloc.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      /* Recover the corresponding host address purely for the diagnostic.
	 NOTE(review): 'd - tgt_start + tgt_offset' adds tgt_offset rather
	 than subtracting it — confirm the sign against the gomp_map_vars
	 layout (same pattern as in acc_hostptr).  */
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
172
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
176 {
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
180
181 assert (thr && thr->dev);
182
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184 {
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
190 }
191
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
196 {
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
199 }
200
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
206
207 if (profiling_p)
208 {
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
211 }
212 }
213
/* Copy S bytes from host address H to device address D, synchronously.  */

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_memcpy_to_device, but on async queue ASYNC.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

/* Copy S bytes from device address D to host address H, synchronously.  */

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_memcpy_from_device, but on async queue ASYNC.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
237
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
318 /* Return 1 if host data [H,+S] is present on the device. */
319
320 int
321 acc_is_present (void *h, size_t s)
322 {
323 splay_tree_key n;
324
325 if (!s || !h)
326 return 0;
327
328 goacc_lazy_initialize ();
329
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
332
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
335
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
338
339 gomp_mutex_lock (&acc_dev->lock);
340
341 n = lookup_host (acc_dev, h, s);
342
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
347
348 gomp_mutex_unlock (&acc_dev->lock);
349
350 return n != NULL;
351 }
352
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
364
365 goacc_lazy_initialize ();
366
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
369
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 {
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
374 }
375 else
376 {
377 struct goacc_thread *thr = goacc_thread ();
378
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
382
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386
387 gomp_mutex_lock (&acc_dev->lock);
388
389 if (lookup_host (acc_dev, h, s))
390 {
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
394 }
395
396 if (lookup_dev (&thr->dev->mem_map, d, s))
397 {
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
401 }
402
403 gomp_mutex_unlock (&acc_dev->lock);
404
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_OPENACC);
407 splay_tree_key n = tgt->list[0].key;
408 assert (n->refcount == 1);
409 assert (n->dynamic_refcount == 0);
410 /* Special reference counting behavior. */
411 n->refcount = REFCOUNT_INFINITY;
412
413 if (profiling_p)
414 {
415 thr->prof_info = NULL;
416 thr->api_info = NULL;
417 }
418 }
419 }
420
/* Undo an 'acc_map_data' mapping whose host range starts exactly at H.
   Fatal if H is not mapped, points into the interior of a block, or the
   block was not established by acc_map_data (refcount != infinity).  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* Interior pointers are rejected: H must be the block's exact start.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
495
/* Flag bits for present_create_copy.  */
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

/* Shared implementation of acc_create / acc_copyin and their async
   variants: return the device address for host range [H,+S], creating
   (and with FLAG_COPY populating) a mapping on async queue ASYNC if none
   exists.  F is a mask of the FLAG_* bits above.  Fatal on a NULL/empty
   range, on an existing mapping when FLAG_PRESENT is clear, or on a
   missing mapping when FLAG_CREATE is clear.  */

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: the host pointer already is the device pointer.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
        }
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      /* An infinite refcount (from acc_map_data) is left untouched.  */
      if (n->refcount != REFCOUNT_INFINITY)
	{
	  n->refcount++;
	  n->dynamic_refcount++;
	}
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
				 &kinds, true, GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      d = tgt->to_free;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
590
/* Ensure host range [H,+S] is mapped on the device (allocation only, no
   data transfer) and return the device pointer.  */

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}

/* As acc_create, but on async queue ASYNC; no device pointer is returned.  */

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif

/* Ensure host range [H,+S] is mapped on the device and copy the host data
   to it; return the device pointer.  */

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
			      acc_async_sync);
}

/* As acc_copyin, but on async queue ASYNC; no device pointer is returned.  */

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
653
/* Flag bits for delete_copyout.  */
#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

/* Shared implementation of acc_delete / acc_copyout and their _finalize /
   _async variants: drop one dynamic reference to mapping [H,+S] (or, with
   FLAG_FINALIZE, all dynamic references); when the reference count reaches
   zero, optionally (FLAG_COPYOUT) copy the data back to the host on async
   queue ASYNC, then remove the mapping.  LIBFNNAME names the user-visible
   entry point (currently unused).  */

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  /* An acc_map_data mapping is treated as having no remaining references
     once it reaches this point.  */
  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (f & FLAG_COPYOUT)
	{
	  /* NOTE(review): the copy-out runs (or is enqueued) while the
	     device lock is held, and with a non-sync queue the variable is
	     removed immediately after enqueueing — confirm the async queue
	     retains what it needs.  */
	  goacc_aq aq = get_goacc_asyncqueue (async);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
741
/* Drop one dynamic reference to [H,+S]; remove the mapping when no
   references remain (no copy back).  Synchronous.  */

void
acc_delete (void *h , size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_delete, but on async queue ASYNC.  */

void
acc_delete_async (void *h , size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}

/* As acc_delete, but drop ALL dynamic references.  */

void
acc_delete_finalize (void *h , size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_delete_finalize, but on async queue ASYNC.  */

void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

/* As acc_delete, but copy the device data back to the host before the
   mapping is removed.  Synchronous.  */

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}

/* As acc_copyout, but on async queue ASYNC.  */

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}

/* As acc_copyout, but drop ALL dynamic references.  */

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
		  __FUNCTION__);
}

/* As acc_copyout_finalize, but on async queue ASYNC.  */

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
790
791 static void
792 update_dev_host (int is_dev, void *h, size_t s, int async)
793 {
794 splay_tree_key n;
795 void *d;
796
797 goacc_lazy_initialize ();
798
799 struct goacc_thread *thr = goacc_thread ();
800 struct gomp_device_descr *acc_dev = thr->dev;
801
802 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
803 return;
804
805 /* Fortran optional arguments that are non-present result in a
806 NULL host address here. This can safely be ignored as it is
807 not possible to 'update' a non-present optional argument. */
808 if (h == NULL)
809 return;
810
811 acc_prof_info prof_info;
812 acc_api_info api_info;
813 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
814 if (profiling_p)
815 {
816 prof_info.async = async;
817 prof_info.async_queue = prof_info.async;
818 }
819
820 gomp_mutex_lock (&acc_dev->lock);
821
822 n = lookup_host (acc_dev, h, s);
823
824 if (!n)
825 {
826 gomp_mutex_unlock (&acc_dev->lock);
827 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
828 }
829
830 d = (void *) (n->tgt->tgt_start + n->tgt_offset
831 + (uintptr_t) h - n->host_start);
832
833 goacc_aq aq = get_goacc_asyncqueue (async);
834
835 if (is_dev)
836 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
837 else
838 gomp_copy_dev2host (acc_dev, aq, h, d, s);
839
840 gomp_mutex_unlock (&acc_dev->lock);
841
842 if (profiling_p)
843 {
844 thr->prof_info = NULL;
845 thr->api_info = NULL;
846 }
847 }
848
/* Refresh the device copy of mapped host range [H,+S], synchronously.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

/* As acc_update_device, but on async queue ASYNC.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

/* Refresh the host copy of mapped range [H,+S], synchronously.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

/* As acc_update_self, but on async queue ASYNC.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
872
/* For a pointer/pset group of MAPNUM entries described by HOSTADDRS /
   SIZES / KINDS: if the first host range is already present, bump the
   (dynamic) reference counts of the group's keys; otherwise create the
   mappings on async queue ASYNC.  A NULL *HOSTADDRS is a no-op.  */

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (*hostaddrs == NULL)
    return;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      /* NOTE(review): N and TGT are dereferenced after the device lock is
	 dropped — confirm a concurrent unmap cannot invalidate them.  */
      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    /* Bump refcounts for the whole group following N's entry.  */
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    return;
	  }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;
}
916
917 void
918 gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
919 int finalize, int mapnum)
920 {
921 struct goacc_thread *thr = goacc_thread ();
922 struct gomp_device_descr *acc_dev = thr->dev;
923 splay_tree_key n;
924 struct target_mem_desc *t;
925 int minrefs = (mapnum == 1) ? 2 : 3;
926
927 if (!acc_is_present (h, s))
928 return;
929
930 gomp_mutex_lock (&acc_dev->lock);
931
932 n = lookup_host (acc_dev, h, 1);
933
934 if (!n)
935 {
936 gomp_mutex_unlock (&acc_dev->lock);
937 gomp_fatal ("%p is not a mapped block", (void *)h);
938 }
939
940 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
941
942 t = n->tgt;
943
944 if (n->refcount < n->dynamic_refcount)
945 {
946 gomp_mutex_unlock (&acc_dev->lock);
947 gomp_fatal ("Dynamic reference counting assert fail\n");
948 }
949
950 if (finalize)
951 {
952 n->refcount -= n->dynamic_refcount;
953 n->dynamic_refcount = 0;
954 }
955 else if (n->dynamic_refcount)
956 {
957 n->dynamic_refcount--;
958 n->refcount--;
959 }
960
961 gomp_mutex_unlock (&acc_dev->lock);
962
963 if (n->refcount == 0)
964 {
965 /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
966 n->refcount = 1;
967 t->refcount = minrefs;
968 for (size_t i = 0; i < t->list_count; i++)
969 if (t->list[i].key == n)
970 {
971 t->list[i].copy_from = force_copyfrom ? 1 : 0;
972 break;
973 }
974
975 /* If running synchronously, unmap immediately. */
976 if (async < acc_async_noval)
977 gomp_unmap_vars (t, true);
978 else
979 {
980 goacc_aq aq = get_goacc_asyncqueue (async);
981 gomp_unmap_vars_async (t, true, aq);
982 }
983 }
984
985 gomp_mutex_unlock (&acc_dev->lock);
986
987 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
988 }