[OpenMP/OpenACC/Fortran] Fix mapping of optional (present|absent) arguments
[gcc.git] / libgomp / oacc-mem.c
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <string.h>
#include <assert.h>

/* Return block containing [H,+S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return block containing [D,+S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on
   entry, and remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void *offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}

/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check whether it is
   mapped, and if so, to unmap it.  */

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the pointer value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless that NULL came from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
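
/* A minimal usage sketch (illustrative only, not part of this file; the
   function name is hypothetical): acc_malloc returns raw device memory and
   acc_free releases it.  On shared-memory devices these reduce to plain
   malloc/free, and acc_free also unmaps the block if it happens to be
   mapped.

     #include <openacc.h>

     void
     example_device_alloc (void)
     {
       void *d = acc_malloc (1024);   // Device memory, or NULL.
       if (d)
         acc_free (d);                // Unmaps first, if D was mapped.
     }
 */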

static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
                      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
        memmove (h, d, s);
      else
        memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
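
/* A usage sketch for the blocking copy routines (illustrative only; the
   function name is hypothetical).  It assumes acc_malloc succeeded and
   round-trips a buffer through device memory:

     #include <openacc.h>
     #include <assert.h>
     #include <string.h>

     void
     example_roundtrip (void)
     {
       char src[8] = "payload", dst[8];
       void *d = acc_malloc (sizeof src);
       acc_memcpy_to_device (d, src, sizeof src);    // Host -> device.
       acc_memcpy_from_device (dst, d, sizeof dst);  // Device -> host.
       assert (memcmp (src, dst, sizeof src) == 0);
       acc_free (d);
     }
 */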

/* Return the device pointer that corresponds to host data H, or NULL
   if there is no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D, or NULL
   if there is no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
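
/* A usage sketch for the address-translation pair (illustrative only;
   the array A is hypothetical).  For data made present with acc_copyin,
   the two lookups are inverses of each other:

     float a[256];
     acc_copyin (a, sizeof a);
     void *d = acc_deviceptr (a);    // Host address -> device address.
     assert (acc_hostptr (d) == a);  // Device address -> host address.
     acc_delete (a, sizeof a);
 */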

/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
            || (uintptr_t)h + s > n->host_end
            || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
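
/* Illustrative sketch (hypothetical host array A): acc_is_present answers
   nonzero only while some mapping covers the whole queried range:

     acc_copyin (a, sizeof a);
     assert (acc_is_present (a, sizeof a));
     acc_delete (a, sizeof a);
     assert (!acc_is_present (a, sizeof a));
 */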

/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt = NULL;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      if (!d || !h || !s)
        gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *)h, (int)s, (void *)d, (int)s);

      acc_prof_info prof_info;
      acc_api_info api_info;
      bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
                      (int)s);
        }

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
                      (int)s);
        }

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
                           &kinds, true, GOMP_MAP_VARS_OPENACC);
      tgt->list[0].key->refcount = REFCOUNT_INFINITY;

      if (profiling_p)
        {
          thr->prof_info = NULL;
          thr->api_info = NULL;
        }
    }

  /* On a shared-memory system TGT is still NULL here; there is nothing to
     chain into the data environment.  */
  if (tgt)
    {
      gomp_mutex_lock (&acc_dev->lock);
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;
      gomp_mutex_unlock (&acc_dev->lock);
    }
}

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars, via gomp_unmap_tgt, from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        if (n->tgt == t)
          {
            if (tp)
              tp->prev = t->prev;
            else
              acc_dev->openacc.data_environ = t->prev;

            break;
          }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
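
/* A usage sketch for the explicit mapping pair (illustrative only; the
   function name is hypothetical).  acc_map_data associates device memory
   the caller already owns with a host buffer; acc_unmap_data severs the
   association but does not free the device memory:

     void
     example_map (char *h, size_t s)
     {
       void *d = acc_malloc (s);
       acc_map_data (h, d, s);   // [h,+s] now maps to [d,+s].
       // ... kernels may now treat H as present data ...
       acc_unmap_data (h);
       acc_free (d);             // Still the caller's responsibility.
     }
 */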

#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
                      (void *)h, (int)s, (void *)d, (int)s);
        }
      if ((h + s) > (void *)n->host_end)
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
        }

      if (n->refcount != REFCOUNT_INFINITY)
        {
          n->refcount++;
          n->dynamic_refcount++;
        }
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
        kinds = GOMP_MAP_TO;
      else
        kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
                                 &kinds, true, GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
                              acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
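
/* Illustrative sketch of the dynamic reference counting performed by
   present_create_copy (hypothetical host array A): acc_create allocates
   uninitialized device storage, acc_copyin also transfers the host
   contents, and repeated calls on already-present data only bump the
   reference counts:

     acc_copyin (a, sizeof a);   // Allocate and copy; refcounts 1/1.
     acc_copyin (a, sizeof a);   // Already present; refcounts 2/2.
     acc_delete (a, sizeof a);   // Drops one reference.
     acc_delete (a, sizeof a);   // Last reference: data is unmapped.
 */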

#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
                  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assertion failed");
    }

  if (f & FLAG_FINALIZE)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (n->tgt->refcount == 2)
        {
          struct target_mem_desc *tp, *t;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            if (n->tgt == t)
              {
                if (tp)
                  tp->prev = t->prev;
                else
                  acc_dev->openacc.data_environ = t->prev;
                break;
              }
        }

      if (f & FLAG_COPYOUT)
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_copy_dev2host (acc_dev, aq, h, d, s);
        }
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}

void
acc_delete_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
                  __FUNCTION__);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
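
/* Illustrative sketch contrasting the unmapping entry points (hypothetical
   host array A): acc_copyout copies the device contents back before the
   data is unmapped, acc_delete discards them, and the _finalize variants
   zero the dynamic reference count so even multiply-copyin'd data is
   removed:

     acc_copyin (a, sizeof a);
     acc_copyin (a, sizeof a);            // Dynamic refcount now 2.
     acc_copyout_finalize (a, sizeof a);  // Copies back and unmaps anyway.
 */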

static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
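
/* Illustrative sketch (hypothetical host array A): the update routines
   synchronize the two copies of already-present data without changing the
   mapping or its reference counts:

     acc_copyin (a, sizeof a);
     a[0] = 42;                        // Host-side change ...
     acc_update_device (a, sizeof a);  // ... pushed to the device copy.
     acc_update_self (a, sizeof a);    // Device copy pulled back to host.
     acc_delete (a, sizeof a);
 */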

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
                         void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (*hostaddrs == NULL)
    return;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
        if (tgt->list[i].key == n)
          {
            for (size_t j = 0; j < mapnum; j++)
              if (i + j < tgt->list_count && tgt->list[i + j].key)
                {
                  tgt->list[i + j].key->refcount++;
                  tgt->list[i + j].key->dynamic_refcount++;
                }
            return;
          }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
                             NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
                         int finalize, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assertion failed");
    }

  if (finalize)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (t->refcount == minrefs)
        {
          /* This is the last reference, so pull the descriptor off the
             chain.  This prevents gomp_unmap_vars, via gomp_unmap_tgt, from
             freeing the device memory.  */
          struct target_mem_desc *tp;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            {
              if (n->tgt == t)
                {
                  if (tp)
                    tp->prev = t->prev;
                  else
                    acc_dev->openacc.data_environ = t->prev;
                  break;
                }
            }
        }

      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
      n->refcount = 1;
      t->refcount = minrefs;
      for (size_t i = 0; i < t->list_count; i++)
        if (t->list[i].key == n)
          {
            t->list[i].copy_from = force_copyfrom ? 1 : 0;
            break;
          }

      /* The device lock is taken again inside gomp_unmap_vars, so it must
         be released before unmapping.  */
      gomp_mutex_unlock (&acc_dev->lock);

      /* If running synchronously, unmap immediately.  */
      if (async < acc_async_noval)
        gomp_unmap_vars (t, true);
      else
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_unmap_vars_async (t, true, aq);
        }
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}