[PR92843] [OpenACC] Fix dynamic reference counting for structured 'REFCOUNT_INFINITY'
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return block containing [D->S), or NULL if not contained.
72
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
88
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
91
92 void *
93 acc_malloc (size_t s)
94 {
95 if (!s)
96 return NULL;
97
98 goacc_lazy_initialize ();
99
100 struct goacc_thread *thr = goacc_thread ();
101
102 assert (thr->dev);
103
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
106
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112
113 if (profiling_p)
114 {
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
117 }
118
119 return res;
120 }
121
/* Free device memory D previously obtained from acc_malloc.  NULL is
   silently ignored.  Aborts via gomp_fatal if D still lies inside an
   active mapping (PR92503), or if the device-side free fails.  */

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  /* On shared-memory devices, acc_malloc used the host allocator.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      /* Translate D back to the corresponding host address, purely for the
	 diagnostic message below.  */
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
172
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
176 {
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
180
181 assert (thr && thr->dev);
182
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184 {
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
190 }
191
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
196 {
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
199 }
200
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
206
207 if (profiling_p)
208 {
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
211 }
212 }
213
/* Synchronously copy S bytes from host address H to device address D.  */

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}
219
/* Asynchronously (on queue ASYNC) copy S bytes from host address H to
   device address D.  */

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}
225
/* Synchronously copy S bytes from device address D to host address H.  */

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}
231
/* Asynchronously (on queue ASYNC) copy S bytes from device address D to
   host address H.  */

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
237
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
318 /* Return 1 if host data [H,+S] is present on the device. */
319
320 int
321 acc_is_present (void *h, size_t s)
322 {
323 splay_tree_key n;
324
325 if (!s || !h)
326 return 0;
327
328 goacc_lazy_initialize ();
329
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
332
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
335
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
338
339 gomp_mutex_lock (&acc_dev->lock);
340
341 n = lookup_host (acc_dev, h, s);
342
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
347
348 gomp_mutex_unlock (&acc_dev->lock);
349
350 return n != NULL;
351 }
352
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
364
365 goacc_lazy_initialize ();
366
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
369
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 {
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
374 }
375 else
376 {
377 struct goacc_thread *thr = goacc_thread ();
378
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
382
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386
387 gomp_mutex_lock (&acc_dev->lock);
388
389 if (lookup_host (acc_dev, h, s))
390 {
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
394 }
395
396 if (lookup_dev (&thr->dev->mem_map, d, s))
397 {
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
401 }
402
403 gomp_mutex_unlock (&acc_dev->lock);
404
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_OPENACC);
407 splay_tree_key n = tgt->list[0].key;
408 assert (n->refcount == 1);
409 assert (n->dynamic_refcount == 0);
410 /* Special reference counting behavior. */
411 n->refcount = REFCOUNT_INFINITY;
412
413 if (profiling_p)
414 {
415 thr->prof_info = NULL;
416 thr->api_info = NULL;
417 }
418 }
419 }
420
/* Remove a mapping previously established by 'acc_map_data' for host address
   H.  Aborts if H is not mapped, is not the start of its mapping, or the
   mapping does not carry the 'REFCOUNT_INFINITY' reference count that
   'acc_map_data' assigns.  The device memory itself is not freed.  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* H must be the exact start of the mapping, not an interior address.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)? */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;
    }

  /* Drop the lock before gomp_unmap_vars, which takes the device lock
     itself.  */
  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
495
496 #define FLAG_PRESENT (1 << 0)
497 #define FLAG_CREATE (1 << 1)
498 #define FLAG_COPY (1 << 2)
499
/* Worker for acc_create/acc_copyin and their variants.  F is a bitmask of
   FLAG_PRESENT/FLAG_CREATE/FLAG_COPY; ASYNC names the async queue.  Returns
   the device address corresponding to host range [H,+S].  For an
   already-present mapping both the structured reference count (unless it is
   'REFCOUNT_INFINITY') and the dynamic reference count are incremented;
   a freshly created mapping starts with both counts at one.  */

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      /* Without FLAG_PRESENT, finding an existing mapping is an error
	 (historical acc_create/acc_copyin semantics).  */
      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
        }
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      assert (n->refcount != REFCOUNT_LINK);
      /* A structured 'REFCOUNT_INFINITY' count is left alone; only the
	 dynamic count tracks this reference.  */
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount++;
      n->dynamic_refcount++;

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* Drop the lock: gomp_map_vars_async takes the device lock itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
				 &kinds, true, GOMP_MAP_VARS_OPENACC);
      n = tgt->list[0].key;
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      n->dynamic_refcount++;

      d = tgt->to_free;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
592
/* Map host range [H,+S] on the device (no copy), or increment the reference
   counts if already present; return the device address.  */

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}
598
/* Asynchronous (queue ASYNC) variant of acc_create; no device address is
   returned.  */

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}
604
/* acc_present_or_create used to be what acc_create is now. */
/* acc_pcreate is acc_present_or_create by a different name. */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
/* Fallback when symbol aliasing is unavailable: plain forwarding
   wrappers.  */
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
623
/* Map host range [H,+S] on the device and copy its contents there, or
   increment the reference counts if already present; return the device
   address.  */

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
			      acc_async_sync);
}
630
/* Asynchronous (queue ASYNC) variant of acc_copyin; no device address is
   returned.  */

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}
636
/* acc_present_or_copyin used to be what acc_copyin is now. */
/* acc_pcopyin is acc_present_or_copyin by a different name. */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
/* Fallback when symbol aliasing is unavailable: plain forwarding
   wrappers.  */
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
655
656 #define FLAG_COPYOUT (1 << 0)
657 #define FLAG_FINALIZE (1 << 1)
658
/* Worker for acc_delete/acc_copyout and their variants.  F is a bitmask of
   FLAG_COPYOUT/FLAG_FINALIZE; LIBFNNAME is the user-visible entry point
   name (currently unused here).  Decrements the reference counts for host
   range [H,+S]; when the structured reference count reaches zero, optionally
   copies the data back and removes the mapping.  A structured
   'REFCOUNT_INFINITY' mapping (e.g. one created by 'acc_map_data') keeps its
   infinite count and is therefore never removed here (PR92843).  */

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  /* [H,H+S) must lie entirely within the mapping found.  */
  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  /* The dynamic count can never exceed the structured count (which for
     'REFCOUNT_INFINITY' is trivially satisfied).  */
  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY
      && n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      /* "finalize" drops all dynamic references at once; an infinite
	 structured count stays infinite.  */
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->dynamic_refcount--;
    }

  /* Only reachable for finite structured counts, hence never for
     'acc_map_data' mappings.  */
  if (n->refcount == 0)
    {
      if (f & FLAG_COPYOUT)
	{
	  goacc_aq aq = get_goacc_asyncqueue (async);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
742
/* Decrement the reference counts for [H,+S]; remove the mapping (without
   copy back) when the count drops to zero.  */

void
acc_delete (void *h , size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}
748
/* Asynchronous (queue ASYNC) variant of acc_delete.  */

void
acc_delete_async (void *h , size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}
754
/* Like acc_delete, but with "finalize" semantics: drop all dynamic
   references to [H,+S] at once.  */

void
acc_delete_finalize (void *h , size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}
760
/* Asynchronous (queue ASYNC) variant of acc_delete_finalize.  */

void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
766
/* Decrement the reference counts for [H,+S]; copy the data back to the host
   and remove the mapping when the count drops to zero.  */

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}
772
/* Asynchronous (queue ASYNC) variant of acc_copyout.  */

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}
778
/* Like acc_copyout, but with "finalize" semantics: drop all dynamic
   references to [H,+S] at once.  */

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
		  __FUNCTION__);
}
785
/* Asynchronous (queue ASYNC) variant of acc_copyout_finalize.  */

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
791
792 static void
793 update_dev_host (int is_dev, void *h, size_t s, int async)
794 {
795 splay_tree_key n;
796 void *d;
797
798 goacc_lazy_initialize ();
799
800 struct goacc_thread *thr = goacc_thread ();
801 struct gomp_device_descr *acc_dev = thr->dev;
802
803 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
804 return;
805
806 /* Fortran optional arguments that are non-present result in a
807 NULL host address here. This can safely be ignored as it is
808 not possible to 'update' a non-present optional argument. */
809 if (h == NULL)
810 return;
811
812 acc_prof_info prof_info;
813 acc_api_info api_info;
814 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
815 if (profiling_p)
816 {
817 prof_info.async = async;
818 prof_info.async_queue = prof_info.async;
819 }
820
821 gomp_mutex_lock (&acc_dev->lock);
822
823 n = lookup_host (acc_dev, h, s);
824
825 if (!n)
826 {
827 gomp_mutex_unlock (&acc_dev->lock);
828 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
829 }
830
831 d = (void *) (n->tgt->tgt_start + n->tgt_offset
832 + (uintptr_t) h - n->host_start);
833
834 goacc_aq aq = get_goacc_asyncqueue (async);
835
836 if (is_dev)
837 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
838 else
839 gomp_copy_dev2host (acc_dev, aq, h, d, s);
840
841 gomp_mutex_unlock (&acc_dev->lock);
842
843 if (profiling_p)
844 {
845 thr->prof_info = NULL;
846 thr->api_info = NULL;
847 }
848 }
849
/* Synchronously refresh the device copy of host range [H,+S].  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}
855
/* Asynchronous (queue ASYNC) variant of acc_update_device.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}
861
/* Synchronously refresh the host copy of host range [H,+S] from the
   device.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}
867
/* Asynchronous (queue ASYNC) variant of acc_update_self.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
873
874
875 /* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
876 functions. */
877
/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.

   Map the MAPNUM-entry pointer/PSET group described by HOSTADDRS/SIZES/KINDS
   on async queue ASYNC.  Only the first mapping is considered in reference
   counting; the following ones implicitly follow suit.  A NULL first host
   address (non-present Fortran optional argument) is a no-op.  */

static void
goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
		      void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (*hostaddrs == NULL)
    return;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      assert (n->refcount != REFCOUNT_INFINITY
	      && n->refcount != REFCOUNT_LINK);
      gomp_mutex_unlock (&acc_dev->lock);

      /* Find N in its descriptor's list and bump the counts of the whole
	 MAPNUM-entry group starting there.  */
      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    return;
	  }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  /* Not present yet: create the mappings.  */
  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  splay_tree_key n = tgt->list[0].key;
  assert (n->refcount == 1);
  assert (n->dynamic_refcount == 0);
  n->dynamic_refcount++;
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
}
929
930 static void
931 goacc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
932 int finalize, int mapnum)
933 {
934 struct goacc_thread *thr = goacc_thread ();
935 struct gomp_device_descr *acc_dev = thr->dev;
936 splay_tree_key n;
937 struct target_mem_desc *t;
938 int minrefs = (mapnum == 1) ? 2 : 3;
939
940 if (!acc_is_present (h, s))
941 return;
942
943 gomp_mutex_lock (&acc_dev->lock);
944
945 n = lookup_host (acc_dev, h, 1);
946
947 if (!n)
948 {
949 gomp_mutex_unlock (&acc_dev->lock);
950 gomp_fatal ("%p is not a mapped block", (void *)h);
951 }
952
953 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
954
955 t = n->tgt;
956
957 assert (n->refcount != REFCOUNT_INFINITY
958 && n->refcount != REFCOUNT_LINK);
959 if (n->refcount < n->dynamic_refcount)
960 {
961 gomp_mutex_unlock (&acc_dev->lock);
962 gomp_fatal ("Dynamic reference counting assert fail\n");
963 }
964
965 if (finalize)
966 {
967 n->refcount -= n->dynamic_refcount;
968 n->dynamic_refcount = 0;
969 }
970 else if (n->dynamic_refcount)
971 {
972 n->refcount--;
973 n->dynamic_refcount--;
974 }
975
976 gomp_mutex_unlock (&acc_dev->lock);
977
978 if (n->refcount == 0)
979 {
980 /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
981 n->refcount = 1;
982 t->refcount = minrefs;
983 for (size_t i = 0; i < t->list_count; i++)
984 if (t->list[i].key == n)
985 {
986 t->list[i].copy_from = force_copyfrom ? 1 : 0;
987 break;
988 }
989
990 /* If running synchronously, unmap immediately. */
991 if (async < acc_async_noval)
992 gomp_unmap_vars (t, true);
993 else
994 {
995 goacc_aq aq = get_goacc_asyncqueue (async);
996 gomp_unmap_vars_async (t, true, aq);
997 }
998 }
999
1000 gomp_mutex_unlock (&acc_dev->lock);
1001
1002 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
1003 }
1004
1005 /* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
1006 'GOMP_MAP_POINTER'. */
1007
1008 static int
1009 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
1010 {
1011 if (pos + 1 >= mapnum)
1012 return 0;
1013
1014 unsigned char kind = kinds[pos+1] & 0xff;
1015
1016 if (kind == GOMP_MAP_TO_PSET)
1017 return 3;
1018 else if (kind == GOMP_MAP_POINTER)
1019 return 2;
1020
1021 return 0;
1022 }
1023
/* Implement OpenACC 'enter data' / 'exit data' directives: process the
   MAPNUM mappings in HOSTADDRS/SIZES/KINDS on async queue ASYNC, after
   waiting for NUM_WAITS queues given as varargs.  Dispatches each mapping to
   the corresponding acc_* API routine, with pointer/PSET groups handled via
   goacc_insert_pointer/goacc_remove_pointer.  Emits OpenACC Profiling
   Interface enter/exit data events around the whole operation.  */

void
GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		       size_t *sizes, unsigned short *kinds, int async,
		       int num_waits, ...)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  bool data_enter = false;
  size_t i;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Determine whether "finalize" semantics apply to all mappings of this
     OpenACC directive.  */
  bool finalize = false;
  if (mapnum > 0)
    {
      unsigned char kind = kinds[0] & 0xff;
      if (kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_FORCE_FROM)
	finalize = true;
    }

  /* Determine if this is an "acc enter data".  */
  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
	continue;

      if (kind == GOMP_MAP_FORCE_ALLOC
	  || kind == GOMP_MAP_FORCE_PRESENT
	  || kind == GOMP_MAP_FORCE_TO
	  || kind == GOMP_MAP_TO
	  || kind == GOMP_MAP_ALLOC)
	{
	  data_enter = true;
	  break;
	}

      if (kind == GOMP_MAP_RELEASE
	  || kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_FROM
	  || kind == GOMP_MAP_FORCE_FROM)
	break;

      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
		  kind);
    }

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  /* Set up the profiling records; these locals stay live until the matching
     "end" event is dispatched at 'out_prof' below.  */
  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }
  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
	= prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }
  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
			      &api_info);

  /* Nothing to do for shared memory or host fallback; only the profiling
     records are adjusted (these stores are harmless when !profiling_p).  */
  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  if (num_waits)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, &ap);
      va_end (ap);
    }

  /* In c, non-pointers and arrays are represented by a single data clause.
     Dynamically allocated arrays and subarrays are represented by a data
     clause followed by an internal GOMP_MAP_POINTER.

     In fortran, scalars and not allocated arrays are represented by a
     single data clause. Allocated arrays and subarrays have three mappings:
     1) the original data clause, 2) a PSET 3) a pointer to the array data.
  */

  if (data_enter)
    {
      for (i = 0; i < mapnum; i++)
	{
	  unsigned char kind = kinds[i] & 0xff;

	  /* Scan for pointers and PSETs.  */
	  int pointer = find_pointer (i, mapnum, kinds);

	  if (!pointer)
	    {
	      switch (kind)
		{
		case GOMP_MAP_ALLOC:
		case GOMP_MAP_FORCE_ALLOC:
		  acc_create_async (hostaddrs[i], sizes[i], async);
		  break;
		case GOMP_MAP_TO:
		case GOMP_MAP_FORCE_TO:
		  acc_copyin_async (hostaddrs[i], sizes[i], async);
		  break;
		default:
		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
			      kind);
		  break;
		}
	    }
	  else
	    {
	      goacc_insert_pointer (pointer, &hostaddrs[i], &sizes[i], &kinds[i],
				    async);
	      /* Increment 'i' by two because OpenACC requires fortran
		 arrays to be contiguous, so each PSET is associated with
		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
		 one MAP_POINTER.  */
	      i += pointer - 1;
	    }
	}
    }
  else
    for (i = 0; i < mapnum; ++i)
      {
	unsigned char kind = kinds[i] & 0xff;

	int pointer = find_pointer (i, mapnum, kinds);

	if (!pointer)
	  {
	    switch (kind)
	      {
	      case GOMP_MAP_RELEASE:
	      case GOMP_MAP_DELETE:
		if (acc_is_present (hostaddrs[i], sizes[i]))
		  {
		    if (finalize)
		      acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
		    else
		      acc_delete_async (hostaddrs[i], sizes[i], async);
		  }
		break;
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_FORCE_FROM:
		if (finalize)
		  acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
		else
		  acc_copyout_async (hostaddrs[i], sizes[i], async);
		break;
	      default:
		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
			    kind);
		break;
	      }
	  }
	else
	  {
	    bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
			     || kind == GOMP_MAP_FROM);
	    goacc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
				  finalize, pointer);
	    /* See the above comment.  */
	    i += pointer - 1;
	  }
      }

 out_prof:
  if (profiling_p)
    {
      prof_info.event_type
	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
				&api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}