a34f4cf0e91860cadeb4255e9d48d266ea355078
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2020 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return block containing [D->S), or NULL if not contained.
72
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
88
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
91
92 void *
93 acc_malloc (size_t s)
94 {
95 if (!s)
96 return NULL;
97
98 goacc_lazy_initialize ();
99
100 struct goacc_thread *thr = goacc_thread ();
101
102 assert (thr->dev);
103
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
106
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112
113 if (profiling_p)
114 {
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
117 }
118
119 return res;
120 }
121
122 void
123 acc_free (void *d)
124 {
125 splay_tree_key k;
126
127 if (!d)
128 return;
129
130 struct goacc_thread *thr = goacc_thread ();
131
132 assert (thr && thr->dev);
133
134 struct gomp_device_descr *acc_dev = thr->dev;
135
136 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
137 return free (d);
138
139 acc_prof_info prof_info;
140 acc_api_info api_info;
141 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
142
143 gomp_mutex_lock (&acc_dev->lock);
144
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
149 {
150 void *offset = d - k->tgt->tgt_start + k->tgt_offset;
151 void *h = k->host_start + offset;
152 size_t h_size = k->host_end - k->host_start;
153 gomp_mutex_unlock (&acc_dev->lock);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
158 d, h, (int) h_size);
159 }
160 else
161 gomp_mutex_unlock (&acc_dev->lock);
162
163 if (!acc_dev->free_func (acc_dev->target_id, d))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
165
166 if (profiling_p)
167 {
168 thr->prof_info = NULL;
169 thr->api_info = NULL;
170 }
171 }
172
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
176 {
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
180
181 assert (thr && thr->dev);
182
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184 {
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
190 }
191
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
196 {
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
199 }
200
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
206
207 if (profiling_p)
208 {
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
211 }
212 }
213
214 void
215 acc_memcpy_to_device (void *d, void *h, size_t s)
216 {
217 memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
218 }
219
220 void
221 acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
222 {
223 memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
224 }
225
226 void
227 acc_memcpy_from_device (void *h, void *d, size_t s)
228 {
229 memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
230 }
231
232 void
233 acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
234 {
235 memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
236 }
237
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
318 /* Return 1 if host data [H,+S] is present on the device. */
319
320 int
321 acc_is_present (void *h, size_t s)
322 {
323 splay_tree_key n;
324
325 if (!s || !h)
326 return 0;
327
328 goacc_lazy_initialize ();
329
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
332
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
335
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
338
339 gomp_mutex_lock (&acc_dev->lock);
340
341 n = lookup_host (acc_dev, h, s);
342
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
347
348 gomp_mutex_unlock (&acc_dev->lock);
349
350 return n != NULL;
351 }
352
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 size_t mapnum = 1;
359 void *hostaddrs = h;
360 void *devaddrs = d;
361 size_t sizes = s;
362 unsigned short kinds = GOMP_MAP_ALLOC;
363
364 goacc_lazy_initialize ();
365
366 struct goacc_thread *thr = goacc_thread ();
367 struct gomp_device_descr *acc_dev = thr->dev;
368
369 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
370 {
371 if (d != h)
372 gomp_fatal ("cannot map data on shared-memory system");
373 }
374 else
375 {
376 struct goacc_thread *thr = goacc_thread ();
377
378 if (!d || !h || !s)
379 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
380 (void *)h, (int)s, (void *)d, (int)s);
381
382 acc_prof_info prof_info;
383 acc_api_info api_info;
384 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
385
386 gomp_mutex_lock (&acc_dev->lock);
387
388 if (lookup_host (acc_dev, h, s))
389 {
390 gomp_mutex_unlock (&acc_dev->lock);
391 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
392 (int)s);
393 }
394
395 if (lookup_dev (&thr->dev->mem_map, d, s))
396 {
397 gomp_mutex_unlock (&acc_dev->lock);
398 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
399 (int)s);
400 }
401
402 gomp_mutex_unlock (&acc_dev->lock);
403
404 struct target_mem_desc *tgt
405 = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 assert (tgt->list_count == 1);
409 splay_tree_key n = tgt->list[0].key;
410 assert (n);
411 assert (n->refcount == 1);
412 assert (n->virtual_refcount == 0);
413 /* Special reference counting behavior. */
414 n->refcount = REFCOUNT_INFINITY;
415
416 if (profiling_p)
417 {
418 thr->prof_info = NULL;
419 thr->api_info = NULL;
420 }
421 }
422 }
423
424 void
425 acc_unmap_data (void *h)
426 {
427 struct goacc_thread *thr = goacc_thread ();
428 struct gomp_device_descr *acc_dev = thr->dev;
429
430 /* No need to call lazy open, as the address must have been mapped. */
431
432 /* This is a no-op on shared-memory targets. */
433 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
434 return;
435
436 acc_prof_info prof_info;
437 acc_api_info api_info;
438 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
439
440 gomp_mutex_lock (&acc_dev->lock);
441
442 splay_tree_key n = lookup_host (acc_dev, h, 1);
443
444 if (!n)
445 {
446 gomp_mutex_unlock (&acc_dev->lock);
447 gomp_fatal ("%p is not a mapped block", (void *)h);
448 }
449
450 size_t host_size = n->host_end - n->host_start;
451
452 if (n->host_start != (uintptr_t) h)
453 {
454 gomp_mutex_unlock (&acc_dev->lock);
455 gomp_fatal ("[%p,%d] surrounds %p",
456 (void *) n->host_start, (int) host_size, (void *) h);
457 }
458 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
459 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating
460 the different 'REFCOUNT_INFINITY' cases, or simply separate
461 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
462 etc.)? */
463 else if (n->refcount != REFCOUNT_INFINITY)
464 {
465 gomp_mutex_unlock (&acc_dev->lock);
466 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
467 " by 'acc_map_data'",
468 (void *) h, (int) host_size);
469 }
470
471 struct target_mem_desc *tgt = n->tgt;
472
473 if (tgt->refcount == REFCOUNT_INFINITY)
474 {
475 gomp_mutex_unlock (&acc_dev->lock);
476 gomp_fatal ("cannot unmap target block");
477 }
478
479 /* Above, we've verified that the mapping must have been set up by
480 'acc_map_data'. */
481 assert (tgt->refcount == 1);
482
483 /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
484 from freeing the target memory. */
485 tgt->tgt_end = 0;
486 tgt->to_free = NULL;
487
488 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
489 assert (is_tgt_unmapped);
490
491 gomp_mutex_unlock (&acc_dev->lock);
492
493 if (profiling_p)
494 {
495 thr->prof_info = NULL;
496 thr->api_info = NULL;
497 }
498 }
499
500
501 /* Enter dynamic mapping for a single datum. Return the device pointer. */
502
503 static void *
504 goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
505 {
506 void *d;
507 splay_tree_key n;
508
509 if (!hostaddrs[0] || !sizes[0])
510 gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
511
512 goacc_lazy_initialize ();
513
514 struct goacc_thread *thr = goacc_thread ();
515 struct gomp_device_descr *acc_dev = thr->dev;
516
517 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
518 return hostaddrs[0];
519
520 acc_prof_info prof_info;
521 acc_api_info api_info;
522 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
523 if (profiling_p)
524 {
525 prof_info.async = async;
526 prof_info.async_queue = prof_info.async;
527 }
528
529 gomp_mutex_lock (&acc_dev->lock);
530
531 n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
532 if (n)
533 {
534 void *h = hostaddrs[0];
535 size_t s = sizes[0];
536
537 /* Present. */
538 d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);
539
540 if ((h + s) > (void *)n->host_end)
541 {
542 gomp_mutex_unlock (&acc_dev->lock);
543 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
544 }
545
546 assert (n->refcount != REFCOUNT_LINK);
547 if (n->refcount != REFCOUNT_INFINITY)
548 {
549 n->refcount++;
550 n->virtual_refcount++;
551 }
552
553 gomp_mutex_unlock (&acc_dev->lock);
554 }
555 else
556 {
557 const size_t mapnum = 1;
558
559 gomp_mutex_unlock (&acc_dev->lock);
560
561 goacc_aq aq = get_goacc_asyncqueue (async);
562
563 struct target_mem_desc *tgt
564 = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
565 kinds, true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
566 assert (tgt);
567 assert (tgt->list_count == 1);
568 n = tgt->list[0].key;
569 assert (n);
570 assert (n->refcount == 1);
571 assert (n->virtual_refcount == 0);
572
573 d = (void *) tgt->tgt_start;
574 }
575
576 if (profiling_p)
577 {
578 thr->prof_info = NULL;
579 thr->api_info = NULL;
580 }
581
582 return d;
583 }
584
585 void *
586 acc_create (void *h, size_t s)
587 {
588 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
589 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
590 }
591
592 void
593 acc_create_async (void *h, size_t s, int async)
594 {
595 unsigned short kinds[1] = { GOMP_MAP_ALLOC };
596 goacc_enter_datum (&h, &s, &kinds, async);
597 }
598
599 /* acc_present_or_create used to be what acc_create is now. */
600 /* acc_pcreate is acc_present_or_create by a different name. */
601 #ifdef HAVE_ATTRIBUTE_ALIAS
602 strong_alias (acc_create, acc_present_or_create)
603 strong_alias (acc_create, acc_pcreate)
604 #else
605 void *
606 acc_present_or_create (void *h, size_t s)
607 {
608 return acc_create (h, s);
609 }
610
611 void *
612 acc_pcreate (void *h, size_t s)
613 {
614 return acc_create (h, s);
615 }
616 #endif
617
618 void *
619 acc_copyin (void *h, size_t s)
620 {
621 unsigned short kinds[1] = { GOMP_MAP_TO };
622 return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
623 }
624
625 void
626 acc_copyin_async (void *h, size_t s, int async)
627 {
628 unsigned short kinds[1] = { GOMP_MAP_TO };
629 goacc_enter_datum (&h, &s, &kinds, async);
630 }
631
632 /* acc_present_or_copyin used to be what acc_copyin is now. */
633 /* acc_pcopyin is acc_present_or_copyin by a different name. */
634 #ifdef HAVE_ATTRIBUTE_ALIAS
635 strong_alias (acc_copyin, acc_present_or_copyin)
636 strong_alias (acc_copyin, acc_pcopyin)
637 #else
638 void *
639 acc_present_or_copyin (void *h, size_t s)
640 {
641 return acc_copyin (h, s);
642 }
643
644 void *
645 acc_pcopyin (void *h, size_t s)
646 {
647 return acc_copyin (h, s);
648 }
649 #endif
650
651
652 /* Exit a dynamic mapping for a single variable. */
653
654 static void
655 goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
656 {
657 /* No need to call lazy open, as the data must already have been
658 mapped. */
659
660 kind &= 0xff;
661
662 struct goacc_thread *thr = goacc_thread ();
663 struct gomp_device_descr *acc_dev = thr->dev;
664
665 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
666 return;
667
668 acc_prof_info prof_info;
669 acc_api_info api_info;
670 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
671 if (profiling_p)
672 {
673 prof_info.async = async;
674 prof_info.async_queue = prof_info.async;
675 }
676
677 gomp_mutex_lock (&acc_dev->lock);
678
679 splay_tree_key n = lookup_host (acc_dev, h, s);
680 if (!n)
681 /* PR92726, RP92970, PR92984: no-op. */
682 goto out;
683
684 if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
685 {
686 size_t host_size = n->host_end - n->host_start;
687 gomp_mutex_unlock (&acc_dev->lock);
688 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
689 (void *) h, (int) s, (void *) n->host_start, (int) host_size);
690 }
691
692 bool finalize = (kind == GOMP_MAP_DELETE
693 || kind == GOMP_MAP_FORCE_FROM);
694 if (finalize)
695 {
696 if (n->refcount != REFCOUNT_INFINITY)
697 n->refcount -= n->virtual_refcount;
698 n->virtual_refcount = 0;
699 }
700
701 if (n->virtual_refcount > 0)
702 {
703 if (n->refcount != REFCOUNT_INFINITY)
704 n->refcount--;
705 n->virtual_refcount--;
706 }
707 else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
708 n->refcount--;
709
710 if (n->refcount == 0)
711 {
712 goacc_aq aq = get_goacc_asyncqueue (async);
713
714 bool copyout = (kind == GOMP_MAP_FROM
715 || kind == GOMP_MAP_FORCE_FROM);
716 if (copyout)
717 {
718 void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
719 + (uintptr_t) h - n->host_start);
720 gomp_copy_dev2host (acc_dev, aq, h, d, s);
721 }
722
723 if (aq)
724 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
725 'gomp_unref_tgt' comment in
726 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
727 PR92881. */
728 gomp_remove_var_async (acc_dev, n, aq);
729 else
730 {
731 size_t num_mappings = 0;
732 /* If the target_mem_desc represents a single data mapping, we can
733 check that it is freed when this splay tree key's refcount reaches
734 zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
735 multiple members), fall back to skipping the test. */
736 for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
737 if (n->tgt->list[l_i].key)
738 ++num_mappings;
739 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
740 assert (is_tgt_unmapped || num_mappings > 1);
741 }
742 }
743
744 out:
745 gomp_mutex_unlock (&acc_dev->lock);
746
747 if (profiling_p)
748 {
749 thr->prof_info = NULL;
750 thr->api_info = NULL;
751 }
752 }
753
754 void
755 acc_delete (void *h , size_t s)
756 {
757 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
758 }
759
760 void
761 acc_delete_async (void *h , size_t s, int async)
762 {
763 goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
764 }
765
766 void
767 acc_delete_finalize (void *h , size_t s)
768 {
769 goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
770 }
771
772 void
773 acc_delete_finalize_async (void *h , size_t s, int async)
774 {
775 goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
776 }
777
778 void
779 acc_copyout (void *h, size_t s)
780 {
781 goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
782 }
783
784 void
785 acc_copyout_async (void *h, size_t s, int async)
786 {
787 goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
788 }
789
790 void
791 acc_copyout_finalize (void *h, size_t s)
792 {
793 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
794 }
795
796 void
797 acc_copyout_finalize_async (void *h, size_t s, int async)
798 {
799 goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
800 }
801
802 static void
803 update_dev_host (int is_dev, void *h, size_t s, int async)
804 {
805 splay_tree_key n;
806 void *d;
807
808 goacc_lazy_initialize ();
809
810 struct goacc_thread *thr = goacc_thread ();
811 struct gomp_device_descr *acc_dev = thr->dev;
812
813 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
814 return;
815
816 /* Fortran optional arguments that are non-present result in a
817 NULL host address here. This can safely be ignored as it is
818 not possible to 'update' a non-present optional argument. */
819 if (h == NULL)
820 return;
821
822 acc_prof_info prof_info;
823 acc_api_info api_info;
824 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
825 if (profiling_p)
826 {
827 prof_info.async = async;
828 prof_info.async_queue = prof_info.async;
829 }
830
831 gomp_mutex_lock (&acc_dev->lock);
832
833 n = lookup_host (acc_dev, h, s);
834
835 if (!n)
836 {
837 gomp_mutex_unlock (&acc_dev->lock);
838 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
839 }
840
841 d = (void *) (n->tgt->tgt_start + n->tgt_offset
842 + (uintptr_t) h - n->host_start);
843
844 goacc_aq aq = get_goacc_asyncqueue (async);
845
846 if (is_dev)
847 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
848 else
849 gomp_copy_dev2host (acc_dev, aq, h, d, s);
850
851 gomp_mutex_unlock (&acc_dev->lock);
852
853 if (profiling_p)
854 {
855 thr->prof_info = NULL;
856 thr->api_info = NULL;
857 }
858 }
859
860 void
861 acc_update_device (void *h, size_t s)
862 {
863 update_dev_host (1, h, s, acc_async_sync);
864 }
865
866 void
867 acc_update_device_async (void *h, size_t s, int async)
868 {
869 update_dev_host (1, h, s, async);
870 }
871
872 void
873 acc_update_self (void *h, size_t s)
874 {
875 update_dev_host (0, h, s, acc_async_sync);
876 }
877
878 void
879 acc_update_self_async (void *h, size_t s, int async)
880 {
881 update_dev_host (0, h, s, async);
882 }
883
884 void
885 acc_attach_async (void **hostaddr, int async)
886 {
887 struct goacc_thread *thr = goacc_thread ();
888 struct gomp_device_descr *acc_dev = thr->dev;
889 goacc_aq aq = get_goacc_asyncqueue (async);
890
891 struct splay_tree_key_s cur_node;
892 splay_tree_key n;
893
894 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
895 return;
896
897 gomp_mutex_lock (&acc_dev->lock);
898
899 cur_node.host_start = (uintptr_t) hostaddr;
900 cur_node.host_end = cur_node.host_start + sizeof (void *);
901 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
902
903 if (n == NULL)
904 {
905 gomp_mutex_unlock (&acc_dev->lock);
906 gomp_fatal ("struct not mapped for acc_attach");
907 }
908
909 gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
910 0, NULL);
911
912 gomp_mutex_unlock (&acc_dev->lock);
913 }
914
915 void
916 acc_attach (void **hostaddr)
917 {
918 acc_attach_async (hostaddr, acc_async_sync);
919 }
920
921 static void
922 goacc_detach_internal (void **hostaddr, int async, bool finalize)
923 {
924 struct goacc_thread *thr = goacc_thread ();
925 struct gomp_device_descr *acc_dev = thr->dev;
926 struct splay_tree_key_s cur_node;
927 splay_tree_key n;
928 struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
929
930 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
931 return;
932
933 gomp_mutex_lock (&acc_dev->lock);
934
935 cur_node.host_start = (uintptr_t) hostaddr;
936 cur_node.host_end = cur_node.host_start + sizeof (void *);
937 n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
938
939 if (n == NULL)
940 {
941 gomp_mutex_unlock (&acc_dev->lock);
942 gomp_fatal ("struct not mapped for acc_detach");
943 }
944
945 gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
946
947 gomp_mutex_unlock (&acc_dev->lock);
948 }
949
950 void
951 acc_detach (void **hostaddr)
952 {
953 goacc_detach_internal (hostaddr, acc_async_sync, false);
954 }
955
956 void
957 acc_detach_async (void **hostaddr, int async)
958 {
959 goacc_detach_internal (hostaddr, async, false);
960 }
961
962 void
963 acc_detach_finalize (void **hostaddr)
964 {
965 goacc_detach_internal (hostaddr, acc_async_sync, true);
966 }
967
968 void
969 acc_detach_finalize_async (void **hostaddr, int async)
970 {
971 goacc_detach_internal (hostaddr, async, true);
972 }
973
974 /* Some types of (pointer) variables use several consecutive mappings, which
975 must be treated as a group for enter/exit data directives. This function
976 returns the last mapping in such a group (inclusive), or POS for singleton
977 mappings. */
978
979 static int
980 find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
981 {
982 unsigned char kind0 = kinds[pos] & 0xff;
983 int first_pos = pos;
984
985 switch (kind0)
986 {
987 case GOMP_MAP_TO_PSET:
988 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
989 pos++;
990 /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */
991 assert (pos > first_pos);
992 break;
993
994 case GOMP_MAP_STRUCT:
995 pos += sizes[pos];
996 break;
997
998 case GOMP_MAP_POINTER:
999 case GOMP_MAP_ALWAYS_POINTER:
1000 /* These mappings are only expected after some other mapping. If we
1001 see one by itself, something has gone wrong. */
1002 gomp_fatal ("unexpected mapping");
1003 break;
1004
1005 default:
1006 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
1007 mapping. */
1008 if (pos + 1 < mapnum)
1009 {
1010 unsigned char kind1 = kinds[pos + 1] & 0xff;
1011 if (kind1 == GOMP_MAP_ALWAYS_POINTER)
1012 return pos + 1;
1013 }
1014
1015 /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
1016 (etc.) mapping. */
1017 while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
1018 pos++;
1019 }
1020
1021 return pos;
1022 }
1023
1024 /* Map variables for OpenACC "enter data". We can't just call
1025 gomp_map_vars_async once, because individual mapped variables might have
1026 "exit data" called for them at different times. */
1027
1028 static void
1029 goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1030 void **hostaddrs, size_t *sizes,
1031 unsigned short *kinds, goacc_aq aq)
1032 {
1033 for (size_t i = 0; i < mapnum; i++)
1034 {
1035 int group_last = find_group_last (i, mapnum, sizes, kinds);
1036
1037 gomp_map_vars_async (acc_dev, aq,
1038 (group_last - i) + 1,
1039 &hostaddrs[i], NULL,
1040 &sizes[i], &kinds[i], true,
1041 GOMP_MAP_VARS_OPENACC_ENTER_DATA);
1042
1043 i = group_last;
1044 }
1045 }
1046
1047 /* Unmap variables for OpenACC "exit data". */
1048
1049 static void
1050 goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
1051 void **hostaddrs, size_t *sizes,
1052 unsigned short *kinds, goacc_aq aq)
1053 {
1054 gomp_mutex_lock (&acc_dev->lock);
1055
1056 /* Handle "detach" before copyback/deletion of mapped data. */
1057 for (size_t i = 0; i < mapnum; ++i)
1058 {
1059 unsigned char kind = kinds[i] & 0xff;
1060 bool finalize = false;
1061 switch (kind)
1062 {
1063 case GOMP_MAP_FORCE_DETACH:
1064 finalize = true;
1065 /* Fallthrough. */
1066
1067 case GOMP_MAP_DETACH:
1068 {
1069 struct splay_tree_key_s cur_node;
1070 uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
1071 cur_node.host_start = hostaddr;
1072 cur_node.host_end = cur_node.host_start + sizeof (void *);
1073 splay_tree_key n
1074 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1075
1076 if (n == NULL)
1077 {
1078 gomp_mutex_unlock (&acc_dev->lock);
1079 gomp_fatal ("struct not mapped for detach operation");
1080 }
1081
1082 gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
1083 }
1084 break;
1085 default:
1086 ;
1087 }
1088 }
1089
1090 for (size_t i = 0; i < mapnum; ++i)
1091 {
1092 unsigned char kind = kinds[i] & 0xff;
1093 bool copyfrom = false;
1094 bool finalize = false;
1095
1096 if (kind == GOMP_MAP_FORCE_FROM
1097 || kind == GOMP_MAP_DELETE
1098 || kind == GOMP_MAP_FORCE_DETACH)
1099 finalize = true;
1100
1101 switch (kind)
1102 {
1103 case GOMP_MAP_FROM:
1104 case GOMP_MAP_FORCE_FROM:
1105 case GOMP_MAP_ALWAYS_FROM:
1106 copyfrom = true;
1107 /* Fallthrough. */
1108
1109 case GOMP_MAP_TO_PSET:
1110 case GOMP_MAP_POINTER:
1111 case GOMP_MAP_DELETE:
1112 case GOMP_MAP_RELEASE:
1113 case GOMP_MAP_DETACH:
1114 case GOMP_MAP_FORCE_DETACH:
1115 {
1116 struct splay_tree_key_s cur_node;
1117 size_t size;
1118 if (kind == GOMP_MAP_POINTER
1119 || kind == GOMP_MAP_DETACH
1120 || kind == GOMP_MAP_FORCE_DETACH)
1121 size = sizeof (void *);
1122 else
1123 size = sizes[i];
1124 cur_node.host_start = (uintptr_t) hostaddrs[i];
1125 cur_node.host_end = cur_node.host_start + size;
1126 splay_tree_key n
1127 = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
1128
1129 if (n == NULL)
1130 continue;
1131
1132 if (finalize)
1133 {
1134 if (n->refcount != REFCOUNT_INFINITY)
1135 n->refcount -= n->virtual_refcount;
1136 n->virtual_refcount = 0;
1137 }
1138
1139 if (n->virtual_refcount > 0)
1140 {
1141 if (n->refcount != REFCOUNT_INFINITY)
1142 n->refcount--;
1143 n->virtual_refcount--;
1144 }
1145 else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
1146 n->refcount--;
1147
1148 if (copyfrom
1149 && (kind != GOMP_MAP_FROM || n->refcount == 0))
1150 gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start,
1151 (void *) (n->tgt->tgt_start + n->tgt_offset
1152 + cur_node.host_start
1153 - n->host_start),
1154 cur_node.host_end - cur_node.host_start);
1155
1156 if (n->refcount == 0)
1157 {
1158 if (aq)
1159 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
1160 'gomp_unref_tgt' comment in
1161 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
1162 PR92881. */
1163 gomp_remove_var_async (acc_dev, n, aq);
1164 else
1165 {
1166 size_t num_mappings = 0;
1167 /* If the target_mem_desc represents a single data mapping,
1168 we can check that it is freed when this splay tree key's
1169 refcount reaches zero. Otherwise (e.g. for a
1170 'GOMP_MAP_STRUCT' mapping with multiple members), fall
1171 back to skipping the test. */
1172 for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
1173 if (n->tgt->list[l_i].key)
1174 ++num_mappings;
1175 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
1176 assert (is_tgt_unmapped || num_mappings > 1);
1177 }
1178 }
1179 }
1180 break;
1181
1182 case GOMP_MAP_STRUCT:
1183 {
1184 int elems = sizes[i];
1185 for (int j = 1; j <= elems; j++)
1186 {
1187 assert (i + j < mapnum);
1188
1189 kind = kinds[i + j] & 0xff;
1190
1191 finalize = false;
1192 if (kind == GOMP_MAP_FORCE_FROM
1193 || kind == GOMP_MAP_DELETE
1194 || kind == GOMP_MAP_FORCE_DETACH)
1195 finalize = true;
1196
1197 struct splay_tree_key_s k;
1198 k.host_start = (uintptr_t) hostaddrs[i + j];
1199 k.host_end = k.host_start + sizes[i + j];
1200 splay_tree_key str;
1201 str = splay_tree_lookup (&acc_dev->mem_map, &k);
1202 if (str)
1203 {
1204 if (finalize)
1205 {
1206 if (str->refcount != REFCOUNT_INFINITY)
1207 str->refcount -= str->virtual_refcount;
1208 str->virtual_refcount = 0;
1209 }
1210 if (str->virtual_refcount > 0)
1211 {
1212 if (str->refcount != REFCOUNT_INFINITY)
1213 str->refcount--;
1214 str->virtual_refcount--;
1215 }
1216 else if (str->refcount > 0
1217 && str->refcount != REFCOUNT_INFINITY)
1218 str->refcount--;
1219 if (str->refcount == 0)
1220 {
1221 if (aq)
1222 /* TODO We can't do the 'is_tgt_unmapped' checking --
1223 see the 'gomp_unref_tgt' comment in
1224 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
1225 PR92881. */
1226 gomp_remove_var_async (acc_dev, str, aq);
1227 else
1228 {
1229 size_t num_mappings = 0;
1230 /* If the target_mem_desc represents a single data
1231 mapping, we can check that it is freed when this
1232 splay tree key's refcount reaches zero.
1233 Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping
1234 with multiple members), fall back to skipping
1235 the test. */
1236 for (size_t l_i = 0; l_i < str->tgt->list_count; ++l_i)
1237 if (str->tgt->list[l_i].key)
1238 ++num_mappings;
1239 bool is_tgt_unmapped = gomp_remove_var (acc_dev, str);
1240 assert (is_tgt_unmapped || num_mappings > 1);
1241 }
1242 }
1243 }
1244 }
1245 i += elems;
1246 }
1247 break;
1248
1249 default:
1250 gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1251 kind);
1252 }
1253 }
1254
1255 gomp_mutex_unlock (&acc_dev->lock);
1256 }
1257
1258 void
1259 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1260 size_t *sizes, unsigned short *kinds, int async,
1261 int num_waits, ...)
1262 {
1263 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
1264
1265 struct goacc_thread *thr;
1266 struct gomp_device_descr *acc_dev;
1267 bool data_enter = false;
1268 size_t i;
1269
1270 goacc_lazy_initialize ();
1271
1272 thr = goacc_thread ();
1273 acc_dev = thr->dev;
1274
1275 /* Determine if this is an "acc enter data". */
1276 for (i = 0; i < mapnum; ++i)
1277 {
1278 unsigned char kind = kinds[i] & 0xff;
1279
1280 if (kind == GOMP_MAP_POINTER
1281 || kind == GOMP_MAP_TO_PSET
1282 || kind == GOMP_MAP_STRUCT)
1283 continue;
1284
1285 if (kind == GOMP_MAP_FORCE_ALLOC
1286 || kind == GOMP_MAP_FORCE_PRESENT
1287 || kind == GOMP_MAP_ATTACH
1288 || kind == GOMP_MAP_FORCE_TO
1289 || kind == GOMP_MAP_TO
1290 || kind == GOMP_MAP_ALLOC)
1291 {
1292 data_enter = true;
1293 break;
1294 }
1295
1296 if (kind == GOMP_MAP_RELEASE
1297 || kind == GOMP_MAP_DELETE
1298 || kind == GOMP_MAP_DETACH
1299 || kind == GOMP_MAP_FORCE_DETACH
1300 || kind == GOMP_MAP_FROM
1301 || kind == GOMP_MAP_FORCE_FROM)
1302 break;
1303
1304 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1305 kind);
1306 }
1307
1308 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
1309
1310 acc_prof_info prof_info;
1311 if (profiling_p)
1312 {
1313 thr->prof_info = &prof_info;
1314
1315 prof_info.event_type
1316 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
1317 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
1318 prof_info.version = _ACC_PROF_INFO_VERSION;
1319 prof_info.device_type = acc_device_type (acc_dev->type);
1320 prof_info.device_number = acc_dev->target_id;
1321 prof_info.thread_id = -1;
1322 prof_info.async = async;
1323 prof_info.async_queue = prof_info.async;
1324 prof_info.src_file = NULL;
1325 prof_info.func_name = NULL;
1326 prof_info.line_no = -1;
1327 prof_info.end_line_no = -1;
1328 prof_info.func_line_no = -1;
1329 prof_info.func_end_line_no = -1;
1330 }
1331 acc_event_info enter_exit_data_event_info;
1332 if (profiling_p)
1333 {
1334 enter_exit_data_event_info.other_event.event_type
1335 = prof_info.event_type;
1336 enter_exit_data_event_info.other_event.valid_bytes
1337 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
1338 enter_exit_data_event_info.other_event.parent_construct
1339 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
1340 enter_exit_data_event_info.other_event.implicit = 0;
1341 enter_exit_data_event_info.other_event.tool_info = NULL;
1342 }
1343 acc_api_info api_info;
1344 if (profiling_p)
1345 {
1346 thr->api_info = &api_info;
1347
1348 api_info.device_api = acc_device_api_none;
1349 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
1350 api_info.device_type = prof_info.device_type;
1351 api_info.vendor = -1;
1352 api_info.device_handle = NULL;
1353 api_info.context_handle = NULL;
1354 api_info.async_handle = NULL;
1355 }
1356
1357 if (profiling_p)
1358 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1359 &api_info);
1360
1361 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1362 || (flags & GOACC_FLAG_HOST_FALLBACK))
1363 {
1364 prof_info.device_type = acc_device_host;
1365 api_info.device_type = prof_info.device_type;
1366
1367 goto out_prof;
1368 }
1369
1370 if (num_waits)
1371 {
1372 va_list ap;
1373
1374 va_start (ap, num_waits);
1375 goacc_wait (async, num_waits, &ap);
1376 va_end (ap);
1377 }
1378
1379 goacc_aq aq = get_goacc_asyncqueue (async);
1380
1381 if (data_enter)
1382 goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1383 else
1384 goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
1385
1386 out_prof:
1387 if (profiling_p)
1388 {
1389 prof_info.event_type
1390 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
1391 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
1392 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1393 &api_info);
1394
1395 thr->prof_info = NULL;
1396 thr->api_info = NULL;
1397 }
1398 }