[PR92848] [OpenACC] Use 'GOMP_MAP_VARS_ENTER_DATA' for dynamic data lifetimes
libgomp/oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return the block containing host range [H,H+S), or NULL if none. The
37 device lock for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return the block containing device range [D,D+S), or NULL if none.
72
73 This iterates over the whole splay tree; it is not expected to be a
74 common operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
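/* A minimal sketch of how the two lookups are used together (illustrative
   only; 'dev', 'h' and the 64-byte size are assumed values, [h,h+64) is
   assumed to be mapped, and the device lock must be held around the calls):

     splay_tree_key k = lookup_host (dev, h, 64);
     void *d = (void *) (k->tgt->tgt_start + k->tgt_offset
                         + (uintptr_t) h - k->host_start);
     splay_tree_key k2 = lookup_dev (&dev->mem_map, d, 64);

   'lookup_host' is a plain splay-tree lookup; 'lookup_dev' has to walk the
   whole tree, which is why it is only used on paths where that cost is
   acceptable.  */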
88
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
91
92 void *
93 acc_malloc (size_t s)
94 {
95 if (!s)
96 return NULL;
97
98 goacc_lazy_initialize ();
99
100 struct goacc_thread *thr = goacc_thread ();
101
102 assert (thr->dev);
103
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
106
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112
113 if (profiling_p)
114 {
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
117 }
118
119 return res;
120 }
121
122 void
123 acc_free (void *d)
124 {
125 splay_tree_key k;
126
127 if (!d)
128 return;
129
130 struct goacc_thread *thr = goacc_thread ();
131
132 assert (thr && thr->dev);
133
134 struct gomp_device_descr *acc_dev = thr->dev;
135
136 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
137 return free (d);
138
139 acc_prof_info prof_info;
140 acc_api_info api_info;
141 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
142
143 gomp_mutex_lock (&acc_dev->lock);
144
145 /* No need to call lazy open here, as the pointer value must have been
146 returned by acc_malloc. A NULL argument (which acc_malloc may return)
147 is handled by the early return above. */
148 if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
149 {
150 void *offset = d - k->tgt->tgt_start - k->tgt_offset;
151 void *h = k->host_start + offset;
152 size_t h_size = k->host_end - k->host_start;
153 gomp_mutex_unlock (&acc_dev->lock);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
158 d, h, (int) h_size);
159 }
160 else
161 gomp_mutex_unlock (&acc_dev->lock);
162
163 if (!acc_dev->free_func (acc_dev->target_id, d))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
165
166 if (profiling_p)
167 {
168 thr->prof_info = NULL;
169 thr->api_info = NULL;
170 }
171 }
172
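/* Usage sketch for 'acc_malloc'/'acc_free' (illustrative only; the 4096-byte
   size and the abort-on-failure policy are assumptions, not requirements of
   this file):

     #include <openacc.h>
     #include <stdlib.h>

     void
     example_alloc (void)
     {
       void *d = acc_malloc (4096);   // device memory (host memory if shared)
       if (d == NULL)
         abort ();                    // exhaustion is reported by returning NULL
       acc_free (d);                  // must not still be mapped at this point
     }
*/
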
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
176 {
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
180
181 assert (thr && thr->dev);
182
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184 {
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
190 }
191
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
196 {
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
199 }
200
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
206
207 if (profiling_p)
208 {
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
211 }
212 }
213
214 void
215 acc_memcpy_to_device (void *d, void *h, size_t s)
216 {
217 memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
218 }
219
220 void
221 acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
222 {
223 memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
224 }
225
226 void
227 acc_memcpy_from_device (void *h, void *d, size_t s)
228 {
229 memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
230 }
231
232 void
233 acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
234 {
235 memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
236 }
237
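/* Usage sketch for the copy routines above (illustrative; the buffer size and
   contents are assumed):

     #include <openacc.h>

     void
     example_memcpy (void)
     {
       int host[256] = { 0 };
       void *dev = acc_malloc (sizeof host);
       acc_memcpy_to_device (dev, host, sizeof host);    // host -> device
       acc_memcpy_from_device (host, dev, sizeof host);  // device -> host
       acc_free (dev);
     }

   The '_async' variants take an additional async-queue argument such as
   'acc_async_sync' or 'acc_async_noval'.  */
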
238 /* Return the device pointer that corresponds to host data H, or NULL if
239 there is no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D, or NULL if
279 there is no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start - n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
318 /* Return 1 if host data [H,+S] is present on the device. */
319
320 int
321 acc_is_present (void *h, size_t s)
322 {
323 splay_tree_key n;
324
325 if (!s || !h)
326 return 0;
327
328 goacc_lazy_initialize ();
329
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
332
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
335
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
338
339 gomp_mutex_lock (&acc_dev->lock);
340
341 n = lookup_host (acc_dev, h, s);
342
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
347
348 gomp_mutex_unlock (&acc_dev->lock);
349
350 return n != NULL;
351 }
352
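/* Usage sketch tying 'acc_deviceptr', 'acc_hostptr' and 'acc_is_present'
   together (illustrative; the array and its size are assumed):

     #include <openacc.h>
     #include <assert.h>
     #include <stddef.h>

     void
     example_query (float *a, size_t n)
     {
       acc_copyin (a, n * sizeof *a);
       assert (acc_is_present (a, n * sizeof *a));
       void *d = acc_deviceptr (a);     // host -> device translation
       assert (acc_hostptr (d) == a);   // device -> host translation
       acc_delete (a, n * sizeof *a);
     }

   On shared-memory devices, 'acc_deviceptr' and 'acc_hostptr' simply return
   their argument, and 'acc_is_present' only checks for a non-NULL host
   address.  */
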
353 /* Create a mapping for host [H,+S] -> device [D,+S]. */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
364
365 goacc_lazy_initialize ();
366
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
369
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 {
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
374 }
375 else
376 {
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
382
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386
387 gomp_mutex_lock (&acc_dev->lock);
388
389 if (lookup_host (acc_dev, h, s))
390 {
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
394 }
395
396 if (lookup_dev (&acc_dev->mem_map, d, s))
397 {
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
401 }
402
403 gomp_mutex_unlock (&acc_dev->lock);
404
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 splay_tree_key n = tgt->list[0].key;
409 assert (n->refcount == 1);
410 assert (n->dynamic_refcount == 0);
411 /* Special reference counting behavior. */
412 n->refcount = REFCOUNT_INFINITY;
413
414 if (profiling_p)
415 {
416 thr->prof_info = NULL;
417 thr->api_info = NULL;
418 }
419 }
420 }
421
422 void
423 acc_unmap_data (void *h)
424 {
425 struct goacc_thread *thr = goacc_thread ();
426 struct gomp_device_descr *acc_dev = thr->dev;
427
428 /* No need to call lazy open, as the address must have been mapped. */
429
430 /* This is a no-op on shared-memory targets. */
431 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
432 return;
433
434 acc_prof_info prof_info;
435 acc_api_info api_info;
436 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
437
438 size_t host_size;
439
440 gomp_mutex_lock (&acc_dev->lock);
441
442 splay_tree_key n = lookup_host (acc_dev, h, 1);
443 struct target_mem_desc *t;
444
445 if (!n)
446 {
447 gomp_mutex_unlock (&acc_dev->lock);
448 gomp_fatal ("%p is not a mapped block", (void *)h);
449 }
450
451 host_size = n->host_end - n->host_start;
452
453 if (n->host_start != (uintptr_t) h)
454 {
455 gomp_mutex_unlock (&acc_dev->lock);
456 gomp_fatal ("[%p,%d] surrounds %p",
457 (void *) n->host_start, (int) host_size, (void *) h);
458 }
459 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
460 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating
461 the different 'REFCOUNT_INFINITY' cases, or simply separate
462 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
463 etc.)? */
464 else if (n->refcount != REFCOUNT_INFINITY)
465 {
466 gomp_mutex_unlock (&acc_dev->lock);
467 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
468 " by 'acc_map_data'",
469 (void *) h, (int) host_size);
470 }
471
472 t = n->tgt;
473
474 if (t->refcount == 1)
475 {
476 /* This is the last reference, so clear 'tgt_end' and 'to_free' to prevent
477 'gomp_unmap_tgt' (via 'gomp_remove_var') from freeing the device memory,
478 which was supplied by the user via 'acc_map_data'. */
479 t->tgt_end = 0;
480 t->to_free = 0;
481 }
482
483 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
484 assert (is_tgt_unmapped);
485
486 gomp_mutex_unlock (&acc_dev->lock);
487
488 if (profiling_p)
489 {
490 thr->prof_info = NULL;
491 thr->api_info = NULL;
492 }
493 }
494
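/* Usage sketch for 'acc_map_data'/'acc_unmap_data' (illustrative; the buffer
   and its size are assumed).  The mapping created here gets an infinite
   structured reference count, so it is only ever removed by an explicit
   'acc_unmap_data':

     #include <openacc.h>
     #include <stddef.h>

     void
     example_map (char *host_buf, size_t size)
     {
       void *dev_buf = acc_malloc (size);
       acc_map_data (host_buf, dev_buf, size);   // attach host_buf to dev_buf
       acc_update_device (host_buf, size);       // make the device copy current
       acc_unmap_data (host_buf);                // does not free dev_buf
       acc_free (dev_buf);
     }
*/
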
495 #define FLAG_PRESENT (1 << 0)
496 #define FLAG_CREATE (1 << 1)
497 #define FLAG_COPY (1 << 2)
498
499 static void *
500 present_create_copy (unsigned f, void *h, size_t s, int async)
501 {
502 void *d;
503 splay_tree_key n;
504
505 if (!h || !s)
506 gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
507
508 goacc_lazy_initialize ();
509
510 struct goacc_thread *thr = goacc_thread ();
511 struct gomp_device_descr *acc_dev = thr->dev;
512
513 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
514 return h;
515
516 acc_prof_info prof_info;
517 acc_api_info api_info;
518 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
519 if (profiling_p)
520 {
521 prof_info.async = async;
522 prof_info.async_queue = prof_info.async;
523 }
524
525 gomp_mutex_lock (&acc_dev->lock);
526
527 n = lookup_host (acc_dev, h, s);
528 if (n)
529 {
530 /* Present. */
531 d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);
532
533 if (!(f & FLAG_PRESENT))
534 {
535 gomp_mutex_unlock (&acc_dev->lock);
536 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
537 (void *)h, (int)s, (void *)d, (int)s);
538 }
539 if ((h + s) > (void *)n->host_end)
540 {
541 gomp_mutex_unlock (&acc_dev->lock);
542 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
543 }
544
545 assert (n->refcount != REFCOUNT_LINK);
546 if (n->refcount != REFCOUNT_INFINITY)
547 n->refcount++;
548 n->dynamic_refcount++;
549
550 gomp_mutex_unlock (&acc_dev->lock);
551 }
552 else if (!(f & FLAG_CREATE))
553 {
554 gomp_mutex_unlock (&acc_dev->lock);
555 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
556 }
557 else
558 {
559 struct target_mem_desc *tgt;
560 size_t mapnum = 1;
561 unsigned short kinds;
562 void *hostaddrs = h;
563
564 if (f & FLAG_COPY)
565 kinds = GOMP_MAP_TO;
566 else
567 kinds = GOMP_MAP_ALLOC;
568
569 gomp_mutex_unlock (&acc_dev->lock);
570
571 goacc_aq aq = get_goacc_asyncqueue (async);
572
573 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
574 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
575 assert (tgt);
576 n = tgt->list[0].key;
577 assert (n->refcount == 1);
578 assert (n->dynamic_refcount == 0);
579 n->dynamic_refcount++;
580
581 d = tgt->to_free;
582 }
583
584 if (profiling_p)
585 {
586 thr->prof_info = NULL;
587 thr->api_info = NULL;
588 }
589
590 return d;
591 }
592
593 void *
594 acc_create (void *h, size_t s)
595 {
596 return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
597 }
598
599 void
600 acc_create_async (void *h, size_t s, int async)
601 {
602 present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
603 }
604
605 /* acc_present_or_create used to be what acc_create is now. */
606 /* acc_pcreate is acc_present_or_create by a different name. */
607 #ifdef HAVE_ATTRIBUTE_ALIAS
608 strong_alias (acc_create, acc_present_or_create)
609 strong_alias (acc_create, acc_pcreate)
610 #else
611 void *
612 acc_present_or_create (void *h, size_t s)
613 {
614 return acc_create (h, s);
615 }
616
617 void *
618 acc_pcreate (void *h, size_t s)
619 {
620 return acc_create (h, s);
621 }
622 #endif
623
624 void *
625 acc_copyin (void *h, size_t s)
626 {
627 return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
628 acc_async_sync);
629 }
630
631 void
632 acc_copyin_async (void *h, size_t s, int async)
633 {
634 present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
635 }
636
637 /* acc_present_or_copyin used to be what acc_copyin is now. */
638 /* acc_pcopyin is acc_present_or_copyin by a different name. */
639 #ifdef HAVE_ATTRIBUTE_ALIAS
640 strong_alias (acc_copyin, acc_present_or_copyin)
641 strong_alias (acc_copyin, acc_pcopyin)
642 #else
643 void *
644 acc_present_or_copyin (void *h, size_t s)
645 {
646 return acc_copyin (h, s);
647 }
648
649 void *
650 acc_pcopyin (void *h, size_t s)
651 {
652 return acc_copyin (h, s);
653 }
654 #endif
655
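/* Usage sketch for the 'acc_create'/'acc_copyin' family (illustrative; the
   array and its size are assumed).  Both have present-or semantics: if the
   data is already mapped they only increment the reference counts, and each
   call must be balanced by one of the 'acc_delete'/'acc_copyout' routines
   defined below:

     #include <openacc.h>
     #include <stddef.h>

     void
     example_copyin (double *a, size_t n)
     {
       acc_copyin (a, n * sizeof *a);    // creates the mapping, copies to device
       acc_copyin (a, n * sizeof *a);    // already present: refcounts only
       acc_delete (a, n * sizeof *a);    // drops one reference
       acc_copyout (a, n * sizeof *a);   // last reference: copies back, unmaps
     }
*/
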
656 #define FLAG_COPYOUT (1 << 0)
657 #define FLAG_FINALIZE (1 << 1)
658
659 static void
660 delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
661 {
662 splay_tree_key n;
663 struct goacc_thread *thr = goacc_thread ();
664 struct gomp_device_descr *acc_dev = thr->dev;
665
666 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
667 return;
668
669 acc_prof_info prof_info;
670 acc_api_info api_info;
671 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
672 if (profiling_p)
673 {
674 prof_info.async = async;
675 prof_info.async_queue = prof_info.async;
676 }
677
678 gomp_mutex_lock (&acc_dev->lock);
679
680 n = lookup_host (acc_dev, h, s);
681
682 /* No need to call lazy open, as the data must already have been
683 mapped. */
684
685 if (!n)
686 {
687 gomp_mutex_unlock (&acc_dev->lock);
688 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
689 }
690
691 if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
692 {
693 size_t host_size = n->host_end - n->host_start;
694 gomp_mutex_unlock (&acc_dev->lock);
695 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
696 (void *) h, (int) s, (void *) n->host_start, (int) host_size);
697 }
698
699 assert (n->refcount != REFCOUNT_LINK);
700 if (n->refcount != REFCOUNT_INFINITY
701 && n->refcount < n->dynamic_refcount)
702 {
703 gomp_mutex_unlock (&acc_dev->lock);
704 gomp_fatal ("dynamic reference counting assertion failed");
705 }
706
707 if (f & FLAG_FINALIZE)
708 {
709 if (n->refcount != REFCOUNT_INFINITY)
710 n->refcount -= n->dynamic_refcount;
711 n->dynamic_refcount = 0;
712 }
713 else if (n->dynamic_refcount)
714 {
715 if (n->refcount != REFCOUNT_INFINITY)
716 n->refcount--;
717 n->dynamic_refcount--;
718 }
719
720 if (n->refcount == 0)
721 {
722 goacc_aq aq = get_goacc_asyncqueue (async);
723
724 if (f & FLAG_COPYOUT)
725 {
726 void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
727 + (uintptr_t) h - n->host_start);
728 gomp_copy_dev2host (acc_dev, aq, h, d, s);
729 }
730
731 if (aq)
732 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
733 'gomp_unref_tgt' comment in
734 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
735 PR92881. */
736 gomp_remove_var_async (acc_dev, n, aq);
737 else
738 {
739 bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
740 assert (is_tgt_unmapped);
741 }
742 }
743
744 gomp_mutex_unlock (&acc_dev->lock);
745
746 if (profiling_p)
747 {
748 thr->prof_info = NULL;
749 thr->api_info = NULL;
750 }
751 }
752
753 void
754 acc_delete (void *h, size_t s)
755 {
756 delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
757 }
758
759 void
760 acc_delete_async (void *h, size_t s, int async)
761 {
762 delete_copyout (0, h, s, async, __FUNCTION__);
763 }
764
765 void
766 acc_delete_finalize (void *h, size_t s)
767 {
768 delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
769 }
770
771 void
772 acc_delete_finalize_async (void *h, size_t s, int async)
773 {
774 delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
775 }
776
777 void
778 acc_copyout (void *h, size_t s)
779 {
780 delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
781 }
782
783 void
784 acc_copyout_async (void *h, size_t s, int async)
785 {
786 delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
787 }
788
789 void
790 acc_copyout_finalize (void *h, size_t s)
791 {
792 delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
793 __FUNCTION__);
794 }
795
796 void
797 acc_copyout_finalize_async (void *h, size_t s, int async)
798 {
799 delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
800 }
801
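/* Sketch of the 'finalize' variants (illustrative; the array and its size are
   assumed): a single '_finalize' call drops all dynamic references at once,
   whereas the plain variants have to balance every earlier
   'acc_create'/'acc_copyin':

     #include <openacc.h>
     #include <stddef.h>

     void
     example_finalize (int *a, size_t n)
     {
       acc_copyin (a, n * sizeof *a);
       acc_copyin (a, n * sizeof *a);
       acc_copyin (a, n * sizeof *a);
       acc_copyout_finalize (a, n * sizeof *a);   // copies back and unmaps
     }
*/
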
802 static void
803 update_dev_host (int is_dev, void *h, size_t s, int async)
804 {
805 splay_tree_key n;
806 void *d;
807
808 goacc_lazy_initialize ();
809
810 struct goacc_thread *thr = goacc_thread ();
811 struct gomp_device_descr *acc_dev = thr->dev;
812
813 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
814 return;
815
816 /* Fortran optional arguments that are non-present result in a
817 NULL host address here. This can safely be ignored as it is
818 not possible to 'update' a non-present optional argument. */
819 if (h == NULL)
820 return;
821
822 acc_prof_info prof_info;
823 acc_api_info api_info;
824 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
825 if (profiling_p)
826 {
827 prof_info.async = async;
828 prof_info.async_queue = prof_info.async;
829 }
830
831 gomp_mutex_lock (&acc_dev->lock);
832
833 n = lookup_host (acc_dev, h, s);
834
835 if (!n)
836 {
837 gomp_mutex_unlock (&acc_dev->lock);
838 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
839 }
840
841 d = (void *) (n->tgt->tgt_start + n->tgt_offset
842 + (uintptr_t) h - n->host_start);
843
844 goacc_aq aq = get_goacc_asyncqueue (async);
845
846 if (is_dev)
847 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
848 else
849 gomp_copy_dev2host (acc_dev, aq, h, d, s);
850
851 gomp_mutex_unlock (&acc_dev->lock);
852
853 if (profiling_p)
854 {
855 thr->prof_info = NULL;
856 thr->api_info = NULL;
857 }
858 }
859
860 void
861 acc_update_device (void *h, size_t s)
862 {
863 update_dev_host (1, h, s, acc_async_sync);
864 }
865
866 void
867 acc_update_device_async (void *h, size_t s, int async)
868 {
869 update_dev_host (1, h, s, async);
870 }
871
872 void
873 acc_update_self (void *h, size_t s)
874 {
875 update_dev_host (0, h, s, acc_async_sync);
876 }
877
878 void
879 acc_update_self_async (void *h, size_t s, int async)
880 {
881 update_dev_host (0, h, s, async);
882 }
883
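/* Usage sketch for the 'update' routines above (illustrative; the data is
   assumed to have been mapped earlier, e.g. by 'acc_copyin', and a device
   computation would typically sit between the two calls):

     #include <openacc.h>
     #include <stddef.h>

     void
     example_update (float *a, size_t n)
     {
       a[0] = 1.0f;                           // modified on the host
       acc_update_device (a, n * sizeof *a);  // refresh the device copy
       acc_update_self (a, n * sizeof *a);    // refresh the host copy
     }
*/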
884
885 /* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
886 functions. */
887
888 /* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.
889
890 Only the first mapping is considered in reference counting; the following
891 ones implicitly follow suit. Similarly, 'copyout' ('force_copyfrom') is
892 done only for the first mapping. */
893
894 static void
895 goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
896 void *kinds, int async)
897 {
898 struct target_mem_desc *tgt;
899 struct goacc_thread *thr = goacc_thread ();
900 struct gomp_device_descr *acc_dev = thr->dev;
901
902 if (*hostaddrs == NULL)
903 return;
904
905 if (acc_is_present (*hostaddrs, *sizes))
906 {
907 splay_tree_key n;
908 gomp_mutex_lock (&acc_dev->lock);
909 n = lookup_host (acc_dev, *hostaddrs, *sizes);
910 assert (n->refcount != REFCOUNT_INFINITY
911 && n->refcount != REFCOUNT_LINK);
912 gomp_mutex_unlock (&acc_dev->lock);
913
914 tgt = n->tgt;
915 for (size_t i = 0; i < tgt->list_count; i++)
916 if (tgt->list[i].key == n)
917 {
918 for (size_t j = 0; j < mapnum; j++)
919 if (i + j < tgt->list_count && tgt->list[i + j].key)
920 {
921 tgt->list[i + j].key->refcount++;
922 tgt->list[i + j].key->dynamic_refcount++;
923 }
924 return;
925 }
926 /* Should not reach here. */
927 gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
928 }
929
930 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
931 goacc_aq aq = get_goacc_asyncqueue (async);
932 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
933 NULL, sizes, kinds, true, GOMP_MAP_VARS_ENTER_DATA);
934 assert (tgt);
935 splay_tree_key n = tgt->list[0].key;
936 assert (n->refcount == 1);
937 assert (n->dynamic_refcount == 0);
938 n->dynamic_refcount++;
939 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
940 }
941
942 static void
943 goacc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
944 int finalize)
945 {
946 struct goacc_thread *thr = goacc_thread ();
947 struct gomp_device_descr *acc_dev = thr->dev;
948 splay_tree_key n;
949 struct target_mem_desc *t;
950
951 if (!acc_is_present (h, s))
952 return;
953
954 gomp_mutex_lock (&acc_dev->lock);
955
956 n = lookup_host (acc_dev, h, 1);
957
958 if (!n)
959 {
960 gomp_mutex_unlock (&acc_dev->lock);
961 gomp_fatal ("%p is not a mapped block", (void *)h);
962 }
963
964 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
965
966 t = n->tgt;
967
968 assert (n->refcount != REFCOUNT_INFINITY
969 && n->refcount != REFCOUNT_LINK);
970 if (n->refcount < n->dynamic_refcount)
971 {
972 gomp_mutex_unlock (&acc_dev->lock);
973 gomp_fatal ("dynamic reference counting assertion failed");
974 }
975
976 if (finalize)
977 {
978 n->refcount -= n->dynamic_refcount;
979 n->dynamic_refcount = 0;
980 }
981 else if (n->dynamic_refcount)
982 {
983 n->refcount--;
984 n->dynamic_refcount--;
985 }
986
987 if (n->refcount == 0)
988 {
989 goacc_aq aq = get_goacc_asyncqueue (async);
990
991 if (force_copyfrom)
992 {
993 void *d = (void *) (t->tgt_start + n->tgt_offset
994 + (uintptr_t) h - n->host_start);
995
996 gomp_copy_dev2host (acc_dev, aq, h, d, s);
997 }
998
999 if (aq)
1000 {
1001 /* TODO The way the following code is currently implemented, we need
1002 the 'is_tgt_unmapped' return value from 'gomp_remove_var', so
1003 can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt'
1004 comment in
1005 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
1006 PR92881 -- so have to synchronize here. */
1007 if (!acc_dev->openacc.async.synchronize_func (aq))
1008 {
1009 gomp_mutex_unlock (&acc_dev->lock);
1010 gomp_fatal ("synchronize failed");
1011 }
1012 }
1013 bool is_tgt_unmapped = false;
1014 for (size_t i = 0; i < t->list_count; i++)
1015 {
1016 is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
1017 if (is_tgt_unmapped)
1018 break;
1019 }
1020 assert (is_tgt_unmapped);
1021 }
1022
1023 gomp_mutex_unlock (&acc_dev->lock);
1024
1025 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
1026 }
1027
1028 /* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
1029 'GOMP_MAP_POINTER'. */
1030
1031 static int
1032 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
1033 {
1034 if (pos + 1 >= mapnum)
1035 return 0;
1036
1037 unsigned char kind = kinds[pos+1] & 0xff;
1038
1039 if (kind == GOMP_MAP_TO_PSET)
1040 return 3;
1041 else if (kind == GOMP_MAP_POINTER)
1042 return 2;
1043
1044 return 0;
1045 }
1046
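/* Illustration of the mapping layouts this looks for (symbolic, assumed
   examples rather than actual compiler output):

     kinds = { GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER }   -> returns 3
     kinds = { GOMP_MAP_TO, GOMP_MAP_POINTER }                     -> returns 2
     kinds = { GOMP_MAP_TO }                                       -> returns 0

   The first form is what a Fortran allocated array produces, the second a
   dynamically allocated C array, the third a plain scalar or static array
   (see also the comment in 'GOACC_enter_exit_data' below).  */
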
1047 void
1048 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
1049 size_t *sizes, unsigned short *kinds, int async,
1050 int num_waits, ...)
1051 {
1052 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
1053
1054 struct goacc_thread *thr;
1055 struct gomp_device_descr *acc_dev;
1056 bool data_enter = false;
1057 size_t i;
1058
1059 goacc_lazy_initialize ();
1060
1061 thr = goacc_thread ();
1062 acc_dev = thr->dev;
1063
1064 /* Determine whether "finalize" semantics apply to all mappings of this
1065 OpenACC directive. */
1066 bool finalize = false;
1067 if (mapnum > 0)
1068 {
1069 unsigned char kind = kinds[0] & 0xff;
1070 if (kind == GOMP_MAP_DELETE
1071 || kind == GOMP_MAP_FORCE_FROM)
1072 finalize = true;
1073 }
1074
1075 /* Determine if this is an "acc enter data". */
1076 for (i = 0; i < mapnum; ++i)
1077 {
1078 unsigned char kind = kinds[i] & 0xff;
1079
1080 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1081 continue;
1082
1083 if (kind == GOMP_MAP_FORCE_ALLOC
1084 || kind == GOMP_MAP_FORCE_PRESENT
1085 || kind == GOMP_MAP_FORCE_TO
1086 || kind == GOMP_MAP_TO
1087 || kind == GOMP_MAP_ALLOC)
1088 {
1089 data_enter = true;
1090 break;
1091 }
1092
1093 if (kind == GOMP_MAP_RELEASE
1094 || kind == GOMP_MAP_DELETE
1095 || kind == GOMP_MAP_FROM
1096 || kind == GOMP_MAP_FORCE_FROM)
1097 break;
1098
1099 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1100 kind);
1101 }
1102
1103 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
1104
1105 acc_prof_info prof_info;
1106 if (profiling_p)
1107 {
1108 thr->prof_info = &prof_info;
1109
1110 prof_info.event_type
1111 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
1112 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
1113 prof_info.version = _ACC_PROF_INFO_VERSION;
1114 prof_info.device_type = acc_device_type (acc_dev->type);
1115 prof_info.device_number = acc_dev->target_id;
1116 prof_info.thread_id = -1;
1117 prof_info.async = async;
1118 prof_info.async_queue = prof_info.async;
1119 prof_info.src_file = NULL;
1120 prof_info.func_name = NULL;
1121 prof_info.line_no = -1;
1122 prof_info.end_line_no = -1;
1123 prof_info.func_line_no = -1;
1124 prof_info.func_end_line_no = -1;
1125 }
1126 acc_event_info enter_exit_data_event_info;
1127 if (profiling_p)
1128 {
1129 enter_exit_data_event_info.other_event.event_type
1130 = prof_info.event_type;
1131 enter_exit_data_event_info.other_event.valid_bytes
1132 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
1133 enter_exit_data_event_info.other_event.parent_construct
1134 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
1135 enter_exit_data_event_info.other_event.implicit = 0;
1136 enter_exit_data_event_info.other_event.tool_info = NULL;
1137 }
1138 acc_api_info api_info;
1139 if (profiling_p)
1140 {
1141 thr->api_info = &api_info;
1142
1143 api_info.device_api = acc_device_api_none;
1144 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
1145 api_info.device_type = prof_info.device_type;
1146 api_info.vendor = -1;
1147 api_info.device_handle = NULL;
1148 api_info.context_handle = NULL;
1149 api_info.async_handle = NULL;
1150 }
1151
1152 if (profiling_p)
1153 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1154 &api_info);
1155
1156 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1157 || (flags & GOACC_FLAG_HOST_FALLBACK))
1158 {
1159 prof_info.device_type = acc_device_host;
1160 api_info.device_type = prof_info.device_type;
1161
1162 goto out_prof;
1163 }
1164
1165 if (num_waits)
1166 {
1167 va_list ap;
1168
1169 va_start (ap, num_waits);
1170 goacc_wait (async, num_waits, &ap);
1171 va_end (ap);
1172 }
1173
1174 /* In C, non-pointer variables and arrays are represented by a single data
1175 clause. Dynamically allocated arrays and subarrays are represented by a
1176 data clause followed by an internal GOMP_MAP_POINTER.
1177
1178 In Fortran, scalars and non-allocated arrays are represented by a single
1179 data clause. Allocated arrays and subarrays have three mappings:
1180 1) the original data clause, 2) a PSET, and 3) a pointer to the array data.
1181 */
1182
1183 if (data_enter)
1184 {
1185 for (i = 0; i < mapnum; i++)
1186 {
1187 unsigned char kind = kinds[i] & 0xff;
1188
1189 /* Scan for pointers and PSETs. */
1190 int pointer = find_pointer (i, mapnum, kinds);
1191
1192 if (!pointer)
1193 {
1194 switch (kind)
1195 {
1196 case GOMP_MAP_ALLOC:
1197 case GOMP_MAP_FORCE_ALLOC:
1198 acc_create_async (hostaddrs[i], sizes[i], async);
1199 break;
1200 case GOMP_MAP_TO:
1201 case GOMP_MAP_FORCE_TO:
1202 acc_copyin_async (hostaddrs[i], sizes[i], async);
1203 break;
1204 default:
1205 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1206 kind);
1207 break;
1208 }
1209 }
1210 else
1211 {
1212 goacc_insert_pointer (pointer, &hostaddrs[i], &sizes[i], &kinds[i],
1213 async);
1214 /* Skip the additional mappings consumed by 'goacc_insert_pointer':
1215 OpenACC requires Fortran arrays to be contiguous, so the data clause is
1216 followed by a GOMP_MAP_TO_PSET and/or a GOMP_MAP_POINTER mapping (see
1217 'find_pointer'); advance 'i' past them. */
1218 i += pointer - 1;
1219 }
1220 }
1221 }
1222 else
1223 for (i = 0; i < mapnum; ++i)
1224 {
1225 unsigned char kind = kinds[i] & 0xff;
1226
1227 int pointer = find_pointer (i, mapnum, kinds);
1228
1229 if (!pointer)
1230 {
1231 switch (kind)
1232 {
1233 case GOMP_MAP_RELEASE:
1234 case GOMP_MAP_DELETE:
1235 if (acc_is_present (hostaddrs[i], sizes[i]))
1236 {
1237 if (finalize)
1238 acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
1239 else
1240 acc_delete_async (hostaddrs[i], sizes[i], async);
1241 }
1242 break;
1243 case GOMP_MAP_FROM:
1244 case GOMP_MAP_FORCE_FROM:
1245 if (finalize)
1246 acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
1247 else
1248 acc_copyout_async (hostaddrs[i], sizes[i], async);
1249 break;
1250 default:
1251 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1252 kind);
1253 break;
1254 }
1255 }
1256 else
1257 {
1258 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
1259 || kind == GOMP_MAP_FROM);
1260 goacc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
1261 finalize);
1262 /* See the above comment. */
1263 i += pointer - 1;
1264 }
1265 }
1266
1267 out_prof:
1268 if (profiling_p)
1269 {
1270 prof_info.event_type
1271 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
1272 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
1273 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
1274 &api_info);
1275
1276 thr->prof_info = NULL;
1277 thr->api_info = NULL;
1278 }
1279 }