[OpenACC] Refactor 'present_create_copy' into 'goacc_enter_data'
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return block containing [D->S), or NULL if not contained.
72
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
88
/* Allocate S bytes of device memory for the current device.  OpenACC is
   silent on how memory exhaustion is indicated.  We return NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  /* On shared-memory devices the host heap is the device heap.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  /* Set up (and later tear down) the per-thread profiling-interface
     state around the device allocation call.  */
  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}
121
/* Free device memory D previously allocated with acc_malloc.  Refuses to
   free memory that is still part of an active host<->device mapping.  */

void
acc_free (void *d)
{
  splay_tree_key k;

  /* Freeing NULL is a no-op, matching free().  */
  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  /* On shared-memory devices acc_malloc used the host heap.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      /* D lies inside a live mapping: reconstruct the corresponding host
	 address for the error message.  (Uses GNU void*-arithmetic.)  */
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
172
/* Common implementation of the acc_memcpy_{to,from}_device[_async] entry
   points.  FROM selects the direction (true: device D -> host H, false:
   host H -> device D), S is the byte count, ASYNC the async queue.
   LIBFNNAME names the public caller; it is currently unused here.  */

static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
		      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  /* Shared memory: a plain (possibly overlapping) copy suffices.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
	memmove (h, d, s);
      else
	memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
213
/* Synchronously copy S bytes from host H to device D.  */
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}
219
/* Asynchronously copy S bytes from host H to device D on queue ASYNC.  */
void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}
225
/* Synchronously copy S bytes from device D to host H.  */
void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}
231
/* Asynchronously copy S bytes from device D to host H on queue ASYNC.  */
void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
237
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
/* Return 1 if host data [H,+S] is entirely present on the device,
   0 otherwise.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  /* An empty or NULL range is never "present".  */
  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: any non-NULL host data is trivially present.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* Reject partial overlap: the whole of [H,H+S) must lie within the
     mapping.  NOTE(review): the third test looks like a guard for
     '(uintptr_t)h + s' wrapping around — confirm.  */
  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
352
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
364
365 goacc_lazy_initialize ();
366
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
369
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 {
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
374 }
375 else
376 {
377 struct goacc_thread *thr = goacc_thread ();
378
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
382
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386
387 gomp_mutex_lock (&acc_dev->lock);
388
389 if (lookup_host (acc_dev, h, s))
390 {
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
394 }
395
396 if (lookup_dev (&thr->dev->mem_map, d, s))
397 {
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
401 }
402
403 gomp_mutex_unlock (&acc_dev->lock);
404
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 splay_tree_key n = tgt->list[0].key;
409 assert (n->refcount == 1);
410 assert (n->dynamic_refcount == 0);
411 /* Special reference counting behavior. */
412 n->refcount = REFCOUNT_INFINITY;
413
414 if (profiling_p)
415 {
416 thr->prof_info = NULL;
417 thr->api_info = NULL;
418 }
419 }
420 }
421
/* Remove the mapping for host address H that was established by
   'acc_map_data', without freeing the underlying device memory.  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* H must be exactly the start of the mapping, not an interior pointer.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  t = n->tgt;

  if (t->refcount == 1)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This prevents 'gomp_unmap_tgt' via 'gomp_remove_var' from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;
    }

  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
  assert (is_tgt_unmapped);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
494
495
/* Enter a dynamic mapping for host range [H,+S) with map KIND on async
   queue ASYNC: if the range is already present, bump its (structured and
   dynamic) reference counts; otherwise create the mapping.

   Return the device pointer.  */

static void *
goacc_enter_data (void *h, size_t s, unsigned short kind, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Shared memory: nothing to map, the host pointer is the result.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      /* The whole of [H,H+S) must lie inside the existing mapping.  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      /* 'acc_map_data' mappings (REFCOUNT_INFINITY) keep their structured
	 refcount; only the dynamic refcount advances.  */
      assert (n->refcount != REFCOUNT_LINK);
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount++;
      n->dynamic_refcount++;

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      void *hostaddrs = h;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      /* Not present: create a fresh single-entry mapping.  */
      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
				 &kind, true, GOMP_MAP_VARS_ENTER_DATA);
      assert (tgt);
      n = tgt->list[0].key;
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      n->dynamic_refcount++;

      d = tgt->to_free;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
576
/* Dynamically map [H,+S) without copying data in; return device pointer.  */
void *
acc_create (void *h, size_t s)
{
  return goacc_enter_data (h, s, GOMP_MAP_ALLOC, acc_async_sync);
}
582
/* Asynchronous variant of acc_create; no device pointer is returned.  */
void
acc_create_async (void *h, size_t s, int async)
{
  goacc_enter_data (h, s, GOMP_MAP_ALLOC, async);
}
588
/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
/* Fallback definitions for toolchains without symbol aliasing.  */
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
607
/* Dynamically map [H,+S) and copy the data in; return device pointer.  */
void *
acc_copyin (void *h, size_t s)
{
  return goacc_enter_data (h, s, GOMP_MAP_TO, acc_async_sync);
}
613
/* Asynchronous variant of acc_copyin; no device pointer is returned.  */
void
acc_copyin_async (void *h, size_t s, int async)
{
  goacc_enter_data (h, s, GOMP_MAP_TO, async);
}
619
/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
/* Fallback definitions for toolchains without symbol aliasing.  */
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
638
/* Flags for delete_copyout.  */
#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

/* Dynamically unmap host range [H,+S): decrement the reference counts,
   and once the mapping's refcount reaches zero, optionally copy the data
   back to the host (FLAG_COPYOUT) and remove the mapping.  FLAG_FINALIZE
   drops all dynamic references at once.  ASYNC selects the async queue;
   LIBFNNAME names the public caller (currently unused here).  */

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  /* No need to call lazy open, as the data must already have been
     mapped.  */

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Nothing to unmap or copy on shared-memory devices.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, s);
  if (!n)
    /* PR92726, RP92970, PR92984: no-op.  */
    goto out;

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  /* The structured refcount must never drop below the dynamic one.  */
  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY
      && n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      /* Drop all dynamic references in one go.  */
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->dynamic_refcount--;
    }

  if (n->refcount == 0)
    {
      goacc_aq aq = get_goacc_asyncqueue (async);

      if (f & FLAG_COPYOUT)
	{
	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
	   'gomp_unref_tgt' comment in
	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	   PR92881.  */
	gomp_remove_var_async (acc_dev, n, aq);
      else
	{
	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
	  assert (is_tgt_unmapped);
	}
    }

 out:
  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
732
/* Dynamically unmap [H,+S), no copy-back.  */
void
acc_delete (void *h , size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}
738
/* Asynchronous variant of acc_delete.  */
void
acc_delete_async (void *h , size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}
744
/* As acc_delete, but drop all dynamic references at once.  */
void
acc_delete_finalize (void *h , size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}
750
/* Asynchronous variant of acc_delete_finalize.  */
void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
756
/* Copy [H,+S) back to the host when last reference goes away, then unmap.  */
void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}
762
/* Asynchronous variant of acc_copyout.  */
void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}
768
/* As acc_copyout, but drop all dynamic references at once.  */
void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
		  __FUNCTION__);
}
775
/* Asynchronous variant of acc_copyout_finalize.  */
void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}
781
/* Common implementation of acc_update_{device,self}[_async]: refresh the
   device copy from the host (IS_DEV true) or the host copy from the device
   (IS_DEV false) for the already-mapped range [H,+S).  */

static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Host and device data coincide; nothing to update.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  /* Device address corresponding to H within the mapping.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
839
/* Refresh the device copy of mapped range [H,+S) from the host.  */
void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}
845
/* Asynchronous variant of acc_update_device.  */
void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}
851
/* Refresh the host copy of mapped range [H,+S) from the device.  */
void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}
857
/* Asynchronous variant of acc_update_self.  */
void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
863
864
865 /* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
866 functions. */
867
/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.

   Only the first mapping is considered in reference counting; the following
   ones implicitly follow suit.  Similarly, 'copyout' ('force_copyfrom') is
   done only for the first mapping.  */

static void
goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
		      void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Non-present Fortran optional argument: nothing to map.  */
  if (*hostaddrs == NULL)
    return;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      /* Already present: bump the refcounts of the whole MAPNUM-long group
	 of list entries starting at the lead mapping's position.  */
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      assert (n->refcount != REFCOUNT_INFINITY
	      && n->refcount != REFCOUNT_LINK);
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    return;
	  }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
			     NULL, sizes, kinds, true, GOMP_MAP_VARS_ENTER_DATA);
  assert (tgt);
  splay_tree_key n = tgt->list[0].key;
  assert (n->refcount == 1);
  assert (n->dynamic_refcount == 0);
  n->dynamic_refcount++;
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
}
921
/* Counterpart of goacc_insert_pointer: dynamically unmap the pointer/PSET
   group whose lead mapping covers host range [H,+S), optionally copying
   the data back (FORCE_COPYFROM).  FINALIZE drops all dynamic references
   at once; ASYNC selects the async queue.  */

static void
goacc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
		      int finalize)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;

  /* Not mapped (or shared memory): nothing to do.  */
  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  /* The structured refcount must never drop below the dynamic one.  */
  assert (n->refcount != REFCOUNT_INFINITY
	  && n->refcount != REFCOUNT_LINK);
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->refcount--;
      n->dynamic_refcount--;
    }

  if (n->refcount == 0)
    {
      goacc_aq aq = get_goacc_asyncqueue (async);

      if (force_copyfrom)
	{
	  void *d = (void *) (t->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);

	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	{
	  /* TODO The way the following code is currently implemented, we need
	     the 'is_tgt_unmapped' return value from 'gomp_remove_var', so
	     can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt'
	     comment in
	     <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	     PR92881 -- so have to synchronize here.  */
	  if (!acc_dev->openacc.async.synchronize_func (aq))
	    {
	      gomp_mutex_unlock (&acc_dev->lock);
	      gomp_fatal ("synchronize failed");
	    }
	}
      /* Remove every entry of the group; gomp_remove_var reports true once
	 the whole target block has been unmapped.  */
      bool is_tgt_unmapped = false;
      for (size_t i = 0; i < t->list_count; i++)
	{
	  is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
	  if (is_tgt_unmapped)
	    break;
	}
      assert (is_tgt_unmapped);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
1007
1008 /* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
1009 'GOMP_MAP_POINTER'. */
1010
1011 static int
1012 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
1013 {
1014 if (pos + 1 >= mapnum)
1015 return 0;
1016
1017 unsigned char kind = kinds[pos+1] & 0xff;
1018
1019 if (kind == GOMP_MAP_TO_PSET)
1020 return 3;
1021 else if (kind == GOMP_MAP_POINTER)
1022 return 2;
1023
1024 return 0;
1025 }
1026
/* Implement OpenACC 'enter data' / 'exit data' directives: classify the
   MAPNUM mappings as an enter or an exit, dispatch profiling events, honor
   wait clauses, and apply each mapping via the dynamic acc_* entry points
   (with special handling for POINTER/PSET groups).  */

void
GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		       size_t *sizes, unsigned short *kinds, int async,
		       int num_waits, ...)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  bool data_enter = false;
  size_t i;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Determine if this is an "acc enter data".  */
  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      /* POINTER/PSET follow their lead mapping; skip them here.  */
      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
	continue;

      if (kind == GOMP_MAP_FORCE_ALLOC
	  || kind == GOMP_MAP_FORCE_PRESENT
	  || kind == GOMP_MAP_FORCE_TO
	  || kind == GOMP_MAP_TO
	  || kind == GOMP_MAP_ALLOC)
	{
	  data_enter = true;
	  break;
	}

      if (kind == GOMP_MAP_RELEASE
	  || kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_FROM
	  || kind == GOMP_MAP_FORCE_FROM)
	break;

      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
		  kind);
    }

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }
  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
	= prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }
  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
			      &api_info);

  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      /* NOTE(review): these stores happen even when !profiling_p, writing
	 into otherwise-uninitialized locals; harmless but worth
	 confirming it is intentional.  */
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  if (num_waits)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, &ap);
      va_end (ap);
    }

  /* In c, non-pointers and arrays are represented by a single data clause.
     Dynamically allocated arrays and subarrays are represented by a data
     clause followed by an internal GOMP_MAP_POINTER.

     In fortran, scalars and not allocated arrays are represented by a
     single data clause.  Allocated arrays and subarrays have three mappings:
     1) the original data clause, 2) a PSET 3) a pointer to the array data.
  */

  if (data_enter)
    {
      for (i = 0; i < mapnum; i++)
	{
	  unsigned char kind = kinds[i] & 0xff;

	  /* Scan for pointers and PSETs.  */
	  int pointer = find_pointer (i, mapnum, kinds);

	  if (!pointer)
	    {
	      switch (kind)
		{
		case GOMP_MAP_ALLOC:
		case GOMP_MAP_FORCE_ALLOC:
		  acc_create_async (hostaddrs[i], sizes[i], async);
		  break;
		case GOMP_MAP_TO:
		case GOMP_MAP_FORCE_TO:
		  acc_copyin_async (hostaddrs[i], sizes[i], async);
		  break;
		default:
		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
			      kind);
		  break;
		}
	    }
	  else
	    {
	      goacc_insert_pointer (pointer, &hostaddrs[i], &sizes[i], &kinds[i],
				    async);
	      /* Increment 'i' by two because OpenACC requires fortran
		 arrays to be contiguous, so each PSET is associated with
		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
		 one MAP_POINTER.  */
	      i += pointer - 1;
	    }
	}
    }
  else
    for (i = 0; i < mapnum; ++i)
      {
	unsigned char kind = kinds[i] & 0xff;

	bool finalize = (kind == GOMP_MAP_DELETE
			 || kind == GOMP_MAP_FORCE_FROM);

	int pointer = find_pointer (i, mapnum, kinds);

	if (!pointer)
	  {
	    switch (kind)
	      {
	      case GOMP_MAP_RELEASE:
	      case GOMP_MAP_DELETE:
		if (finalize)
		  acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
		else
		  acc_delete_async (hostaddrs[i], sizes[i], async);
		break;
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_FORCE_FROM:
		if (finalize)
		  acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
		else
		  acc_copyout_async (hostaddrs[i], sizes[i], async);
		break;
	      default:
		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
			    kind);
		break;
	      }
	  }
	else
	  {
	    bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
			     || kind == GOMP_MAP_FROM);
	    goacc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
				  finalize);
	    /* See the above comment.  */
	    i += pointer - 1;
	  }
      }

 out_prof:
  if (profiling_p)
    {
      prof_info.event_type
	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
				&api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}