[OpenACC] Refactor 'goacc_enter_data' so that it can be called from 'goacc_insert_pointer'
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #include <string.h>
34 #include <assert.h>
35
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
38
39 static splay_tree_key
40 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
41 {
42 struct splay_tree_key_s node;
43 splay_tree_key key;
44
45 node.host_start = (uintptr_t) h;
46 node.host_end = (uintptr_t) h + s;
47
48 key = splay_tree_lookup (&dev->mem_map, &node);
49
50 return key;
51 }
52
53 /* Helper for lookup_dev. Iterate over splay tree. */
54
55 static splay_tree_key
56 lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
57 {
58 splay_tree_key key = &node->key;
59 if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
60 return key;
61
62 key = NULL;
63 if (node->left)
64 key = lookup_dev_1 (node->left, d, s);
65 if (!key && node->right)
66 key = lookup_dev_1 (node->right, d, s);
67
68 return key;
69 }
70
71 /* Return block containing [D->S), or NULL if not contained.
72
73 This iterates over the splay tree. This is not expected to be a common
74 operation.
75
76 The device lock associated with MEM_MAP must be locked on entry, and remains
77 locked on exit. */
78
79 static splay_tree_key
80 lookup_dev (splay_tree mem_map, void *d, size_t s)
81 {
82 if (!mem_map || !mem_map->root)
83 return NULL;
84
85 return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
86 }
87
88
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
90 NULL. */
91
92 void *
93 acc_malloc (size_t s)
94 {
95 if (!s)
96 return NULL;
97
98 goacc_lazy_initialize ();
99
100 struct goacc_thread *thr = goacc_thread ();
101
102 assert (thr->dev);
103
104 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
105 return malloc (s);
106
107 acc_prof_info prof_info;
108 acc_api_info api_info;
109 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
110
111 void *res = thr->dev->alloc_func (thr->dev->target_id, s);
112
113 if (profiling_p)
114 {
115 thr->prof_info = NULL;
116 thr->api_info = NULL;
117 }
118
119 return res;
120 }
121
/* Free device memory D previously obtained from 'acc_malloc'.  Passing
   NULL is a no-op; on shared-memory devices this degenerates to plain
   'free'.  It is a fatal error to free device memory that is still part
   of a host<->device mapping (PR92503).  */

void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
    {
      /* D lies inside an active mapping; recover the corresponding host
	 address for the diagnostic, then refuse to free.  */
      void *offset = d - k->tgt->tgt_start + k->tgt_offset;
      void *h = k->host_start + offset;
      size_t h_size = k->host_end - k->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
	 used in a mapping".  */
      gomp_fatal ("refusing to free device memory space at %p that is still"
		  " mapped at [%p,+%d]",
		  d, h, (int) h_size);
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
172
173 static void
174 memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
175 const char *libfnname)
176 {
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread *thr = goacc_thread ();
180
181 assert (thr && thr->dev);
182
183 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
184 {
185 if (from)
186 memmove (h, d, s);
187 else
188 memmove (d, h, s);
189 return;
190 }
191
192 acc_prof_info prof_info;
193 acc_api_info api_info;
194 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
195 if (profiling_p)
196 {
197 prof_info.async = async;
198 prof_info.async_queue = prof_info.async;
199 }
200
201 goacc_aq aq = get_goacc_asyncqueue (async);
202 if (from)
203 gomp_copy_dev2host (thr->dev, aq, h, d, s);
204 else
205 gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
206
207 if (profiling_p)
208 {
209 thr->prof_info = NULL;
210 thr->api_info = NULL;
211 }
212 }
213
/* Public entry points for host<->device copies, synchronous and
   asynchronous; all are thin wrappers around 'memcpy_tofrom_device'.  */

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
237
238 /* Return the device pointer that corresponds to host data H. Or NULL
239 if no mapping. */
240
241 void *
242 acc_deviceptr (void *h)
243 {
244 splay_tree_key n;
245 void *d;
246 void *offset;
247
248 goacc_lazy_initialize ();
249
250 struct goacc_thread *thr = goacc_thread ();
251 struct gomp_device_descr *dev = thr->dev;
252
253 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
254 return h;
255
256 /* In the following, no OpenACC Profiling Interface events can possibly be
257 generated. */
258
259 gomp_mutex_lock (&dev->lock);
260
261 n = lookup_host (dev, h, 1);
262
263 if (!n)
264 {
265 gomp_mutex_unlock (&dev->lock);
266 return NULL;
267 }
268
269 offset = h - n->host_start;
270
271 d = n->tgt->tgt_start + n->tgt_offset + offset;
272
273 gomp_mutex_unlock (&dev->lock);
274
275 return d;
276 }
277
278 /* Return the host pointer that corresponds to device data D. Or NULL
279 if no mapping. */
280
281 void *
282 acc_hostptr (void *d)
283 {
284 splay_tree_key n;
285 void *h;
286 void *offset;
287
288 goacc_lazy_initialize ();
289
290 struct goacc_thread *thr = goacc_thread ();
291 struct gomp_device_descr *acc_dev = thr->dev;
292
293 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
294 return d;
295
296 /* In the following, no OpenACC Profiling Interface events can possibly be
297 generated. */
298
299 gomp_mutex_lock (&acc_dev->lock);
300
301 n = lookup_dev (&acc_dev->mem_map, d, 1);
302
303 if (!n)
304 {
305 gomp_mutex_unlock (&acc_dev->lock);
306 return NULL;
307 }
308
309 offset = d - n->tgt->tgt_start + n->tgt_offset;
310
311 h = n->host_start + offset;
312
313 gomp_mutex_unlock (&acc_dev->lock);
314
315 return h;
316 }
317
318 /* Return 1 if host data [H,+S] is present on the device. */
319
320 int
321 acc_is_present (void *h, size_t s)
322 {
323 splay_tree_key n;
324
325 if (!s || !h)
326 return 0;
327
328 goacc_lazy_initialize ();
329
330 struct goacc_thread *thr = goacc_thread ();
331 struct gomp_device_descr *acc_dev = thr->dev;
332
333 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
334 return h != NULL;
335
336 /* In the following, no OpenACC Profiling Interface events can possibly be
337 generated. */
338
339 gomp_mutex_lock (&acc_dev->lock);
340
341 n = lookup_host (acc_dev, h, s);
342
343 if (n && ((uintptr_t)h < n->host_start
344 || (uintptr_t)h + s > n->host_end
345 || s > n->host_end - n->host_start))
346 n = NULL;
347
348 gomp_mutex_unlock (&acc_dev->lock);
349
350 return n != NULL;
351 }
352
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
354
355 void
356 acc_map_data (void *h, void *d, size_t s)
357 {
358 struct target_mem_desc *tgt = NULL;
359 size_t mapnum = 1;
360 void *hostaddrs = h;
361 void *devaddrs = d;
362 size_t sizes = s;
363 unsigned short kinds = GOMP_MAP_ALLOC;
364
365 goacc_lazy_initialize ();
366
367 struct goacc_thread *thr = goacc_thread ();
368 struct gomp_device_descr *acc_dev = thr->dev;
369
370 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
371 {
372 if (d != h)
373 gomp_fatal ("cannot map data on shared-memory system");
374 }
375 else
376 {
377 struct goacc_thread *thr = goacc_thread ();
378
379 if (!d || !h || !s)
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h, (int)s, (void *)d, (int)s);
382
383 acc_prof_info prof_info;
384 acc_api_info api_info;
385 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
386
387 gomp_mutex_lock (&acc_dev->lock);
388
389 if (lookup_host (acc_dev, h, s))
390 {
391 gomp_mutex_unlock (&acc_dev->lock);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
393 (int)s);
394 }
395
396 if (lookup_dev (&thr->dev->mem_map, d, s))
397 {
398 gomp_mutex_unlock (&acc_dev->lock);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
400 (int)s);
401 }
402
403 gomp_mutex_unlock (&acc_dev->lock);
404
405 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
406 &kinds, true, GOMP_MAP_VARS_ENTER_DATA);
407 assert (tgt);
408 splay_tree_key n = tgt->list[0].key;
409 assert (n->refcount == 1);
410 assert (n->dynamic_refcount == 0);
411 /* Special reference counting behavior. */
412 n->refcount = REFCOUNT_INFINITY;
413
414 if (profiling_p)
415 {
416 thr->prof_info = NULL;
417 thr->api_info = NULL;
418 }
419 }
420 }
421
/* Remove the mapping established by 'acc_map_data' for host address H.
   Fatal error if H is not mapped, is not the start of its block, or was
   not mapped via 'acc_map_data' (i.e. refcount != REFCOUNT_INFINITY).
   The device memory itself is deliberately NOT freed.  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* H must be the exact start of the mapped block, not an interior
     address.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
		  " by 'acc_map_data'",
		  (void *) h, (int) host_size);
    }

  t = n->tgt;

  if (t->refcount == 1)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This prevents 'gomp_unmap_tgt' via 'gomp_remove_var' from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;
    }

  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
  assert (is_tgt_unmapped);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
494
495
/* Enter dynamic mappings.

   The handling for MAPNUM bigger than one is special handling for
   'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.  For these, only the first mapping
   is considered in reference counting; the following ones implicitly follow
   suit.

   If there's just one mapping, return the device pointer.  */

static void *
goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
		  int async)
{
  void *d;
  splay_tree_key n;

  assert (mapnum > 0);
  if (mapnum == 1
      && (!hostaddrs[0] || !sizes[0]))
    gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
  else if (mapnum > 1
	   && !hostaddrs[0])
    /* NULL host address with multiple mappings: nothing to map (e.g. a
       non-present Fortran optional); the caller gets no usable pointer.  */
    return /* n/a */ (void *) -1;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared memory: no mapping is created; host address doubles as
	 device address.  */
      if (mapnum == 1)
	return hostaddrs[0];
      else
	return /* n/a */ (void *) -1;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
  if (n && mapnum == 1)
    {
      /* Case 1: already present, single mapping — just bump the reference
	 counts.  */
      void *h = hostaddrs[0];
      size_t s = sizes[0];

      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      assert (n->refcount != REFCOUNT_LINK);
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount++;
      n->dynamic_refcount++;

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (n && mapnum > 1)
    {
      /* Case 2: already present, pointer/PSET group — bump the counts of
	 each key in the group, located via the first key's position in its
	 target_mem_desc list.  */
      d = /* n/a */ (void *) -1;

      assert (n->refcount != REFCOUNT_INFINITY
	      && n->refcount != REFCOUNT_LINK);

      bool processed = false;

      struct target_mem_desc *tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
	if (tgt->list[i].key == n)
	  {
	    for (size_t j = 0; j < mapnum; j++)
	      if (i + j < tgt->list_count && tgt->list[i + j].key)
		{
		  tgt->list[i + j].key->refcount++;
		  tgt->list[i + j].key->dynamic_refcount++;
		}
	    processed = true;
	  }

      gomp_mutex_unlock (&acc_dev->lock);
      if (!processed)
	gomp_fatal ("dynamic refcount incrementing failed for pointer/pset");
    }
  else
    {
      /* Case 3: not present — create the mapping(s).  */
      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      struct target_mem_desc *tgt
	= gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
			       kinds, true, GOMP_MAP_VARS_ENTER_DATA);
      assert (tgt);
      n = tgt->list[0].key;
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      /* Only the first mapping carries the dynamic reference count.  */
      n->dynamic_refcount++;

      d = tgt->to_free;
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}
618
/* Allocate (without copying) device memory for host range [H,+S] and
   return the device pointer; 'acc_create_async' is the asynchronous
   variant, which discards the pointer.  */

void *
acc_create (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_ALLOC };
  goacc_enter_data (1, &h, &s, &kinds, async);
}
632
/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
/* Prefer linker-level aliases where supported; otherwise fall back to
   trivial wrapper functions.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
651
/* Map host range [H,+S] to the device and copy the data over, returning
   the device pointer; 'acc_copyin_async' is the asynchronous variant,
   which discards the pointer.  */

void *
acc_copyin (void *h, size_t s)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  unsigned short kinds[1] = { GOMP_MAP_TO };
  goacc_enter_data (1, &h, &s, &kinds, async);
}
665
/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
/* Prefer linker-level aliases where supported; otherwise fall back to
   trivial wrapper functions.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
684
685
/* Exit a dynamic mapping for host range [H,+S].  KIND selects release
   vs. delete vs. copy-back semantics; the '_FORCE_'/DELETE variants
   finalize, i.e. drop all dynamic references at once.  The mapping is
   removed (and data possibly copied back) only when the structural
   reference count reaches zero.  */

static void
goacc_exit_data (void *h, size_t s, unsigned short kind, int async)
{
  /* No need to call lazy open, as the data must already have been
     mapped.  */

  /* Strip any modifier bits; only the plain map kind matters here.  */
  kind &= 0xff;

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, s);
  if (!n)
    /* PR92726, RP92970, PR92984: no-op.  */
    goto out;

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  /* The structural count can never be below the dynamic count.  */
  assert (n->refcount != REFCOUNT_LINK);
  if (n->refcount != REFCOUNT_INFINITY
      && n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  bool finalize = (kind == GOMP_MAP_DELETE
		   || kind == GOMP_MAP_FORCE_FROM);
  if (finalize)
    {
      /* Finalize: drop all dynamic references in one go.  */
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      /* Regular exit: drop one dynamic reference, if any remain.  */
      if (n->refcount != REFCOUNT_INFINITY)
	n->refcount--;
      n->dynamic_refcount--;
    }

  if (n->refcount == 0)
    {
      /* Last reference gone: optionally copy back, then unmap.  */
      goacc_aq aq = get_goacc_asyncqueue (async);

      bool copyout = (kind == GOMP_MAP_FROM
		      || kind == GOMP_MAP_FORCE_FROM);
      if (copyout)
	{
	  void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	/* TODO We can't do the 'is_tgt_unmapped' checking -- see the
	   'gomp_unref_tgt' comment in
	   <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	   PR92881.  */
	gomp_remove_var_async (acc_dev, n, aq);
      else
	{
	  bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
	  assert (is_tgt_unmapped);
	}
    }

 out:
  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}
784
/* Public 'exit data' entry points: release/delete/copyout, each with a
   synchronous and an asynchronous flavor; all thin wrappers around
   'goacc_exit_data'.  */

void
acc_delete (void *h , size_t s)
{
  goacc_exit_data (h, s, GOMP_MAP_RELEASE, acc_async_sync);
}

void
acc_delete_async (void *h , size_t s, int async)
{
  goacc_exit_data (h, s, GOMP_MAP_RELEASE, async);
}

void
acc_delete_finalize (void *h , size_t s)
{
  goacc_exit_data (h, s, GOMP_MAP_DELETE, acc_async_sync);
}

void
acc_delete_finalize_async (void *h , size_t s, int async)
{
  goacc_exit_data (h, s, GOMP_MAP_DELETE, async);
}

void
acc_copyout (void *h, size_t s)
{
  goacc_exit_data (h, s, GOMP_MAP_FROM, acc_async_sync);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  goacc_exit_data (h, s, GOMP_MAP_FROM, async);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, async);
}
832
833 static void
834 update_dev_host (int is_dev, void *h, size_t s, int async)
835 {
836 splay_tree_key n;
837 void *d;
838
839 goacc_lazy_initialize ();
840
841 struct goacc_thread *thr = goacc_thread ();
842 struct gomp_device_descr *acc_dev = thr->dev;
843
844 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
845 return;
846
847 /* Fortran optional arguments that are non-present result in a
848 NULL host address here. This can safely be ignored as it is
849 not possible to 'update' a non-present optional argument. */
850 if (h == NULL)
851 return;
852
853 acc_prof_info prof_info;
854 acc_api_info api_info;
855 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
856 if (profiling_p)
857 {
858 prof_info.async = async;
859 prof_info.async_queue = prof_info.async;
860 }
861
862 gomp_mutex_lock (&acc_dev->lock);
863
864 n = lookup_host (acc_dev, h, s);
865
866 if (!n)
867 {
868 gomp_mutex_unlock (&acc_dev->lock);
869 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
870 }
871
872 d = (void *) (n->tgt->tgt_start + n->tgt_offset
873 + (uintptr_t) h - n->host_start);
874
875 goacc_aq aq = get_goacc_asyncqueue (async);
876
877 if (is_dev)
878 gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
879 else
880 gomp_copy_dev2host (acc_dev, aq, h, d, s);
881
882 gomp_mutex_unlock (&acc_dev->lock);
883
884 if (profiling_p)
885 {
886 thr->prof_info = NULL;
887 thr->api_info = NULL;
888 }
889 }
890
/* Public 'update' entry points, synchronous and asynchronous; all thin
   wrappers around 'update_dev_host'.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
914
915
916 /* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
917 functions. */
918
/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.

   Only the first mapping is considered in reference counting; the following
   ones implicitly follow suit.  Similarly, 'copyout' is done only for the
   first mapping.  KIND selects release/delete/copy-back behavior, as in
   'goacc_exit_data'.  */

static void
goacc_remove_pointer (void *h, size_t s, unsigned short kind, int async)
{
  /* Strip any modifier bits; only the plain map kind matters here.  */
  kind &= 0xff;

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;

  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  /* The structural count can never be below the dynamic count.  */
  assert (n->refcount != REFCOUNT_INFINITY
	  && n->refcount != REFCOUNT_LINK);
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  bool finalize = (kind == GOMP_MAP_DELETE
		   || kind == GOMP_MAP_FORCE_FROM);
  if (finalize)
    {
      /* Finalize: drop all dynamic references in one go.  */
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      /* Regular exit: drop one dynamic reference.  */
      n->refcount--;
      n->dynamic_refcount--;
    }

  if (n->refcount == 0)
    {
      /* Last reference gone: optionally copy back, then unmap the whole
	 pointer/PSET group.  */
      goacc_aq aq = get_goacc_asyncqueue (async);

      bool copyout = (kind == GOMP_MAP_FROM
		      || kind == GOMP_MAP_FORCE_FROM);
      if (copyout)
	{
	  void *d = (void *) (t->tgt_start + n->tgt_offset
			      + (uintptr_t) h - n->host_start);
	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
	}

      if (aq)
	{
	  /* TODO The way the following code is currently implemented, we need
	     the 'is_tgt_unmapped' return value from 'gomp_remove_var', so
	     can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt'
	     comment in
	     <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
	     PR92881 -- so have to synchronize here.  */
	  if (!acc_dev->openacc.async.synchronize_func (aq))
	    {
	      gomp_mutex_unlock (&acc_dev->lock);
	      gomp_fatal ("synchronize failed");
	    }
	}
      /* Remove each key of the group until the target_mem_desc itself is
	 unmapped.  */
      bool is_tgt_unmapped = false;
      for (size_t i = 0; i < t->list_count; i++)
	{
	  is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
	  if (is_tgt_unmapped)
	    break;
	}
      assert (is_tgt_unmapped);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
1014
1015 /* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
1016 'GOMP_MAP_POINTER'. */
1017
1018 static int
1019 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
1020 {
1021 if (pos + 1 >= mapnum)
1022 return 0;
1023
1024 unsigned char kind = kinds[pos+1] & 0xff;
1025
1026 if (kind == GOMP_MAP_TO_PSET)
1027 return 3;
1028 else if (kind == GOMP_MAP_POINTER)
1029 return 2;
1030
1031 return 0;
1032 }
1033
/* Compiler entry point for OpenACC 'enter data' / 'exit data' constructs.
   MAPNUM/HOSTADDRS/SIZES/KINDS describe the data clauses; ASYNC and the
   NUM_WAITS varargs give the async queue and wait dependences.  Whether
   this is an 'enter data' or 'exit data' is inferred from the map kinds.  */

void
GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
		       size_t *sizes, unsigned short *kinds, int async,
		       int num_waits, ...)
{
  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);

  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  bool data_enter = false;
  size_t i;

  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Determine if this is an "acc enter data".  */
  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      /* Pointer/PSET kinds follow their parent clause; skip them.  */
      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
	continue;

      if (kind == GOMP_MAP_FORCE_ALLOC
	  || kind == GOMP_MAP_FORCE_PRESENT
	  || kind == GOMP_MAP_FORCE_TO
	  || kind == GOMP_MAP_TO
	  || kind == GOMP_MAP_ALLOC)
	{
	  data_enter = true;
	  break;
	}

      if (kind == GOMP_MAP_RELEASE
	  || kind == GOMP_MAP_DELETE
	  || kind == GOMP_MAP_FROM
	  || kind == GOMP_MAP_FORCE_FROM)
	break;

      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
		  kind);
    }

  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);

  /* Set up and dispatch the profiling-interface 'start' event.  */
  acc_prof_info prof_info;
  if (profiling_p)
    {
      thr->prof_info = &prof_info;

      prof_info.event_type
	= data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
      prof_info.version = _ACC_PROF_INFO_VERSION;
      prof_info.device_type = acc_device_type (acc_dev->type);
      prof_info.device_number = acc_dev->target_id;
      prof_info.thread_id = -1;
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
      prof_info.src_file = NULL;
      prof_info.func_name = NULL;
      prof_info.line_no = -1;
      prof_info.end_line_no = -1;
      prof_info.func_line_no = -1;
      prof_info.func_end_line_no = -1;
    }
  acc_event_info enter_exit_data_event_info;
  if (profiling_p)
    {
      enter_exit_data_event_info.other_event.event_type
	= prof_info.event_type;
      enter_exit_data_event_info.other_event.valid_bytes
	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
      enter_exit_data_event_info.other_event.parent_construct
	= data_enter ? acc_construct_enter_data : acc_construct_exit_data;
      enter_exit_data_event_info.other_event.implicit = 0;
      enter_exit_data_event_info.other_event.tool_info = NULL;
    }
  acc_api_info api_info;
  if (profiling_p)
    {
      thr->api_info = &api_info;

      api_info.device_api = acc_device_api_none;
      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
      api_info.device_type = prof_info.device_type;
      api_info.vendor = -1;
      api_info.device_handle = NULL;
      api_info.context_handle = NULL;
      api_info.async_handle = NULL;
    }

  if (profiling_p)
    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
			      &api_info);

  /* Shared memory or host fallback: nothing to map; still emit the 'end'
     profiling event with the device type corrected to the host.  */
  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || (flags & GOACC_FLAG_HOST_FALLBACK))
    {
      prof_info.device_type = acc_device_host;
      api_info.device_type = prof_info.device_type;

      goto out_prof;
    }

  if (num_waits)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, &ap);
      va_end (ap);
    }

  /* In c, non-pointers and arrays are represented by a single data clause.
     Dynamically allocated arrays and subarrays are represented by a data
     clause followed by an internal GOMP_MAP_POINTER.

     In fortran, scalars and not allocated arrays are represented by a
     single data clause.  Allocated arrays and subarrays have three mappings:
     1) the original data clause, 2) a PSET 3) a pointer to the array data.
  */

  if (data_enter)
    {
      for (i = 0; i < mapnum; i++)
	{
	  /* Scan for pointers and PSETs.  */
	  int pointer = find_pointer (i, mapnum, kinds);

	  if (!pointer)
	    {
	      unsigned char kind = kinds[i] & 0xff;
	      switch (kind)
		{
		case GOMP_MAP_ALLOC:
		case GOMP_MAP_FORCE_ALLOC:
		case GOMP_MAP_TO:
		case GOMP_MAP_FORCE_TO:
		  break;
		default:
		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
			      kind);
		  break;
		}

	      /* We actually have one mapping.  */
	      pointer = 1;
	    }

	  goacc_enter_data (pointer, &hostaddrs[i], &sizes[i], &kinds[i],
			    async);
	  /* If applicable, increment 'i' further; OpenACC requires fortran
	     arrays to be contiguous, so each PSET is associated with
	     one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
	     one MAP_POINTER.  */
	  i += pointer - 1;
	}
    }
  else
    for (i = 0; i < mapnum; ++i)
      {
	int pointer = find_pointer (i, mapnum, kinds);

	if (!pointer)
	  {
	    unsigned char kind = kinds[i] & 0xff;
	    switch (kind)
	      {
	      case GOMP_MAP_RELEASE:
	      case GOMP_MAP_DELETE:
	      case GOMP_MAP_FROM:
	      case GOMP_MAP_FORCE_FROM:
		break;
	      default:
		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
			    kind);
		break;
	      }

	    goacc_exit_data (hostaddrs[i], sizes[i], kinds[i], async);
	  }
	else
	  {
	    goacc_remove_pointer (hostaddrs[i], sizes[i], kinds[i], async);
	    /* See the above comment.  */
	    i += pointer - 1;
	  }
      }

 out_prof:
  /* Dispatch the matching profiling-interface 'end' event and detach the
     per-thread profiling state.  */
  if (profiling_p)
    {
      prof_info.event_type
	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
				&api_info);

      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}