[PR92840] [OpenACC] Refuse 'acc_unmap_data' unless mapped by 'acc_map_data'
libgomp/oacc-mem.c
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2019 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <string.h>
#include <assert.h>
/* Return block containing [H,+S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return block containing [D,+S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry,
   and remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  for (i = 0; i < t->list_count; i++)
    {
      void *offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  void *res = thr->dev->alloc_func (thr->dev->target_id, s);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return res;
}

/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check if it is mapped,
   and if it is, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (d);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that NULL from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *) (k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

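/* Illustrative usage sketch (not part of this file; function name made up):
   pairing 'acc_malloc' with 'acc_free' in user code, assuming a
   non-shared-memory device.

     #include <openacc.h>

     void
     example_malloc_free (void)
     {
       void *d = acc_malloc (1024);     NULL indicates exhaustion
       if (d == NULL)
         return;
       ... use D in compute regions, e.g. via 'deviceptr' clauses ...
       acc_free (d);                    passing NULL would be a no-op
     }
*/
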
static void
memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
                      const char *libfnname)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (from)
        memmove (h, d, s);
      else
        memmove (d, h, s);
      return;
    }

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  goacc_aq aq = get_goacc_asyncqueue (async);
  if (from)
    gomp_copy_dev2host (thr->dev, aq, h, d, s);
  else
    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf?  */ NULL);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}

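/* Illustrative usage sketch (not part of this file; function name made up):
   copying a host buffer to device memory and back with the routines above.

     #include <string.h>
     #include <openacc.h>

     void
     example_memcpy (void)
     {
       char h[64] = "hello";
       void *d = acc_malloc (sizeof h);
       if (d == NULL)
         return;
       acc_memcpy_to_device (d, h, sizeof h);      host -> device
       memset (h, 0, sizeof h);
       acc_memcpy_from_device (h, d, sizeof h);    device -> host
       acc_free (d);
     }
*/
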
/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}

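/* Illustrative usage sketch (not part of this file; function name made up):
   'acc_deviceptr' and 'acc_hostptr' translate between the two address
   spaces, including into the interior of a mapped block.

     #include <assert.h>
     #include <openacc.h>

     void
     example_translate (char *h)    H already mapped, e.g. via acc_map_data
     {
       void *d = acc_deviceptr (h + 1);    device address of H's second byte
       assert (d != NULL);
       assert (acc_hostptr (d) == h + 1);  and back again
     }
*/
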
/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  /* In the following, no OpenACC Profiling Interface events can possibly be
     generated.  */

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t) h < n->host_start
            || (uintptr_t) h + s > n->host_end
            || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}

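/* Illustrative usage sketch (not part of this file): given a host array H of
   N bytes, 'acc_is_present' only returns 1 if the whole queried range is
   covered by a single mapped block.

     acc_copyin (h, n);
     acc_is_present (h, n);        returns 1: [H,+N] is mapped
     acc_is_present (h, 2 * n);    returns 0: extends past the block
*/
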
/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt = NULL;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
        gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *) h, (int) s, (void *) d, (int) s);

      acc_prof_info prof_info;
      acc_api_info api_info;
      bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("host address [%p, +%d] is already mapped", (void *) h,
                      (int) s);
        }

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("device address [%p, +%d] is already mapped", (void *) d,
                      (int) s);
        }

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
                           &kinds, true, GOMP_MAP_VARS_OPENACC);
      splay_tree_key n = tgt->list[0].key;
      assert (n->refcount == 1);
      assert (n->dynamic_refcount == 0);
      /* Special reference counting behavior.  */
      n->refcount = REFCOUNT_INFINITY;

      if (profiling_p)
        {
          thr->prof_info = NULL;
          thr->api_info = NULL;
        }
    }

  /* In the shared-memory case there is no descriptor to record.  */
  if (tgt)
    {
      gomp_mutex_lock (&acc_dev->lock);
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;
      gomp_mutex_unlock (&acc_dev->lock);
    }
}

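/* Illustrative usage sketch (not part of this file; function name made up):
   a typical 'acc_map_data'/'acc_unmap_data' pairing, with device memory
   obtained from 'acc_malloc'.

     #include <stdlib.h>
     #include <openacc.h>

     void
     example_map_unmap (size_t n)
     {
       void *h = malloc (n);
       void *d = acc_malloc (n);
       acc_map_data (h, d, n);    H is now present; refcount is "infinite"
       ... use the mapping ...
       acc_unmap_data (h);        mapping removed; device memory still live
       acc_free (d);
       free (h);
     }
*/
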
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *) h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
    }
  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
     the different 'REFCOUNT_INFINITY' cases, or simply separate
     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
     etc.)?  */
  else if (n->refcount != REFCOUNT_INFINITY)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
                  " by 'acc_map_data'",
                  (void *) h, (int) host_size);
    }

  /* Mark for removal.  */
  n->refcount = 1;

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        if (n->tgt == t)
          {
            if (tp)
              tp->prev = t->prev;
            else
              acc_dev->openacc.data_environ = t->prev;

            break;
          }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

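/* Illustrative sketch (not part of this file) of the case the check above
   refuses: a block created by 'acc_copyin' does not carry the
   'REFCOUNT_INFINITY' marker that 'acc_map_data' sets, so attempting to
   'acc_unmap_data' it now aborts instead of corrupting the reference
   counts.

     acc_copyin (h, n);     dynamically mapped; refcount == 1
     acc_unmap_data (h);    gomp_fatal: "refusing to unmap block ..."
*/
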
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s, int async)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *) h, (int) s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);

      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
                      (void *) h, (int) s, (void *) d, (int) s);
        }
      if ((h + s) > (void *) n->host_end)
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] not mapped", (void *) h, (int) s);
        }

      if (n->refcount != REFCOUNT_INFINITY)
        {
          n->refcount++;
          n->dynamic_refcount++;
        }
      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *) h, (int) s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
        kinds = GOMP_MAP_TO;
      else
        kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      goacc_aq aq = get_goacc_asyncqueue (async);

      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
                                 &kinds, true, GOMP_MAP_VARS_OPENACC);
      /* Initialize dynamic refcount.  */
      tgt->list[0].key->dynamic_refcount = 1;

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
}

void
acc_create_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async);
}

/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
                              acc_async_sync);
}

void
acc_copyin_async (void *h, size_t s, int async)
{
  present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async);
}

/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif

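/* Illustrative usage sketch (not part of this file): given a host array H of
   N bytes, repeated 'acc_copyin' calls on the same block bump both
   'refcount' and 'dynamic_refcount', so removing the mapping again takes a
   matching number of 'acc_delete' calls (or a single finalizing variant,
   see below).

     acc_copyin (h, n);    creates the mapping; refcounts 1/1
     acc_copyin (h, n);    already present; refcounts 2/2
     acc_delete (h, n);    refcounts 1/1; still present
     acc_delete (h, n);    refcounts 0/0; mapping removed
*/
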
#define FLAG_COPYOUT (1 << 0)
#define FLAG_FINALIZE (1 << 1)

static void
delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *) h, (int) s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
    {
      size_t host_size = n->host_end - n->host_start;
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
                  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
    }

  if (n->refcount == REFCOUNT_INFINITY)
    {
      n->refcount = 0;
      n->dynamic_refcount = 0;
    }
  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (f & FLAG_FINALIZE)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (n->tgt->refcount == 2)
        {
          struct target_mem_desc *tp, *t;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            if (n->tgt == t)
              {
                if (tp)
                  tp->prev = t->prev;
                else
                  acc_dev->openacc.data_environ = t->prev;
                break;
              }
        }

      if (f & FLAG_COPYOUT)
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_copy_dev2host (acc_dev, aq, h, d, s);
        }
      gomp_remove_var (acc_dev, n);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_async (void *h, size_t s, int async)
{
  delete_copyout (0, h, s, async, __FUNCTION__);
}

void
acc_delete_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_delete_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
}

void
acc_copyout_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
}

void
acc_copyout_finalize (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync,
                  __FUNCTION__);
}

void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__);
}

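/* Illustrative usage sketch (not part of this file): given a host array H of
   N bytes, 'acc_copyout' is the deallocating counterpart of 'acc_copyin';
   the '_finalize' variants drop the whole dynamic reference count at once.

     acc_copyin (h, n);
     acc_copyin (h, n);              dynamic refcount now 2
     acc_copyout_finalize (h, n);    copies back and unmaps in one call
*/
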
static void
update_dev_host (int is_dev, void *h, size_t s, int async)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* Fortran optional arguments that are non-present result in a
     NULL host address here.  This can safely be ignored as it is
     not possible to 'update' a non-present optional argument.  */
  if (h == NULL)
    return;

  acc_prof_info prof_info;
  acc_api_info api_info;
  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
  if (profiling_p)
    {
      prof_info.async = async;
      prof_info.async_queue = prof_info.async;
    }

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int) s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  goacc_aq aq = get_goacc_asyncqueue (async);

  if (is_dev)
    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf?  */ NULL);
  else
    gomp_copy_dev2host (acc_dev, aq, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);

  if (profiling_p)
    {
      thr->prof_info = NULL;
      thr->api_info = NULL;
    }
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s, acc_async_sync);
}

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s, acc_async_sync);
}

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}

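/* Illustrative usage sketch (not part of this file): refreshing one side of
   an existing mapping, without touching the reference counts.

     char h[256];
     size_t n = sizeof h;
     acc_copyin (h, n);
     h[0] = 42;
     acc_update_device (h, n);    host -> device copy of [H,+N]
     ... compute on the device ...
     acc_update_self (h, n);      device -> host copy of [H,+N]
*/
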
void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
                         void *kinds, int async)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (*hostaddrs == NULL)
    return;

  if (acc_is_present (*hostaddrs, *sizes))
    {
      splay_tree_key n;
      gomp_mutex_lock (&acc_dev->lock);
      n = lookup_host (acc_dev, *hostaddrs, *sizes);
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = n->tgt;
      for (size_t i = 0; i < tgt->list_count; i++)
        if (tgt->list[i].key == n)
          {
            for (size_t j = 0; j < mapnum; j++)
              if (i + j < tgt->list_count && tgt->list[i + j].key)
                {
                  tgt->list[i + j].key->refcount++;
                  tgt->list[i + j].key->dynamic_refcount++;
                }
            return;
          }
      /* Should not reach here.  */
      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  goacc_aq aq = get_goacc_asyncqueue (async);
  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
                             NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  /* Initialize dynamic refcount.  */
  tgt->list[0].key->dynamic_refcount = 1;

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
                         int finalize, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  if (!acc_is_present (h, s))
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *) h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  if (n->refcount < n->dynamic_refcount)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("Dynamic reference counting assert fail\n");
    }

  if (finalize)
    {
      n->refcount -= n->dynamic_refcount;
      n->dynamic_refcount = 0;
    }
  else if (n->dynamic_refcount)
    {
      n->dynamic_refcount--;
      n->refcount--;
    }

  if (n->refcount == 0)
    {
      if (t->refcount == minrefs)
        {
          /* This is the last reference, so pull the descriptor off the
             chain.  This prevents gomp_unmap_vars via gomp_unmap_tgt from
             freeing the device memory.  */
          struct target_mem_desc *tp;
          for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
               tp = t, t = t->prev)
            {
              if (n->tgt == t)
                {
                  if (tp)
                    tp->prev = t->prev;
                  else
                    acc_dev->openacc.data_environ = t->prev;
                  break;
                }
            }
        }

      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
      n->refcount = 1;
      t->refcount = minrefs;
      for (size_t i = 0; i < t->list_count; i++)
        if (t->list[i].key == n)
          {
            t->list[i].copy_from = force_copyfrom ? 1 : 0;
            break;
          }

      /* Release the device lock before unmapping: 'gomp_unmap_vars'
         acquires it itself, and each path through this function must
         unlock exactly once.  */
      gomp_mutex_unlock (&acc_dev->lock);

      /* If running synchronously, unmap immediately.  */
      if (async < acc_async_noval)
        gomp_unmap_vars (t, true);
      else
        {
          goacc_aq aq = get_goacc_asyncqueue (async);
          gomp_unmap_vars_async (t, true, aq);
        }
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}