1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2016 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library (libgomp).
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option) any later version.
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
32 #include "gomp-constants.h"
38 /* Return block containing [H->S), or NULL if not contained. The device lock
39 for DEV must be locked on entry, and remains locked on exit. */
42 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
44 struct splay_tree_key_s node
;
47 node
.host_start
= (uintptr_t) h
;
48 node
.host_end
= (uintptr_t) h
+ s
;
50 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
55 /* Return block containing [D->S), or NULL if not contained.
56 The list isn't ordered by device address, so we have to iterate
57 over the whole array. This is not expected to be a common
58 operation. The device lock associated with TGT must be locked on entry, and
59 remains locked on exit. */
62 lookup_dev (struct target_mem_desc
*tgt
, void *d
, size_t s
)
65 struct target_mem_desc
*t
;
70 for (t
= tgt
; t
!= NULL
; t
= t
->prev
)
72 if (t
->tgt_start
<= (uintptr_t) d
&& t
->tgt_end
>= (uintptr_t) d
+ s
)
79 for (i
= 0; i
< t
->list_count
; i
++)
83 splay_tree_key k
= &t
->array
[i
].key
;
84 offset
= d
- t
->tgt_start
+ k
->tgt_offset
;
86 if (k
->host_start
+ offset
<= (void *) k
->host_end
)
93 /* OpenACC is silent on how memory exhaustion is indicated. We return
102 goacc_lazy_initialize ();
104 struct goacc_thread
*thr
= goacc_thread ();
108 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
111 return thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
114 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
115 the device address is mapped. We choose to check if it mapped,
116 and if it is, to unmap it. */
125 struct goacc_thread
*thr
= goacc_thread ();
127 assert (thr
&& thr
->dev
);
129 struct gomp_device_descr
*acc_dev
= thr
->dev
;
131 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
134 gomp_mutex_lock (&acc_dev
->lock
);
136 /* We don't have to call lazy open here, as the ptr value must have
137 been returned by acc_malloc. It's not permitted to pass NULL in
138 (unless you got that null from acc_malloc). */
139 if ((k
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1)))
143 offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
145 gomp_mutex_unlock (&acc_dev
->lock
);
147 acc_unmap_data ((void *)(k
->host_start
+ offset
));
150 gomp_mutex_unlock (&acc_dev
->lock
);
152 acc_dev
->free_func (acc_dev
->target_id
, d
);
156 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
158 /* No need to call lazy open here, as the device pointer must have
159 been obtained from a routine that did that. */
160 struct goacc_thread
*thr
= goacc_thread ();
162 assert (thr
&& thr
->dev
);
164 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
170 thr
->dev
->host2dev_func (thr
->dev
->target_id
, d
, h
, s
);
174 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
176 /* No need to call lazy open here, as the device pointer must have
177 been obtained from a routine that did that. */
178 struct goacc_thread
*thr
= goacc_thread ();
180 assert (thr
&& thr
->dev
);
182 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
188 thr
->dev
->dev2host_func (thr
->dev
->target_id
, h
, d
, s
);
191 /* Return the device pointer that corresponds to host data H. Or NULL
195 acc_deviceptr (void *h
)
201 goacc_lazy_initialize ();
203 struct goacc_thread
*thr
= goacc_thread ();
204 struct gomp_device_descr
*dev
= thr
->dev
;
206 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
209 gomp_mutex_lock (&dev
->lock
);
211 n
= lookup_host (dev
, h
, 1);
215 gomp_mutex_unlock (&dev
->lock
);
219 offset
= h
- n
->host_start
;
221 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
223 gomp_mutex_unlock (&dev
->lock
);
228 /* Return the host pointer that corresponds to device data D. Or NULL
232 acc_hostptr (void *d
)
238 goacc_lazy_initialize ();
240 struct goacc_thread
*thr
= goacc_thread ();
241 struct gomp_device_descr
*acc_dev
= thr
->dev
;
243 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
246 gomp_mutex_lock (&acc_dev
->lock
);
248 n
= lookup_dev (acc_dev
->openacc
.data_environ
, d
, 1);
252 gomp_mutex_unlock (&acc_dev
->lock
);
256 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
258 h
= n
->host_start
+ offset
;
260 gomp_mutex_unlock (&acc_dev
->lock
);
265 /* Return 1 if host data [H,+S] is present on the device. */
268 acc_is_present (void *h
, size_t s
)
275 goacc_lazy_initialize ();
277 struct goacc_thread
*thr
= goacc_thread ();
278 struct gomp_device_descr
*acc_dev
= thr
->dev
;
280 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
283 gomp_mutex_lock (&acc_dev
->lock
);
285 n
= lookup_host (acc_dev
, h
, s
);
287 if (n
&& ((uintptr_t)h
< n
->host_start
288 || (uintptr_t)h
+ s
> n
->host_end
289 || s
> n
->host_end
- n
->host_start
))
292 gomp_mutex_unlock (&acc_dev
->lock
);
297 /* Create a mapping for host [H,+S] -> device [D,+S] */
300 acc_map_data (void *h
, void *d
, size_t s
)
302 struct target_mem_desc
*tgt
= NULL
;
307 unsigned short kinds
= GOMP_MAP_ALLOC
;
309 goacc_lazy_initialize ();
311 struct goacc_thread
*thr
= goacc_thread ();
312 struct gomp_device_descr
*acc_dev
= thr
->dev
;
314 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
317 gomp_fatal ("cannot map data on shared-memory system");
321 struct goacc_thread
*thr
= goacc_thread ();
324 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
325 (void *)h
, (int)s
, (void *)d
, (int)s
);
327 gomp_mutex_lock (&acc_dev
->lock
);
329 if (lookup_host (acc_dev
, h
, s
))
331 gomp_mutex_unlock (&acc_dev
->lock
);
332 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
336 if (lookup_dev (thr
->dev
->openacc
.data_environ
, d
, s
))
338 gomp_mutex_unlock (&acc_dev
->lock
);
339 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
343 gomp_mutex_unlock (&acc_dev
->lock
);
345 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
346 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
349 gomp_mutex_lock (&acc_dev
->lock
);
350 tgt
->prev
= acc_dev
->openacc
.data_environ
;
351 acc_dev
->openacc
.data_environ
= tgt
;
352 gomp_mutex_unlock (&acc_dev
->lock
);
356 acc_unmap_data (void *h
)
358 struct goacc_thread
*thr
= goacc_thread ();
359 struct gomp_device_descr
*acc_dev
= thr
->dev
;
361 /* No need to call lazy open, as the address must have been mapped. */
363 /* This is a no-op on shared-memory targets. */
364 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
369 gomp_mutex_lock (&acc_dev
->lock
);
371 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
372 struct target_mem_desc
*t
;
376 gomp_mutex_unlock (&acc_dev
->lock
);
377 gomp_fatal ("%p is not a mapped block", (void *)h
);
380 host_size
= n
->host_end
- n
->host_start
;
382 if (n
->host_start
!= (uintptr_t) h
)
384 gomp_mutex_unlock (&acc_dev
->lock
);
385 gomp_fatal ("[%p,%d] surrounds %p",
386 (void *) n
->host_start
, (int) host_size
, (void *) h
);
391 if (t
->refcount
== 2)
393 struct target_mem_desc
*tp
;
395 /* This is the last reference, so pull the descriptor off the
396 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
397 freeing the device memory. */
401 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
408 acc_dev
->openacc
.data_environ
= t
->prev
;
414 gomp_mutex_unlock (&acc_dev
->lock
);
416 gomp_unmap_vars (t
, true);
419 #define FLAG_PRESENT (1 << 0)
420 #define FLAG_CREATE (1 << 1)
421 #define FLAG_COPY (1 << 2)
424 present_create_copy (unsigned f
, void *h
, size_t s
)
430 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
432 goacc_lazy_initialize ();
434 struct goacc_thread
*thr
= goacc_thread ();
435 struct gomp_device_descr
*acc_dev
= thr
->dev
;
437 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
440 gomp_mutex_lock (&acc_dev
->lock
);
442 n
= lookup_host (acc_dev
, h
, s
);
446 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
);
448 if (!(f
& FLAG_PRESENT
))
450 gomp_mutex_unlock (&acc_dev
->lock
);
451 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
452 (void *)h
, (int)s
, (void *)d
, (int)s
);
454 if ((h
+ s
) > (void *)n
->host_end
)
456 gomp_mutex_unlock (&acc_dev
->lock
);
457 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
460 gomp_mutex_unlock (&acc_dev
->lock
);
462 else if (!(f
& FLAG_CREATE
))
464 gomp_mutex_unlock (&acc_dev
->lock
);
465 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
469 struct target_mem_desc
*tgt
;
471 unsigned short kinds
;
477 kinds
= GOMP_MAP_ALLOC
;
479 gomp_mutex_unlock (&acc_dev
->lock
);
481 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, NULL
, &s
, &kinds
, true,
482 GOMP_MAP_VARS_OPENACC
);
484 gomp_mutex_lock (&acc_dev
->lock
);
487 tgt
->prev
= acc_dev
->openacc
.data_environ
;
488 acc_dev
->openacc
.data_environ
= tgt
;
490 gomp_mutex_unlock (&acc_dev
->lock
);
497 acc_create (void *h
, size_t s
)
499 return present_create_copy (FLAG_CREATE
, h
, s
);
503 acc_copyin (void *h
, size_t s
)
505 return present_create_copy (FLAG_CREATE
| FLAG_COPY
, h
, s
);
509 acc_present_or_create (void *h
, size_t s
)
511 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
);
515 acc_present_or_copyin (void *h
, size_t s
)
517 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
);
520 #define FLAG_COPYOUT (1 << 0)
523 delete_copyout (unsigned f
, void *h
, size_t s
)
528 struct goacc_thread
*thr
= goacc_thread ();
529 struct gomp_device_descr
*acc_dev
= thr
->dev
;
531 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
534 gomp_mutex_lock (&acc_dev
->lock
);
536 n
= lookup_host (acc_dev
, h
, s
);
538 /* No need to call lazy open, as the data must already have been
543 gomp_mutex_unlock (&acc_dev
->lock
);
544 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
547 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
548 + (uintptr_t) h
- n
->host_start
);
550 host_size
= n
->host_end
- n
->host_start
;
552 if (n
->host_start
!= (uintptr_t) h
|| host_size
!= s
)
554 gomp_mutex_unlock (&acc_dev
->lock
);
555 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
556 (void *) n
->host_start
, (int) host_size
, (void *) h
, (int) s
);
559 gomp_mutex_unlock (&acc_dev
->lock
);
561 if (f
& FLAG_COPYOUT
)
562 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
566 acc_dev
->free_func (acc_dev
->target_id
, d
);
/* Release the device mapping of [H,+S] without copying back.  */

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}
575 void acc_copyout (void *h
, size_t s
)
577 delete_copyout (FLAG_COPYOUT
, h
, s
);
581 update_dev_host (int is_dev
, void *h
, size_t s
)
586 goacc_lazy_initialize ();
588 struct goacc_thread
*thr
= goacc_thread ();
589 struct gomp_device_descr
*acc_dev
= thr
->dev
;
591 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
594 gomp_mutex_lock (&acc_dev
->lock
);
596 n
= lookup_host (acc_dev
, h
, s
);
600 gomp_mutex_unlock (&acc_dev
->lock
);
601 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
604 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
605 + (uintptr_t) h
- n
->host_start
);
607 gomp_mutex_unlock (&acc_dev
->lock
);
610 acc_dev
->host2dev_func (acc_dev
->target_id
, d
, h
, s
);
612 acc_dev
->dev2host_func (acc_dev
->target_id
, h
, d
, s
);
/* Refresh the device copy of [H,+S] from the host.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}
/* Refresh the host copy of [H,+S] from the device.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
628 gomp_acc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
631 struct target_mem_desc
*tgt
;
632 struct goacc_thread
*thr
= goacc_thread ();
633 struct gomp_device_descr
*acc_dev
= thr
->dev
;
635 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
636 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
,
637 NULL
, sizes
, kinds
, true, GOMP_MAP_VARS_OPENACC
);
638 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
640 gomp_mutex_lock (&acc_dev
->lock
);
641 tgt
->prev
= acc_dev
->openacc
.data_environ
;
642 acc_dev
->openacc
.data_environ
= tgt
;
643 gomp_mutex_unlock (&acc_dev
->lock
);
647 gomp_acc_remove_pointer (void *h
, bool force_copyfrom
, int async
, int mapnum
)
649 struct goacc_thread
*thr
= goacc_thread ();
650 struct gomp_device_descr
*acc_dev
= thr
->dev
;
652 struct target_mem_desc
*t
;
653 int minrefs
= (mapnum
== 1) ? 2 : 3;
655 gomp_mutex_lock (&acc_dev
->lock
);
657 n
= lookup_host (acc_dev
, h
, 1);
661 gomp_mutex_unlock (&acc_dev
->lock
);
662 gomp_fatal ("%p is not a mapped block", (void *)h
);
665 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
669 struct target_mem_desc
*tp
;
671 if (t
->refcount
== minrefs
)
673 /* This is the last reference, so pull the descriptor off the
674 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
675 freeing the device memory. */
679 for (tp
= NULL
, t
= acc_dev
->openacc
.data_environ
; t
!= NULL
;
687 acc_dev
->openacc
.data_environ
= t
->prev
;
694 t
->list
[0].copy_from
= 1;
696 gomp_mutex_unlock (&acc_dev
->lock
);
698 /* If running synchronously, unmap immediately. */
699 if (async
< acc_async_noval
)
700 gomp_unmap_vars (t
, true);
703 gomp_copy_from_async (t
);
704 acc_dev
->openacc
.register_async_cleanup_func (t
);
707 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);