1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
49 static void gomp_target_init (void);
51 /* The whole initialization code for offloading plugins is only run one. */
52 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock
;
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr
{
62 enum offload_target_type type
;
63 const void *host_table
;
64 const void *target_data
;
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr
*offload_images
;
70 /* Total number of offload images. */
71 static int num_offload_images
;
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr
*devices
;
76 /* Total number of available devices. */
77 static int num_devices
;
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp
;
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
85 gomp_realloc_unlock (void *old
, size_t size
)
87 void *ret
= realloc (old
, size
);
90 gomp_mutex_unlock (®ister_lock
);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
97 gomp_init_targets_once (void)
99 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
103 gomp_get_num_devices (void)
105 gomp_init_targets_once ();
106 return num_devices_openmp
;
109 static struct gomp_device_descr
*
110 resolve_device (int device_id
)
112 if (device_id
== GOMP_DEVICE_ICV
)
114 struct gomp_task_icv
*icv
= gomp_icv (false);
115 device_id
= icv
->default_device_var
;
118 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
121 gomp_mutex_lock (&devices
[device_id
].lock
);
122 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
123 gomp_init_device (&devices
[device_id
]);
124 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
126 gomp_mutex_unlock (&devices
[device_id
].lock
);
129 gomp_mutex_unlock (&devices
[device_id
].lock
);
131 return &devices
[device_id
];
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
138 if (key
->host_start
!= key
->host_end
)
139 return splay_tree_lookup (mem_map
, key
);
142 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
147 n
= splay_tree_lookup (mem_map
, key
);
151 return splay_tree_lookup (mem_map
, key
);
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
157 if (key
->host_start
!= key
->host_end
)
158 return splay_tree_lookup (mem_map
, key
);
161 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
167 gomp_device_copy (struct gomp_device_descr
*devicep
,
168 bool (*copy_func
) (int, void *, const void *, size_t),
169 const char *dst
, void *dstaddr
,
170 const char *src
, const void *srcaddr
,
173 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
175 gomp_mutex_unlock (&devicep
->lock
);
176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
182 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
183 bool (*copy_func
) (int, void *, const void *, size_t,
184 struct goacc_asyncqueue
*),
185 const char *dst
, void *dstaddr
,
186 const char *src
, const void *srcaddr
,
187 size_t size
, struct goacc_asyncqueue
*aq
)
189 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
191 gomp_mutex_unlock (&devicep
->lock
);
192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
237 /* Add region with device tgt_start relative offset and length to CBUF. */
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
242 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
246 if (cbuf
->chunk_cnt
< 0)
248 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
250 cbuf
->chunk_cnt
= -1;
253 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
255 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
259 /* If the last chunk is only used by one mapping, discard it,
260 as it will be one host to device copy anyway and
261 memcpying it around will only waste cycles. */
262 if (cbuf
->use_cnt
== 1)
265 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
266 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
271 /* Return true for mapping kinds which need to copy data from the
272 host to device for regions that weren't previously mapped. */
275 gomp_to_device_kind_p (int kind
)
281 case GOMP_MAP_FORCE_ALLOC
:
282 case GOMP_MAP_FORCE_FROM
:
283 case GOMP_MAP_ALWAYS_FROM
:
290 attribute_hidden
void
291 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
292 struct goacc_asyncqueue
*aq
,
293 void *d
, const void *h
, size_t sz
,
294 struct gomp_coalesce_buf
*cbuf
)
298 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
299 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
302 long last
= cbuf
->chunk_cnt
- 1;
303 while (first
<= last
)
305 long middle
= (first
+ last
) >> 1;
306 if (cbuf
->chunks
[middle
].end
<= doff
)
308 else if (cbuf
->chunks
[middle
].start
<= doff
)
310 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
311 gomp_fatal ("internal libgomp cbuf error");
312 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
321 if (__builtin_expect (aq
!= NULL
, 0))
322 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
323 "dev", d
, "host", h
, sz
, aq
);
325 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
328 attribute_hidden
void
329 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
330 struct goacc_asyncqueue
*aq
,
331 void *h
, const void *d
, size_t sz
)
333 if (__builtin_expect (aq
!= NULL
, 0))
334 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
335 "host", h
, "dev", d
, sz
, aq
);
337 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
341 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
343 if (!devicep
->free_func (devicep
->target_id
, devptr
))
345 gomp_mutex_unlock (&devicep
->lock
);
346 gomp_fatal ("error in freeing device memory block at %p", devptr
);
350 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351 gomp_map_0len_lookup found oldn for newn.
352 Helper function of gomp_map_vars. */
355 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
356 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
357 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
358 unsigned char kind
, struct gomp_coalesce_buf
*cbuf
)
361 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
362 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
363 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
364 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
366 if ((kind
& GOMP_MAP_FLAG_FORCE
)
367 || oldn
->host_start
> newn
->host_start
368 || oldn
->host_end
< newn
->host_end
)
370 gomp_mutex_unlock (&devicep
->lock
);
371 gomp_fatal ("Trying to map into device [%p..%p) object when "
372 "[%p..%p) is already mapped",
373 (void *) newn
->host_start
, (void *) newn
->host_end
,
374 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
377 if (GOMP_MAP_ALWAYS_TO_P (kind
))
378 gomp_copy_host2dev (devicep
, aq
,
379 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
380 + newn
->host_start
- oldn
->host_start
),
381 (void *) newn
->host_start
,
382 newn
->host_end
- newn
->host_start
, cbuf
);
384 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Return the IDX-th map kind from KINDS, which is an array of 16-bit
   entries when SHORT_MAPKIND (OpenMP 4.5+ encoding, kind | alignment << 8)
   and 8-bit entries otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
396 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
397 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
398 struct gomp_coalesce_buf
*cbuf
)
400 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
401 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
402 struct splay_tree_key_s cur_node
;
404 cur_node
.host_start
= host_ptr
;
405 if (cur_node
.host_start
== (uintptr_t) NULL
)
407 cur_node
.tgt_offset
= (uintptr_t) NULL
;
408 gomp_copy_host2dev (devicep
, aq
,
409 (void *) (tgt
->tgt_start
+ target_offset
),
410 (void *) &cur_node
.tgt_offset
,
411 sizeof (void *), cbuf
);
414 /* Add bias to the pointer value. */
415 cur_node
.host_start
+= bias
;
416 cur_node
.host_end
= cur_node
.host_start
;
417 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
420 gomp_mutex_unlock (&devicep
->lock
);
421 gomp_fatal ("Pointer target of array section wasn't mapped");
423 cur_node
.host_start
-= n
->host_start
;
425 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
426 /* At this point tgt_offset is target address of the
427 array section. Now subtract bias to get what we want
428 to initialize the pointer with. */
429 cur_node
.tgt_offset
-= bias
;
430 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
431 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
435 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
436 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
437 size_t first
, size_t i
, void **hostaddrs
,
438 size_t *sizes
, void *kinds
,
439 struct gomp_coalesce_buf
*cbuf
)
441 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
442 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
443 struct splay_tree_key_s cur_node
;
445 const bool short_mapkind
= true;
446 const int typemask
= short_mapkind
? 0xff : 0x7;
448 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
449 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
450 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
451 kind
= get_kind (short_mapkind
, kinds
, i
);
454 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
456 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
457 &tgt
->list
[i
], kind
& typemask
, cbuf
);
462 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
464 cur_node
.host_start
--;
465 n2
= splay_tree_lookup (mem_map
, &cur_node
);
466 cur_node
.host_start
++;
469 && n2
->host_start
- n
->host_start
470 == n2
->tgt_offset
- n
->tgt_offset
)
472 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
,
473 &tgt
->list
[i
], kind
& typemask
, cbuf
);
478 n2
= splay_tree_lookup (mem_map
, &cur_node
);
482 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
484 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
485 kind
& typemask
, cbuf
);
489 gomp_mutex_unlock (&devicep
->lock
);
490 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
491 "other mapped elements from the same structure weren't mapped "
492 "together with it", (void *) cur_node
.host_start
,
493 (void *) cur_node
.host_end
);
496 attribute_hidden
uintptr_t
497 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
499 if (tgt
->list
[i
].key
!= NULL
)
500 return tgt
->list
[i
].key
->tgt
->tgt_start
501 + tgt
->list
[i
].key
->tgt_offset
502 + tgt
->list
[i
].offset
;
504 switch (tgt
->list
[i
].offset
)
507 return (uintptr_t) hostaddrs
[i
];
513 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
514 + tgt
->list
[i
+ 1].key
->tgt_offset
515 + tgt
->list
[i
+ 1].offset
516 + (uintptr_t) hostaddrs
[i
]
517 - (uintptr_t) hostaddrs
[i
+ 1];
520 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
524 static inline __attribute__((always_inline
)) struct target_mem_desc
*
525 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
526 struct goacc_asyncqueue
*aq
, size_t mapnum
,
527 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
528 void *kinds
, bool short_mapkind
,
529 enum gomp_map_vars_kind pragma_kind
)
531 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
532 bool has_firstprivate
= false;
533 const int rshift
= short_mapkind
? 8 : 3;
534 const int typemask
= short_mapkind
? 0xff : 0x7;
535 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
536 struct splay_tree_key_s cur_node
;
537 struct target_mem_desc
*tgt
538 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
539 tgt
->list_count
= mapnum
;
540 tgt
->refcount
= (pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
541 || pragma_kind
== GOMP_MAP_VARS_OPENACC_ENTER_DATA
) ? 0 : 1;
542 tgt
->device_descr
= devicep
;
544 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
553 tgt_align
= sizeof (void *);
559 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
561 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
562 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
565 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
567 size_t align
= 4 * sizeof (void *);
569 tgt_size
= mapnum
* sizeof (void *);
571 cbuf
.use_cnt
= 1 + (mapnum
> 1);
572 cbuf
.chunks
[0].start
= 0;
573 cbuf
.chunks
[0].end
= tgt_size
;
576 gomp_mutex_lock (&devicep
->lock
);
577 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
579 gomp_mutex_unlock (&devicep
->lock
);
584 for (i
= 0; i
< mapnum
; i
++)
586 int kind
= get_kind (short_mapkind
, kinds
, i
);
587 if (hostaddrs
[i
] == NULL
588 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
590 tgt
->list
[i
].key
= NULL
;
591 tgt
->list
[i
].offset
= OFFSET_INLINED
;
594 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
596 tgt
->list
[i
].key
= NULL
;
599 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
600 on a separate construct prior to using use_device_{addr,ptr}.
601 In OpenMP 5.0, map directives need to be ordered by the
602 middle-end before the use_device_* clauses. If
603 !not_found_cnt, all mappings requested (if any) are already
604 mapped, so use_device_{addr,ptr} can be resolved right away.
605 Otherwise, if not_found_cnt, gomp_map_lookup might fail
606 now but would succeed after performing the mappings in the
607 following loop. We can't defer this always to the second
608 loop, because it is not even invoked when !not_found_cnt
609 after the first loop. */
610 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
611 cur_node
.host_end
= cur_node
.host_start
;
612 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
615 gomp_mutex_unlock (&devicep
->lock
);
616 gomp_fatal ("use_device_ptr pointer wasn't mapped");
618 cur_node
.host_start
-= n
->host_start
;
620 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
621 + cur_node
.host_start
);
622 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
625 tgt
->list
[i
].offset
= 0;
628 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
630 size_t first
= i
+ 1;
631 size_t last
= i
+ sizes
[i
];
632 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
633 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
635 tgt
->list
[i
].key
= NULL
;
636 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
637 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
640 size_t align
= (size_t) 1 << (kind
>> rshift
);
641 if (tgt_align
< align
)
643 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
644 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
645 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
646 not_found_cnt
+= last
- i
;
647 for (i
= first
; i
<= last
; i
++)
649 tgt
->list
[i
].key
= NULL
;
650 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
652 gomp_coalesce_buf_add (&cbuf
,
653 tgt_size
- cur_node
.host_end
654 + (uintptr_t) hostaddrs
[i
],
660 for (i
= first
; i
<= last
; i
++)
661 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
666 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
668 tgt
->list
[i
].key
= NULL
;
669 tgt
->list
[i
].offset
= OFFSET_POINTER
;
670 has_firstprivate
= true;
673 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
674 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
675 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
677 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
678 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
680 tgt
->list
[i
].key
= NULL
;
682 size_t align
= (size_t) 1 << (kind
>> rshift
);
683 if (tgt_align
< align
)
685 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
686 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
687 cur_node
.host_end
- cur_node
.host_start
);
688 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
689 has_firstprivate
= true;
693 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
695 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
698 tgt
->list
[i
].key
= NULL
;
699 tgt
->list
[i
].offset
= OFFSET_POINTER
;
704 n
= splay_tree_lookup (mem_map
, &cur_node
);
705 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
706 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
707 kind
& typemask
, NULL
);
710 tgt
->list
[i
].key
= NULL
;
712 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
714 /* Not present, hence, skip entry - including its MAP_POINTER,
716 tgt
->list
[i
].offset
= OFFSET_POINTER
;
718 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
719 == GOMP_MAP_POINTER
))
722 tgt
->list
[i
].key
= NULL
;
723 tgt
->list
[i
].offset
= 0;
727 size_t align
= (size_t) 1 << (kind
>> rshift
);
729 if (tgt_align
< align
)
731 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
732 if (gomp_to_device_kind_p (kind
& typemask
))
733 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
734 cur_node
.host_end
- cur_node
.host_start
);
735 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
736 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
739 for (j
= i
+ 1; j
< mapnum
; j
++)
740 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
, j
)
743 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
744 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
745 > cur_node
.host_end
))
749 tgt
->list
[j
].key
= NULL
;
760 gomp_mutex_unlock (&devicep
->lock
);
761 gomp_fatal ("unexpected aggregation");
763 tgt
->to_free
= devaddrs
[0];
764 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
765 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
767 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
769 /* Allocate tgt_align aligned tgt_size block of memory. */
770 /* FIXME: Perhaps change interface to allocate properly aligned
772 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
773 tgt_size
+ tgt_align
- 1);
776 gomp_mutex_unlock (&devicep
->lock
);
777 gomp_fatal ("device memory allocation fail");
780 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
781 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
782 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
784 if (cbuf
.use_cnt
== 1)
786 if (cbuf
.chunk_cnt
> 0)
789 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
805 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
806 tgt_size
= mapnum
* sizeof (void *);
809 if (not_found_cnt
|| has_firstprivate
)
812 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
813 splay_tree_node array
= tgt
->array
;
814 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= ~(size_t) 0;
815 uintptr_t field_tgt_base
= 0;
817 for (i
= 0; i
< mapnum
; i
++)
818 if (tgt
->list
[i
].key
== NULL
)
820 int kind
= get_kind (short_mapkind
, kinds
, i
);
821 if (hostaddrs
[i
] == NULL
)
823 switch (kind
& typemask
)
825 size_t align
, len
, first
, last
;
827 case GOMP_MAP_FIRSTPRIVATE
:
828 align
= (size_t) 1 << (kind
>> rshift
);
829 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
830 tgt
->list
[i
].offset
= tgt_size
;
832 gomp_copy_host2dev (devicep
, aq
,
833 (void *) (tgt
->tgt_start
+ tgt_size
),
834 (void *) hostaddrs
[i
], len
, cbufp
);
837 case GOMP_MAP_FIRSTPRIVATE_INT
:
838 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
840 case GOMP_MAP_USE_DEVICE_PTR
:
841 if (tgt
->list
[i
].offset
== 0)
843 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
844 cur_node
.host_end
= cur_node
.host_start
;
845 n
= gomp_map_lookup (mem_map
, &cur_node
);
848 gomp_mutex_unlock (&devicep
->lock
);
849 gomp_fatal ("use_device_ptr pointer wasn't mapped");
851 cur_node
.host_start
-= n
->host_start
;
853 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
854 + cur_node
.host_start
);
855 tgt
->list
[i
].offset
= ~(uintptr_t) 0;
858 case GOMP_MAP_STRUCT
:
861 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
862 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
864 if (tgt
->list
[first
].key
!= NULL
)
866 n
= splay_tree_lookup (mem_map
, &cur_node
);
869 size_t align
= (size_t) 1 << (kind
>> rshift
);
870 tgt_size
-= (uintptr_t) hostaddrs
[first
]
871 - (uintptr_t) hostaddrs
[i
];
872 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
873 tgt_size
+= (uintptr_t) hostaddrs
[first
]
874 - (uintptr_t) hostaddrs
[i
];
875 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
876 field_tgt_offset
= tgt_size
;
877 field_tgt_clear
= last
;
878 tgt_size
+= cur_node
.host_end
879 - (uintptr_t) hostaddrs
[first
];
882 for (i
= first
; i
<= last
; i
++)
883 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
884 sizes
, kinds
, cbufp
);
887 case GOMP_MAP_ALWAYS_POINTER
:
888 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
889 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
890 n
= splay_tree_lookup (mem_map
, &cur_node
);
892 || n
->host_start
> cur_node
.host_start
893 || n
->host_end
< cur_node
.host_end
)
895 gomp_mutex_unlock (&devicep
->lock
);
896 gomp_fatal ("always pointer not mapped");
898 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
899 != GOMP_MAP_ALWAYS_POINTER
)
900 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
901 if (cur_node
.tgt_offset
)
902 cur_node
.tgt_offset
-= sizes
[i
];
903 gomp_copy_host2dev (devicep
, aq
,
904 (void *) (n
->tgt
->tgt_start
906 + cur_node
.host_start
908 (void *) &cur_node
.tgt_offset
,
909 sizeof (void *), cbufp
);
910 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
911 + cur_node
.host_start
- n
->host_start
;
913 case GOMP_MAP_IF_PRESENT
:
914 /* Not present - otherwise handled above. Skip over its
915 MAP_POINTER as well. */
917 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
918 == GOMP_MAP_POINTER
))
924 splay_tree_key k
= &array
->key
;
925 k
->host_start
= (uintptr_t) hostaddrs
[i
];
926 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
927 k
->host_end
= k
->host_start
+ sizes
[i
];
929 k
->host_end
= k
->host_start
+ sizeof (void *);
930 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
931 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
932 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
933 kind
& typemask
, cbufp
);
937 if (n
&& n
->refcount
== REFCOUNT_LINK
)
939 /* Replace target address of the pointer with target address
940 of mapped object in the splay tree. */
941 splay_tree_remove (mem_map
, n
);
943 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
944 k
->aux
->link_key
= n
;
946 size_t align
= (size_t) 1 << (kind
>> rshift
);
947 tgt
->list
[i
].key
= k
;
949 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
951 k
->tgt_offset
= k
->host_start
- field_tgt_base
953 if (i
== field_tgt_clear
)
954 field_tgt_clear
= FIELD_TGT_EMPTY
;
958 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
959 k
->tgt_offset
= tgt_size
;
960 tgt_size
+= k
->host_end
- k
->host_start
;
962 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
963 tgt
->list
[i
].always_copy_from
964 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
965 tgt
->list
[i
].offset
= 0;
966 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
968 k
->virtual_refcount
= 0;
972 splay_tree_insert (mem_map
, array
);
973 switch (kind
& typemask
)
977 case GOMP_MAP_FORCE_ALLOC
:
978 case GOMP_MAP_FORCE_FROM
:
979 case GOMP_MAP_ALWAYS_FROM
:
982 case GOMP_MAP_TOFROM
:
983 case GOMP_MAP_FORCE_TO
:
984 case GOMP_MAP_FORCE_TOFROM
:
985 case GOMP_MAP_ALWAYS_TO
:
986 case GOMP_MAP_ALWAYS_TOFROM
:
987 gomp_copy_host2dev (devicep
, aq
,
988 (void *) (tgt
->tgt_start
990 (void *) k
->host_start
,
991 k
->host_end
- k
->host_start
, cbufp
);
993 case GOMP_MAP_POINTER
:
994 gomp_map_pointer (tgt
, aq
,
995 (uintptr_t) *(void **) k
->host_start
,
996 k
->tgt_offset
, sizes
[i
], cbufp
);
998 case GOMP_MAP_TO_PSET
:
999 gomp_copy_host2dev (devicep
, aq
,
1000 (void *) (tgt
->tgt_start
1002 (void *) k
->host_start
,
1003 k
->host_end
- k
->host_start
, cbufp
);
1005 for (j
= i
+ 1; j
< mapnum
; j
++)
1006 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind
, kinds
,
1010 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1011 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1016 tgt
->list
[j
].key
= k
;
1017 tgt
->list
[j
].copy_from
= false;
1018 tgt
->list
[j
].always_copy_from
= false;
1019 if (k
->refcount
!= REFCOUNT_INFINITY
)
1021 gomp_map_pointer (tgt
, aq
,
1022 (uintptr_t) *(void **) hostaddrs
[j
],
1024 + ((uintptr_t) hostaddrs
[j
]
1030 case GOMP_MAP_FORCE_PRESENT
:
1032 /* We already looked up the memory region above and it
1034 size_t size
= k
->host_end
- k
->host_start
;
1035 gomp_mutex_unlock (&devicep
->lock
);
1036 #ifdef HAVE_INTTYPES_H
1037 gomp_fatal ("present clause: !acc_is_present (%p, "
1038 "%"PRIu64
" (0x%"PRIx64
"))",
1039 (void *) k
->host_start
,
1040 (uint64_t) size
, (uint64_t) size
);
1042 gomp_fatal ("present clause: !acc_is_present (%p, "
1043 "%lu (0x%lx))", (void *) k
->host_start
,
1044 (unsigned long) size
, (unsigned long) size
);
1048 case GOMP_MAP_FORCE_DEVICEPTR
:
1049 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1050 gomp_copy_host2dev (devicep
, aq
,
1051 (void *) (tgt
->tgt_start
1053 (void *) k
->host_start
,
1054 sizeof (void *), cbufp
);
1057 gomp_mutex_unlock (&devicep
->lock
);
1058 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1062 if (k
->aux
&& k
->aux
->link_key
)
1064 /* Set link pointer on target to the device address of the
1066 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1067 /* We intentionally do not use coalescing here, as it's not
1068 data allocated by the current call to this function. */
1069 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1070 &tgt_addr
, sizeof (void *), NULL
);
1077 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1079 for (i
= 0; i
< mapnum
; i
++)
1081 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1082 gomp_copy_host2dev (devicep
, aq
,
1083 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1084 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1092 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1093 gomp_copy_host2dev (devicep
, aq
,
1094 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1095 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1096 - cbuf
.chunks
[0].start
),
1097 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1103 /* If the variable from "omp target enter data" map-list was already mapped,
1104 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1106 if ((pragma_kind
== GOMP_MAP_VARS_ENTER_DATA
1107 || pragma_kind
== GOMP_MAP_VARS_OPENACC_ENTER_DATA
)
1108 && tgt
->refcount
== 0)
1110 /* If we're about to discard a target_mem_desc with no "structural"
1111 references (tgt->refcount == 0), any splay keys linked in the tgt's
1112 list must have their virtual refcount incremented to represent that
1113 "lost" reference in order to implement the semantics of the OpenACC
1114 "present increment" operation properly. */
1115 if (pragma_kind
== GOMP_MAP_VARS_OPENACC_ENTER_DATA
)
1116 for (i
= 0; i
< tgt
->list_count
; i
++)
1117 if (tgt
->list
[i
].key
)
1118 tgt
->list
[i
].key
->virtual_refcount
++;
1124 gomp_mutex_unlock (&devicep
->lock
);
1128 attribute_hidden
struct target_mem_desc
*
1129 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1130 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1131 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1133 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1134 sizes
, kinds
, short_mapkind
, pragma_kind
);
1137 attribute_hidden
struct target_mem_desc
*
1138 gomp_map_vars_async (struct gomp_device_descr
*devicep
,
1139 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1140 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1141 void *kinds
, bool short_mapkind
,
1142 enum gomp_map_vars_kind pragma_kind
)
1144 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1145 sizes
, kinds
, short_mapkind
, pragma_kind
);
/* Release the device-side storage backing TGT.
   NOTE(review): extraction dropped this definition's return-type line,
   braces and any statements after original line 1153 (orig lines 1148,
   1150, 1152, 1154+) — restore them from the upstream file.  */
1149 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1151 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
/* Hand the block recorded in tgt->to_free back to tgt's owning device.  */
1153 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
/* Drop one reference from the target_mem_desc pointed to by PTR and report
   whether the descriptor was actually unmapped as a result.
   NOTE(review): the lines between the refcount test (orig 1166) and the
   unmap call (orig 1170) were dropped by extraction — presumably the
   branch that merely decrements the refcount; confirm against upstream.
   The return-type line and braces (orig 1159, 1161, 1167-1169, 1172-1173,
   1175) are missing as well.  */
1160 gomp_unref_tgt (void *ptr
)
1162 bool is_tgt_unmapped
= false;
/* PTR is void * so this function can double as a queue callback payload.  */
1164 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1166 if (tgt
->refcount
> 1)
/* Last reference gone: tear down the device allocation and report it.  */
1170 gomp_unmap_tgt (tgt
);
1171 is_tgt_unmapped
= true;
1174 return is_tgt_unmapped
;
/* Queue-callback shim: drop a target_mem_desc reference via gomp_unref_tgt
   and discard the result.  Used with openacc.async.queue_callback_func.
   NOTE(review): the return-type line was lost in extraction; 'static void'
   is inferred from the discarded result and the callback usage — confirm
   against upstream.  */
static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
/* Shared worker for gomp_remove_var{,_async}: unlink K from DEVICEP's
   address map and drop its target_mem_desc reference — immediately, or
   deferred through AQ's callback queue when AQ is non-NULL.  Returns
   whether the descriptor was unmapped (the async path reports false).
   NOTE(review): extraction lost several guard/cleanup lines here (orig
   1186, 1189-1190, 1194-1197, 1199-1200) — braces and the conditions
   around the link-key reinsertion and the async/sync split; restore from
   upstream before relying on this text.  */
1183 static inline __attribute__((always_inline
)) bool
1184 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1185 struct goacc_asyncqueue
*aq
)
1187 bool is_tgt_unmapped
= false;
1188 splay_tree_remove (&devicep
->mem_map
, k
);
/* A "declare target link" key shadows another entry; put it back.  */
1191 if (k
->aux
->link_key
)
1192 splay_tree_insert (&devicep
->mem_map
,
1193 (splay_tree_node
) k
->aux
->link_key
);
/* Asynchronous path: defer the unref until the queue reaches it.  */
1198 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
/* Synchronous path: drop the reference right away.  */
1201 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1202 return is_tgt_unmapped
;
1205 attribute_hidden
bool
1206 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1208 return gomp_remove_var_internal (devicep
, k
, NULL
);
1211 /* Remove a variable asynchronously. This actually removes the variable
1212 mapping immediately, but retains the linked target_mem_desc until the
1213 asynchronous operation has completed (as it may still refer to target
1214 memory). The device lock must be held before entry, and remains locked on
1217 attribute_hidden
void
1218 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1219 struct goacc_asyncqueue
*aq
)
1221 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1224 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1225 variables back from device to host: if it is false, it is assumed that this
1226 has been done already. */
1228 static inline __attribute__((always_inline
)) void
1229 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1230 struct goacc_asyncqueue
*aq
)
1232 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1234 if (tgt
->list_count
== 0)
1240 gomp_mutex_lock (&devicep
->lock
);
1241 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1243 gomp_mutex_unlock (&devicep
->lock
);
1250 for (i
= 0; i
< tgt
->list_count
; i
++)
1252 splay_tree_key k
= tgt
->list
[i
].key
;
1256 bool do_unmap
= false;
1258 && k
->virtual_refcount
> 0
1259 && k
->refcount
!= REFCOUNT_INFINITY
)
1261 k
->virtual_refcount
--;
1264 else if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
1266 else if (k
->refcount
== 1)
1272 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1273 || tgt
->list
[i
].always_copy_from
)
1274 gomp_copy_dev2host (devicep
, aq
,
1275 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1276 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1277 + tgt
->list
[i
].offset
),
1278 tgt
->list
[i
].length
);
1281 struct target_mem_desc
*k_tgt
= k
->tgt
;
1282 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1283 /* It would be bad if TGT got unmapped while we're still iterating
1284 over its LIST_COUNT, and also expect to use it in the following
1286 assert (!is_tgt_unmapped
1292 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1295 gomp_unref_tgt ((void *) tgt
);
1297 gomp_mutex_unlock (&devicep
->lock
);
1300 attribute_hidden
void
1301 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
1303 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
);
1306 attribute_hidden
void
1307 gomp_unmap_vars_async (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1308 struct goacc_asyncqueue
*aq
)
1310 gomp_unmap_vars_internal (tgt
, do_copyfrom
, aq
);
1314 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1315 size_t *sizes
, void *kinds
, bool short_mapkind
)
1318 struct splay_tree_key_s cur_node
;
1319 const int typemask
= short_mapkind
? 0xff : 0x7;
1327 gomp_mutex_lock (&devicep
->lock
);
1328 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1330 gomp_mutex_unlock (&devicep
->lock
);
1334 for (i
= 0; i
< mapnum
; i
++)
1337 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1338 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1339 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1342 int kind
= get_kind (short_mapkind
, kinds
, i
);
1343 if (n
->host_start
> cur_node
.host_start
1344 || n
->host_end
< cur_node
.host_end
)
1346 gomp_mutex_unlock (&devicep
->lock
);
1347 gomp_fatal ("Trying to update [%p..%p) object when "
1348 "only [%p..%p) is mapped",
1349 (void *) cur_node
.host_start
,
1350 (void *) cur_node
.host_end
,
1351 (void *) n
->host_start
,
1352 (void *) n
->host_end
);
1356 void *hostaddr
= (void *) cur_node
.host_start
;
1357 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1358 + cur_node
.host_start
- n
->host_start
);
1359 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
1361 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1362 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
1364 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
1365 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
1368 gomp_mutex_unlock (&devicep
->lock
);
1371 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1372 And insert to splay tree the mapping between addresses from HOST_TABLE and
1373 from loaded target image. We rely in the host and device compiler
1374 emitting variable and functions in the same order. */
1377 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
1378 const void *host_table
, const void *target_data
,
1379 bool is_register_lock
)
1381 void **host_func_table
= ((void ***) host_table
)[0];
1382 void **host_funcs_end
= ((void ***) host_table
)[1];
1383 void **host_var_table
= ((void ***) host_table
)[2];
1384 void **host_vars_end
= ((void ***) host_table
)[3];
1386 /* The func table contains only addresses, the var table contains addresses
1387 and corresponding sizes. */
1388 int num_funcs
= host_funcs_end
- host_func_table
;
1389 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1391 /* Load image to device and get target addresses for the image. */
1392 struct addr_pair
*target_table
= NULL
;
1393 int i
, num_target_entries
;
1396 = devicep
->load_image_func (devicep
->target_id
, version
,
1397 target_data
, &target_table
);
1399 if (num_target_entries
!= num_funcs
+ num_vars
)
1401 gomp_mutex_unlock (&devicep
->lock
);
1402 if (is_register_lock
)
1403 gomp_mutex_unlock (®ister_lock
);
1404 gomp_fatal ("Cannot map target functions or variables"
1405 " (expected %u, have %u)", num_funcs
+ num_vars
,
1406 num_target_entries
);
1409 /* Insert host-target address mapping into splay tree. */
1410 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1411 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1412 tgt
->refcount
= REFCOUNT_INFINITY
;
1415 tgt
->to_free
= NULL
;
1417 tgt
->list_count
= 0;
1418 tgt
->device_descr
= devicep
;
1419 splay_tree_node array
= tgt
->array
;
1421 for (i
= 0; i
< num_funcs
; i
++)
1423 splay_tree_key k
= &array
->key
;
1424 k
->host_start
= (uintptr_t) host_func_table
[i
];
1425 k
->host_end
= k
->host_start
+ 1;
1427 k
->tgt_offset
= target_table
[i
].start
;
1428 k
->refcount
= REFCOUNT_INFINITY
;
1429 k
->virtual_refcount
= 0;
1432 array
->right
= NULL
;
1433 splay_tree_insert (&devicep
->mem_map
, array
);
1437 /* Most significant bit of the size in host and target tables marks
1438 "omp declare target link" variables. */
1439 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1440 const uintptr_t size_mask
= ~link_bit
;
1442 for (i
= 0; i
< num_vars
; i
++)
1444 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1445 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1447 if ((uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1449 gomp_mutex_unlock (&devicep
->lock
);
1450 if (is_register_lock
)
1451 gomp_mutex_unlock (®ister_lock
);
1452 gomp_fatal ("Cannot map target variables (size mismatch)");
1455 splay_tree_key k
= &array
->key
;
1456 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1458 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1460 k
->tgt_offset
= target_var
->start
;
1461 k
->refcount
= target_size
& link_bit
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1462 k
->virtual_refcount
= 0;
1465 array
->right
= NULL
;
1466 splay_tree_insert (&devicep
->mem_map
, array
);
1470 free (target_table
);
1473 /* Unload the mappings described by target_data from device DEVICE_P.
1474 The device must be locked. */
1477 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1479 const void *host_table
, const void *target_data
)
1481 void **host_func_table
= ((void ***) host_table
)[0];
1482 void **host_funcs_end
= ((void ***) host_table
)[1];
1483 void **host_var_table
= ((void ***) host_table
)[2];
1484 void **host_vars_end
= ((void ***) host_table
)[3];
1486 /* The func table contains only addresses, the var table contains addresses
1487 and corresponding sizes. */
1488 int num_funcs
= host_funcs_end
- host_func_table
;
1489 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1491 struct splay_tree_key_s k
;
1492 splay_tree_key node
= NULL
;
1494 /* Find mapping at start of node array */
1495 if (num_funcs
|| num_vars
)
1497 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1498 : (uintptr_t) host_var_table
[0]);
1499 k
.host_end
= k
.host_start
+ 1;
1500 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1503 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
1505 gomp_mutex_unlock (&devicep
->lock
);
1506 gomp_fatal ("image unload fail");
1509 /* Remove mappings from splay tree. */
1511 for (i
= 0; i
< num_funcs
; i
++)
1513 k
.host_start
= (uintptr_t) host_func_table
[i
];
1514 k
.host_end
= k
.host_start
+ 1;
1515 splay_tree_remove (&devicep
->mem_map
, &k
);
1518 /* Most significant bit of the size in host and target tables marks
1519 "omp declare target link" variables. */
1520 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1521 const uintptr_t size_mask
= ~link_bit
;
1522 bool is_tgt_unmapped
= false;
1524 for (i
= 0; i
< num_vars
; i
++)
1526 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1528 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1530 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1531 splay_tree_remove (&devicep
->mem_map
, &k
);
1534 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1535 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
1539 if (node
&& !is_tgt_unmapped
)
1546 /* This function should be called from every offload image while loading.
1547 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1548 the target, and TARGET_DATA needed by target plugin. */
1551 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1552 int target_type
, const void *target_data
)
1556 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1557 gomp_fatal ("Library too old for offload (version %u < %u)",
1558 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1560 gomp_mutex_lock (®ister_lock
);
1562 /* Load image to all initialized devices. */
1563 for (i
= 0; i
< num_devices
; i
++)
1565 struct gomp_device_descr
*devicep
= &devices
[i
];
1566 gomp_mutex_lock (&devicep
->lock
);
1567 if (devicep
->type
== target_type
1568 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1569 gomp_load_image_to_device (devicep
, version
,
1570 host_table
, target_data
, true);
1571 gomp_mutex_unlock (&devicep
->lock
);
1574 /* Insert image to array of pending images. */
1576 = gomp_realloc_unlock (offload_images
,
1577 (num_offload_images
+ 1)
1578 * sizeof (struct offload_image_descr
));
1579 offload_images
[num_offload_images
].version
= version
;
1580 offload_images
[num_offload_images
].type
= target_type
;
1581 offload_images
[num_offload_images
].host_table
= host_table
;
1582 offload_images
[num_offload_images
].target_data
= target_data
;
1584 num_offload_images
++;
1585 gomp_mutex_unlock (®ister_lock
);
/* Legacy (version-0) offload-image registration entry point; forwards to
   the versioned API with VERSION == 0.
   NOTE(review): the return-type line was lost in extraction; 'void' is
   inferred from the statement-only body — confirm against upstream.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1595 /* This function should be called from every offload image while unloading.
1596 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1597 the target, and TARGET_DATA needed by target plugin. */
1600 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1601 int target_type
, const void *target_data
)
1605 gomp_mutex_lock (®ister_lock
);
1607 /* Unload image from all initialized devices. */
1608 for (i
= 0; i
< num_devices
; i
++)
1610 struct gomp_device_descr
*devicep
= &devices
[i
];
1611 gomp_mutex_lock (&devicep
->lock
);
1612 if (devicep
->type
== target_type
1613 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1614 gomp_unload_image_from_device (devicep
, version
,
1615 host_table
, target_data
);
1616 gomp_mutex_unlock (&devicep
->lock
);
1619 /* Remove image from array of pending images. */
1620 for (i
= 0; i
< num_offload_images
; i
++)
1621 if (offload_images
[i
].target_data
== target_data
)
1623 offload_images
[i
] = offload_images
[--num_offload_images
];
1627 gomp_mutex_unlock (®ister_lock
);
/* Legacy (version-0) offload-image unregistration entry point; forwards to
   the versioned API with VERSION == 0.
   NOTE(review): the return-type line was lost in extraction; 'void' is
   inferred from the statement-only body — confirm against upstream.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1637 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1638 must be locked on entry, and remains locked on return. */
1640 attribute_hidden
void
1641 gomp_init_device (struct gomp_device_descr
*devicep
)
/* Bring DEVICEP up: plugin initialization, loading of all registered
   offload images of matching type, OpenACC queue setup, then mark the
   device initialized.  Caller holds devicep->lock (see comment above).
   NOTE(review): braces, the loop-variable declaration and the trailing
   argument of the load call (orig lines 1642-1643, 1645, 1648-1649, 1652,
   1657-1659, 1662, 1664) were dropped by extraction.  */
/* Release the lock before gomp_fatal so it is not left held on abort.  */
1644 if (!devicep
->init_device_func (devicep
->target_id
))
1646 gomp_mutex_unlock (&devicep
->lock
);
1647 gomp_fatal ("device initialization failed");
1650 /* Load to device all images registered by the moment. */
1651 for (i
= 0; i
< num_offload_images
; i
++)
1653 struct offload_image_descr
*image
= &offload_images
[i
];
/* Only images matching this device's target type apply.  */
1654 if (image
->type
== devicep
->type
)
1655 gomp_load_image_to_device (devicep
, image
->version
,
1656 image
->host_table
, image
->target_data
,
1660 /* Initialize OpenACC asynchronous queues. */
1661 goacc_init_asyncqueues (devicep
);
/* Mark the device usable only after everything above has succeeded.  */
1663 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1666 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1667 must be locked on entry, and remains locked on return. */
1669 attribute_hidden
bool
1670 gomp_fini_device (struct gomp_device_descr
*devicep
)
1672 bool ret
= goacc_fini_asyncqueues (devicep
);
1673 ret
&= devicep
->fini_device_func (devicep
->target_id
);
1674 devicep
->state
= GOMP_DEVICE_FINALIZED
;
1678 attribute_hidden
void
1679 gomp_unload_device (struct gomp_device_descr
*devicep
)
/* If DEVICEP was initialized, unload every registered offload image whose
   target type matches the device.
   NOTE(review): braces, the loop-variable declaration and the host_table
   argument line of the unload call (orig lines 1680, 1682-1684, 1687,
   1691, 1693-1694) are missing from this extract.  */
1681 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1685 /* Unload from device all images registered at the moment. */
1686 for (i
= 0; i
< num_offload_images
; i
++)
1688 struct offload_image_descr
*image
= &offload_images
[i
];
1689 if (image
->type
== devicep
->type
)
1690 gomp_unload_image_from_device (devicep
, image
->version
,
1692 image
->target_data
);
1697 /* Host fallback for GOMP_target{,_ext} routines. */
/* Run the host version FN of a target region on HOSTADDRS after clearing
   this thread's descriptor, preserving only the affinity place binding.
   NOTE(review): extraction dropped the return type, braces, the old_thr
   save, the FN invocation and the state restore (orig lines 1699, 1701,
   1703, 1706, 1709-1710, 1712+) — restore them from upstream.  */
1700 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
)
1702 struct gomp_thread old_thr
, *thr
= gomp_thread ();
/* Start the region from a pristine, non-parallel thread state.  */
1704 memset (thr
, '\0', sizeof (*thr
));
/* Keep the affinity place when a places list is in use.  */
1705 if (gomp_places_list
)
1707 thr
->place
= old_thr
.place
;
1708 thr
->ts
.place_partition_len
= gomp_places_list_len
;
1711 gomp_free_thread (thr
);
1715 /* Calculate alignment and size requirements of a private copy of data shared
1716 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
/* NOTE(review): extraction lost the return type, the final parameter
   (presumably size_t *tgt_size), braces, the loop-variable declaration
   and the *tgt_align update under the comparison (orig lines 1717-1718,
   1721-1723, 1726, 1729, 1732-1733) — restore from upstream.  */
1719 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
1720 unsigned short *kinds
, size_t *tgt_align
,
/* Each 16-bit kind encodes the item's alignment in its high byte.  */
1724 for (i
= 0; i
< mapnum
; i
++)
1725 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
1727 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1728 if (*tgt_align
< align
)
/* Round the running size up to this item's alignment, then add it.  */
1730 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
1731 *tgt_size
+= sizes
[i
];
1735 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
/* NOTE(review): the return type, trailing parameter (presumably
   size_t tgt_size), braces and the guard around the base-pointer
   alignment fixup (orig lines 1737, 1740-1741, 1743, 1745-1746, 1749,
   1755-1756) are missing from this extract — restore from upstream.  */
1738 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
1739 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
/* Align the destination buffer TGT itself to tgt_align first.  */
1742 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
1744 tgt
+= tgt_align
- al
;
1747 for (i
= 0; i
< mapnum
; i
++)
1748 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
/* Per-item alignment lives in the high byte of the 16-bit kind.  */
1750 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
1751 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1752 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
/* Redirect the host address to the private copy just made.  */
1753 hostaddrs
[i
] = tgt
+ tgt_size
;
1754 tgt_size
= tgt_size
+ sizes
[i
];
1758 /* Helper function of GOMP_target{,_ext} routines. */
/* Resolve the device-side address of host function HOST_FN on DEVICEP.
   With native-exec capability the host address itself is returned;
   otherwise the device's address map is searched under its lock.
   NOTE(review): return type, braces and the early-return/NULL-handling
   lines (orig 1760, 1763, 1766-1767, 1770, 1772-1774, 1780-1782, 1784)
   were dropped by extraction — restore from upstream.  */
1761 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
1762 void (*host_fn
) (void *))
/* Devices that can run host code directly need no translation.  */
1764 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
1765 return (void *) host_fn
;
1768 gomp_mutex_lock (&devicep
->lock
);
1769 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1771 gomp_mutex_unlock (&devicep
->lock
);
/* Function entries are registered as one-byte ranges keyed by the host
   address (see the host_end = host_start + 1 setup in the image load).  */
1775 struct splay_tree_key_s k
;
1776 k
.host_start
= (uintptr_t) host_fn
;
1777 k
.host_end
= k
.host_start
+ 1;
1778 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1779 gomp_mutex_unlock (&devicep
->lock
);
1783 return (void *) tgt_fn
->tgt_offset
;
1787 /* Called when encountering a target directive. If DEVICE
1788 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1789 GOMP_DEVICE_HOST_FALLBACK (or any value
1790 larger than last available hw device), use host fallback.
1791 FN is address of host code, UNUSED is part of the current ABI, but
1792 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1793 with MAPNUM entries, with addresses of the host objects,
1794 sizes of the host objects (resp. for pointer kind pointer bias
1795 and assumed sizeof (void *) size) and kinds. */
/* NOTE(review): extraction dropped this function's return type, braces,
   the fn_addr declaration, the leading 'if (devicep == NULL' condition
   and the final run_func argument line (orig 1796-1797, 1801, 1803-1805,
   1811, 1816, 1818) — restore from upstream.  */
1798 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
1799 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
1800 unsigned char *kinds
)
1802 struct gomp_device_descr
*devicep
= resolve_device (device
);
/* Fall back to running FN on the host when offload is unavailable or the
   device lacks the required capability.  */
1806 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1807 /* All shared memory devices should use the GOMP_target_ext function. */
1808 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
1809 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
1810 return gomp_target_fallback (fn
, hostaddrs
);
/* Map arguments (8-bit kinds, short_mapkind == false), run the region on
   the device, then unmap with copy-back.  */
1812 struct target_mem_desc
*tgt_vars
1813 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1814 GOMP_MAP_VARS_TARGET
);
1815 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
1817 gomp_unmap_vars (tgt_vars
, true);
1820 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1821 and several arguments have been added:
1822 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1823 DEPEND is array of dependencies, see GOMP_task for details.
1825 ARGS is a pointer to an array consisting of a variable number of both
1826 device-independent and device-specific arguments, which can take one two
1827 elements where the first specifies for which device it is intended, the type
1828 and optionally also the value. If the value is not present in the first
1829 one, the whole second element the actual value. The last element of the
1830 array is a single NULL. Among the device independent can be for example
1831 NUM_TEAMS and THREAD_LIMIT.
1833 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1834 that value, or 1 if teams construct is not present, or 0, if
1835 teams construct does not have num_teams clause and so the choice is
1836 implementation defined, and -1 if it can't be determined on the host
1837 what value will GOMP_teams have on the device.
1838 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1839 body with that value, or 0, if teams construct does not have thread_limit
1840 clause or the teams construct is not present, or -1 if it can't be
1841 determined on the host what value will GOMP_teams have on the device. */
1844 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
1845 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
1846 unsigned int flags
, void **depend
, void **args
)
1848 struct gomp_device_descr
*devicep
= resolve_device (device
);
1849 size_t tgt_align
= 0, tgt_size
= 0;
1850 bool fpc_done
= false;
1852 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
1854 struct gomp_thread
*thr
= gomp_thread ();
1855 /* Create a team if we don't have any around, as nowait
1856 target tasks make sense to run asynchronously even when
1857 outside of any parallel. */
1858 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
1860 struct gomp_team
*team
= gomp_new_team (1);
1861 struct gomp_task
*task
= thr
->task
;
1862 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
1863 team
->prev_ts
= thr
->ts
;
1864 thr
->ts
.team
= team
;
1865 thr
->ts
.team_id
= 0;
1866 thr
->ts
.work_share
= &team
->work_shares
[0];
1867 thr
->ts
.last_work_share
= NULL
;
1868 #ifdef HAVE_SYNC_BUILTINS
1869 thr
->ts
.single_count
= 0;
1871 thr
->ts
.static_trip
= 0;
1872 thr
->task
= &team
->implicit_task
[0];
1873 gomp_init_task (thr
->task
, NULL
, icv
);
1879 thr
->task
= &team
->implicit_task
[0];
1882 pthread_setspecific (gomp_thread_destructor
, thr
);
1885 && !thr
->task
->final_task
)
1887 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
1888 sizes
, kinds
, flags
, depend
, args
,
1889 GOMP_TARGET_TASK_BEFORE_MAP
);
1894 /* If there are depend clauses, but nowait is not present
1895 (or we are in a final task), block the parent task until the
1896 dependencies are resolved and then just continue with the rest
1897 of the function as if it is a merged task. */
1900 struct gomp_thread
*thr
= gomp_thread ();
1901 if (thr
->task
&& thr
->task
->depend_hash
)
1903 /* If we might need to wait, copy firstprivate now. */
1904 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1905 &tgt_align
, &tgt_size
);
1908 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1909 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1910 tgt_align
, tgt_size
);
1913 gomp_task_maybe_wait_for_dependencies (depend
);
1919 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1920 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
1921 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
1925 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1926 &tgt_align
, &tgt_size
);
1929 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1930 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1931 tgt_align
, tgt_size
);
1934 gomp_target_fallback (fn
, hostaddrs
);
1938 struct target_mem_desc
*tgt_vars
;
1939 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1943 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
1944 &tgt_align
, &tgt_size
);
1947 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
1948 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
1949 tgt_align
, tgt_size
);
1955 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
1956 true, GOMP_MAP_VARS_TARGET
);
1957 devicep
->run_func (devicep
->target_id
, fn_addr
,
1958 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
1961 gomp_unmap_vars (tgt_vars
, true);
1964 /* Host fallback for GOMP_target_data{,_ext} routines. */
/* NOTE(review): the return type, braces and surrounding blank lines (orig
   1965-1966, 1968, 1971, 1981-1982) were dropped by extraction.  */
1967 gomp_target_data_fallback (void)
1969 struct gomp_task_icv
*icv
= gomp_icv (false);
1970 if (icv
->target_data
)
1972 /* Even when doing a host fallback, if there are any active
1973 #pragma omp target data constructs, need to remember the
1974 new #pragma omp target data, otherwise GOMP_target_end_data
1975 would get out of sync. */
/* Push an empty descriptor (no device, zero mappings) onto the chain so
   the matching end-data still has something to pop.  */
1976 struct target_mem_desc
*tgt
1977 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
1978 GOMP_MAP_VARS_DATA
);
1979 tgt
->prev
= icv
->target_data
;
1980 icv
->target_data
= tgt
;
/* Start a "#pragma omp target data" region (legacy ABI, 8-bit kinds):
   map the listed variables on the selected device and push the resulting
   descriptor onto the task-ICV chain for GOMP_target_end_data to pop.
   NOTE(review): the return type, braces and the leading 'if (devicep ==
   NULL' condition line (orig 1984, 1987, 1989-1990, 1994, 2001-2002)
   were dropped by extraction — restore from upstream.  */
1985 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
1986 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
1988 struct gomp_device_descr
*devicep
= resolve_device (device
);
/* Shared-memory or non-OpenMP-4.0 devices take the host fallback.  */
1991 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
1992 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
1993 return gomp_target_data_fallback ();
/* short_mapkind == false: KINDS entries are 8-bit.  */
1995 struct target_mem_desc
*tgt
1996 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
1997 GOMP_MAP_VARS_DATA
);
1998 struct gomp_task_icv
*icv
= gomp_icv (true);
1999 tgt
->prev
= icv
->target_data
;
2000 icv
->target_data
= tgt
;
/* Start a "#pragma omp target data" region (extended ABI, 16-bit kinds):
   same flow as GOMP_target_data but with short_mapkind == true.
   NOTE(review): the return type, braces and the leading 'if (devicep ==
   NULL' condition line (orig 2003, 2006, 2008-2009, 2013, 2020-2021)
   were dropped by extraction — restore from upstream.  */
2004 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2005 size_t *sizes
, unsigned short *kinds
)
2007 struct gomp_device_descr
*devicep
= resolve_device (device
);
/* Shared-memory or non-OpenMP-4.0 devices take the host fallback.  */
2010 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2011 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2012 return gomp_target_data_fallback ();
/* short_mapkind == true: KINDS entries are 16-bit.  */
2014 struct target_mem_desc
*tgt
2015 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2016 GOMP_MAP_VARS_DATA
);
2017 struct gomp_task_icv
*icv
= gomp_icv (true);
2018 tgt
->prev
= icv
->target_data
;
2019 icv
->target_data
= tgt
;
2023 GOMP_target_end_data (void)
2025 struct gomp_task_icv
*icv
= gomp_icv (false);
2026 if (icv
->target_data
)
2028 struct target_mem_desc
*tgt
= icv
->target_data
;
2029 icv
->target_data
= tgt
->prev
;
2030 gomp_unmap_vars (tgt
, true);
/* Implement "#pragma omp target update" (legacy ABI, 8-bit kinds): copy
   data between host and device for already-mapped objects per KINDS.
   NOTE(review): the return type, braces, the leading 'if (devicep ==
   NULL' guard and its body (orig 2033-2034, 2037, 2039-2040, 2043-2044,
   2046) were dropped by extraction — restore from upstream.  */
2035 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2036 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2038 struct gomp_device_descr
*devicep
= resolve_device (device
);
/* Guard: no transfer when the device lacks OpenMP 4.0 offload support or
   shares memory with the host (missing body at orig 2043).  */
2041 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2042 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2045 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2049 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2050 size_t *sizes
, unsigned short *kinds
,
2051 unsigned int flags
, void **depend
)
2053 struct gomp_device_descr
*devicep
= resolve_device (device
);
2055 /* If there are depend clauses, but nowait is not present,
2056 block the parent task until the dependencies are resolved
2057 and then just continue with the rest of the function as if it
2058 is a merged task. Until we are able to schedule task during
2059 variable mapping or unmapping, ignore nowait if depend clauses
2063 struct gomp_thread
*thr
= gomp_thread ();
2064 if (thr
->task
&& thr
->task
->depend_hash
)
2066 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2068 && !thr
->task
->final_task
)
2070 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2071 mapnum
, hostaddrs
, sizes
, kinds
,
2072 flags
| GOMP_TARGET_FLAG_UPDATE
,
2073 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2078 struct gomp_team
*team
= thr
->ts
.team
;
2079 /* If parallel or taskgroup has been cancelled, don't start new
2081 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2083 if (gomp_team_barrier_cancelled (&team
->barrier
))
2085 if (thr
->task
->taskgroup
)
2087 if (thr
->task
->taskgroup
->cancelled
)
2089 if (thr
->task
->taskgroup
->workshare
2090 && thr
->task
->taskgroup
->prev
2091 && thr
->task
->taskgroup
->prev
->cancelled
)
2096 gomp_task_maybe_wait_for_dependencies (depend
);
2102 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2103 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2106 struct gomp_thread
*thr
= gomp_thread ();
2107 struct gomp_team
*team
= thr
->ts
.team
;
2108 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2109 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2111 if (gomp_team_barrier_cancelled (&team
->barrier
))
2113 if (thr
->task
->taskgroup
)
2115 if (thr
->task
->taskgroup
->cancelled
)
2117 if (thr
->task
->taskgroup
->workshare
2118 && thr
->task
->taskgroup
->prev
2119 && thr
->task
->taskgroup
->prev
->cancelled
)
2124 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2128 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2129 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
2131 const int typemask
= 0xff;
2133 gomp_mutex_lock (&devicep
->lock
);
2134 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2136 gomp_mutex_unlock (&devicep
->lock
);
2140 for (i
= 0; i
< mapnum
; i
++)
2142 struct splay_tree_key_s cur_node
;
2143 unsigned char kind
= kinds
[i
] & typemask
;
2147 case GOMP_MAP_ALWAYS_FROM
:
2148 case GOMP_MAP_DELETE
:
2149 case GOMP_MAP_RELEASE
:
2150 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2151 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2152 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2153 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2154 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2155 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2156 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2157 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2161 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
2163 if ((kind
== GOMP_MAP_DELETE
2164 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
2165 && k
->refcount
!= REFCOUNT_INFINITY
)
2168 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
2169 || kind
== GOMP_MAP_ALWAYS_FROM
)
2170 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2171 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2172 + cur_node
.host_start
2174 cur_node
.host_end
- cur_node
.host_start
);
2175 if (k
->refcount
== 0)
2176 gomp_remove_var (devicep
, k
);
2180 gomp_mutex_unlock (&devicep
->lock
);
2181 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2186 gomp_mutex_unlock (&devicep
->lock
);
2190 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2191 size_t *sizes
, unsigned short *kinds
,
2192 unsigned int flags
, void **depend
)
2194 struct gomp_device_descr
*devicep
= resolve_device (device
);
2196 /* If there are depend clauses, but nowait is not present,
2197 block the parent task until the dependencies are resolved
2198 and then just continue with the rest of the function as if it
2199 is a merged task. Until we are able to schedule task during
2200 variable mapping or unmapping, ignore nowait if depend clauses
2204 struct gomp_thread
*thr
= gomp_thread ();
2205 if (thr
->task
&& thr
->task
->depend_hash
)
2207 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2209 && !thr
->task
->final_task
)
2211 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2212 mapnum
, hostaddrs
, sizes
, kinds
,
2213 flags
, depend
, NULL
,
2214 GOMP_TARGET_TASK_DATA
))
2219 struct gomp_team
*team
= thr
->ts
.team
;
2220 /* If parallel or taskgroup has been cancelled, don't start new
2222 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2224 if (gomp_team_barrier_cancelled (&team
->barrier
))
2226 if (thr
->task
->taskgroup
)
2228 if (thr
->task
->taskgroup
->cancelled
)
2230 if (thr
->task
->taskgroup
->workshare
2231 && thr
->task
->taskgroup
->prev
2232 && thr
->task
->taskgroup
->prev
->cancelled
)
2237 gomp_task_maybe_wait_for_dependencies (depend
);
2243 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2244 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2247 struct gomp_thread
*thr
= gomp_thread ();
2248 struct gomp_team
*team
= thr
->ts
.team
;
2249 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2250 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2252 if (gomp_team_barrier_cancelled (&team
->barrier
))
2254 if (thr
->task
->taskgroup
)
2256 if (thr
->task
->taskgroup
->cancelled
)
2258 if (thr
->task
->taskgroup
->workshare
2259 && thr
->task
->taskgroup
->prev
2260 && thr
->task
->taskgroup
->prev
->cancelled
)
2266 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2267 for (i
= 0; i
< mapnum
; i
++)
2268 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2270 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2271 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2275 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2276 true, GOMP_MAP_VARS_ENTER_DATA
);
2278 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
2282 gomp_target_task_fn (void *data
)
2284 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2285 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2287 if (ttask
->fn
!= NULL
)
2291 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2292 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2293 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2295 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2296 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
);
2300 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2303 gomp_unmap_vars (ttask
->tgt
, true);
2307 void *actual_arguments
;
2308 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2311 actual_arguments
= ttask
->hostaddrs
;
2315 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2316 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2317 GOMP_MAP_VARS_TARGET
);
2318 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2320 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2322 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2323 ttask
->args
, (void *) ttask
);
2326 else if (devicep
== NULL
2327 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2328 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2332 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2333 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2334 ttask
->kinds
, true);
2335 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2336 for (i
= 0; i
< ttask
->mapnum
; i
++)
2337 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2339 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2340 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2341 GOMP_MAP_VARS_ENTER_DATA
);
2342 i
+= ttask
->sizes
[i
];
2345 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2346 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2348 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2354 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2358 struct gomp_task_icv
*icv
= gomp_icv (true);
2359 icv
->thread_limit_var
2360 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2366 omp_target_alloc (size_t size
, int device_num
)
2368 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2369 return malloc (size
);
2374 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2375 if (devicep
== NULL
)
2378 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2379 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2380 return malloc (size
);
2382 gomp_mutex_lock (&devicep
->lock
);
2383 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2384 gomp_mutex_unlock (&devicep
->lock
);
2389 omp_target_free (void *device_ptr
, int device_num
)
2391 if (device_ptr
== NULL
)
2394 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2403 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2404 if (devicep
== NULL
)
2407 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2408 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2414 gomp_mutex_lock (&devicep
->lock
);
2415 gomp_free_device_memory (devicep
, device_ptr
);
2416 gomp_mutex_unlock (&devicep
->lock
);
2420 omp_target_is_present (const void *ptr
, int device_num
)
2425 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2431 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2432 if (devicep
== NULL
)
2435 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2436 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2439 gomp_mutex_lock (&devicep
->lock
);
2440 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2441 struct splay_tree_key_s cur_node
;
2443 cur_node
.host_start
= (uintptr_t) ptr
;
2444 cur_node
.host_end
= cur_node
.host_start
;
2445 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2446 int ret
= n
!= NULL
;
2447 gomp_mutex_unlock (&devicep
->lock
);
2452 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2453 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2456 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2459 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2461 if (dst_device_num
< 0)
2464 dst_devicep
= resolve_device (dst_device_num
);
2465 if (dst_devicep
== NULL
)
2468 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2469 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2472 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2474 if (src_device_num
< 0)
2477 src_devicep
= resolve_device (src_device_num
);
2478 if (src_devicep
== NULL
)
2481 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2482 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2485 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2487 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2490 if (src_devicep
== NULL
)
2492 gomp_mutex_lock (&dst_devicep
->lock
);
2493 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2494 (char *) dst
+ dst_offset
,
2495 (char *) src
+ src_offset
, length
);
2496 gomp_mutex_unlock (&dst_devicep
->lock
);
2497 return (ret
? 0 : EINVAL
);
2499 if (dst_devicep
== NULL
)
2501 gomp_mutex_lock (&src_devicep
->lock
);
2502 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2503 (char *) dst
+ dst_offset
,
2504 (char *) src
+ src_offset
, length
);
2505 gomp_mutex_unlock (&src_devicep
->lock
);
2506 return (ret
? 0 : EINVAL
);
2508 if (src_devicep
== dst_devicep
)
2510 gomp_mutex_lock (&src_devicep
->lock
);
2511 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2512 (char *) dst
+ dst_offset
,
2513 (char *) src
+ src_offset
, length
);
2514 gomp_mutex_unlock (&src_devicep
->lock
);
2515 return (ret
? 0 : EINVAL
);
2521 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2522 int num_dims
, const size_t *volume
,
2523 const size_t *dst_offsets
,
2524 const size_t *src_offsets
,
2525 const size_t *dst_dimensions
,
2526 const size_t *src_dimensions
,
2527 struct gomp_device_descr
*dst_devicep
,
2528 struct gomp_device_descr
*src_devicep
)
2530 size_t dst_slice
= element_size
;
2531 size_t src_slice
= element_size
;
2532 size_t j
, dst_off
, src_off
, length
;
2537 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2538 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2539 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2541 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2543 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2547 else if (src_devicep
== NULL
)
2548 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2549 (char *) dst
+ dst_off
,
2550 (const char *) src
+ src_off
,
2552 else if (dst_devicep
== NULL
)
2553 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2554 (char *) dst
+ dst_off
,
2555 (const char *) src
+ src_off
,
2557 else if (src_devicep
== dst_devicep
)
2558 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2559 (char *) dst
+ dst_off
,
2560 (const char *) src
+ src_off
,
2564 return ret
? 0 : EINVAL
;
2567 /* FIXME: it would be nice to have some plugin function to handle
2568 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2569 be handled in the generic recursion below, and for host-host it
2570 should be used even for any num_dims >= 2. */
2572 for (i
= 1; i
< num_dims
; i
++)
2573 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2574 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2576 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2577 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2579 for (j
= 0; j
< volume
[0]; j
++)
2581 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2582 (const char *) src
+ src_off
,
2583 element_size
, num_dims
- 1,
2584 volume
+ 1, dst_offsets
+ 1,
2585 src_offsets
+ 1, dst_dimensions
+ 1,
2586 src_dimensions
+ 1, dst_devicep
,
2590 dst_off
+= dst_slice
;
2591 src_off
+= src_slice
;
2597 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2598 int num_dims
, const size_t *volume
,
2599 const size_t *dst_offsets
,
2600 const size_t *src_offsets
,
2601 const size_t *dst_dimensions
,
2602 const size_t *src_dimensions
,
2603 int dst_device_num
, int src_device_num
)
2605 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2610 if (dst_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2612 if (dst_device_num
< 0)
2615 dst_devicep
= resolve_device (dst_device_num
);
2616 if (dst_devicep
== NULL
)
2619 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2620 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2623 if (src_device_num
!= GOMP_DEVICE_HOST_FALLBACK
)
2625 if (src_device_num
< 0)
2628 src_devicep
= resolve_device (src_device_num
);
2629 if (src_devicep
== NULL
)
2632 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2633 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2637 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
2641 gomp_mutex_lock (&src_devicep
->lock
);
2642 else if (dst_devicep
)
2643 gomp_mutex_lock (&dst_devicep
->lock
);
2644 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
2645 volume
, dst_offsets
, src_offsets
,
2646 dst_dimensions
, src_dimensions
,
2647 dst_devicep
, src_devicep
);
2649 gomp_mutex_unlock (&src_devicep
->lock
);
2650 else if (dst_devicep
)
2651 gomp_mutex_unlock (&dst_devicep
->lock
);
2656 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
2657 size_t size
, size_t device_offset
, int device_num
)
2659 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2665 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2666 if (devicep
== NULL
)
2669 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2670 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2673 gomp_mutex_lock (&devicep
->lock
);
2675 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2676 struct splay_tree_key_s cur_node
;
2679 cur_node
.host_start
= (uintptr_t) host_ptr
;
2680 cur_node
.host_end
= cur_node
.host_start
+ size
;
2681 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2684 if (n
->tgt
->tgt_start
+ n
->tgt_offset
2685 == (uintptr_t) device_ptr
+ device_offset
2686 && n
->host_start
<= cur_node
.host_start
2687 && n
->host_end
>= cur_node
.host_end
)
2692 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2693 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
2697 tgt
->to_free
= NULL
;
2699 tgt
->list_count
= 0;
2700 tgt
->device_descr
= devicep
;
2701 splay_tree_node array
= tgt
->array
;
2702 splay_tree_key k
= &array
->key
;
2703 k
->host_start
= cur_node
.host_start
;
2704 k
->host_end
= cur_node
.host_end
;
2706 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
2707 k
->refcount
= REFCOUNT_INFINITY
;
2708 k
->virtual_refcount
= 0;
2711 array
->right
= NULL
;
2712 splay_tree_insert (&devicep
->mem_map
, array
);
2715 gomp_mutex_unlock (&devicep
->lock
);
2720 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
2722 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2728 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2729 if (devicep
== NULL
)
2732 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
2735 gomp_mutex_lock (&devicep
->lock
);
2737 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2738 struct splay_tree_key_s cur_node
;
2741 cur_node
.host_start
= (uintptr_t) ptr
;
2742 cur_node
.host_end
= cur_node
.host_start
;
2743 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
2745 && n
->host_start
== cur_node
.host_start
2746 && n
->refcount
== REFCOUNT_INFINITY
2747 && n
->tgt
->tgt_start
== 0
2748 && n
->tgt
->to_free
== NULL
2749 && n
->tgt
->refcount
== 1
2750 && n
->tgt
->list_count
== 0)
2752 splay_tree_remove (&devicep
->mem_map
, n
);
2753 gomp_unmap_tgt (n
->tgt
);
2757 gomp_mutex_unlock (&devicep
->lock
);
2762 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
2765 if (device_num
== GOMP_DEVICE_HOST_FALLBACK
)
2766 return gomp_pause_host ();
2767 if (device_num
< 0 || device_num
>= gomp_get_num_devices ())
2769 /* Do nothing for target devices for now. */
2774 omp_pause_resource_all (omp_pause_resource_t kind
)
2777 if (gomp_pause_host ())
2779 /* Do nothing for target devices for now. */
2783 ialias (omp_pause_resource
)
2784 ialias (omp_pause_resource_all
)
2786 #ifdef PLUGIN_SUPPORT
2788 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2790 The handles of the found functions are stored in the corresponding fields
2791 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2794 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
2795 const char *plugin_name
)
2797 const char *err
= NULL
, *last_missing
= NULL
;
2799 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
2803 /* Check if all required functions are available in the plugin and store
2804 their handlers. None of the symbols can legitimately be NULL,
2805 so we don't need to check dlerror all the time. */
2807 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2809 /* Similar, but missing functions are not an error. Return false if
2810 failed, true otherwise. */
2811 #define DLSYM_OPT(f, n) \
2812 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2813 || (last_missing = #n, 0))
2816 if (device
->version_func () != GOMP_VERSION
)
2818 err
= "plugin version mismatch";
2825 DLSYM (get_num_devices
);
2826 DLSYM (init_device
);
2827 DLSYM (fini_device
);
2829 DLSYM (unload_image
);
2834 device
->capabilities
= device
->get_caps_func ();
2835 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2839 DLSYM_OPT (can_run
, can_run
);
2842 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
2844 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
2845 || !DLSYM_OPT (openacc
.create_thread_data
,
2846 openacc_create_thread_data
)
2847 || !DLSYM_OPT (openacc
.destroy_thread_data
,
2848 openacc_destroy_thread_data
)
2849 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
2850 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
2851 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
2852 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
2853 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
2854 || !DLSYM_OPT (openacc
.async
.queue_callback
,
2855 openacc_async_queue_callback
)
2856 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
2857 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
2858 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
))
2860 /* Require all the OpenACC handlers if we have
2861 GOMP_OFFLOAD_CAP_OPENACC_200. */
2862 err
= "plugin missing OpenACC handler function";
2867 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
2868 openacc_cuda_get_current_device
);
2869 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
2870 openacc_cuda_get_current_context
);
2871 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
2872 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
2873 if (cuda
&& cuda
!= 4)
2875 /* Make sure all the CUDA functions are there if any of them are. */
2876 err
= "plugin missing OpenACC CUDA handler function";
2888 gomp_error ("while loading %s: %s", plugin_name
, err
);
2890 gomp_error ("missing function was %s", last_missing
);
2892 dlclose (plugin_handle
);
2897 /* This function finalizes all initialized devices. */
2900 gomp_target_fini (void)
2903 for (i
= 0; i
< num_devices
; i
++)
2906 struct gomp_device_descr
*devicep
= &devices
[i
];
2907 gomp_mutex_lock (&devicep
->lock
);
2908 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2909 ret
= gomp_fini_device (devicep
);
2910 gomp_mutex_unlock (&devicep
->lock
);
2912 gomp_fatal ("device finalization failed");
2916 /* This function initializes the runtime for offloading.
2917 It parses the list of offload plugins, and tries to load these.
2918 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2919 will be set, and the array DEVICES initialized, containing descriptors for
2920 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2924 gomp_target_init (void)
2926 const char *prefix
="libgomp-plugin-";
2927 const char *suffix
= SONAME_SUFFIX (1);
2928 const char *cur
, *next
;
2930 int i
, new_num_devices
;
2935 cur
= OFFLOAD_PLUGINS
;
2939 struct gomp_device_descr current_device
;
2940 size_t prefix_len
, suffix_len
, cur_len
;
2942 next
= strchr (cur
, ',');
2944 prefix_len
= strlen (prefix
);
2945 cur_len
= next
? next
- cur
: strlen (cur
);
2946 suffix_len
= strlen (suffix
);
2948 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
2955 memcpy (plugin_name
, prefix
, prefix_len
);
2956 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
2957 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
2959 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
2961 new_num_devices
= current_device
.get_num_devices_func ();
2962 if (new_num_devices
>= 1)
2964 /* Augment DEVICES and NUM_DEVICES. */
2966 devices
= realloc (devices
, (num_devices
+ new_num_devices
)
2967 * sizeof (struct gomp_device_descr
));
2975 current_device
.name
= current_device
.get_name_func ();
2976 /* current_device.capabilities has already been set. */
2977 current_device
.type
= current_device
.get_type_func ();
2978 current_device
.mem_map
.root
= NULL
;
2979 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
2980 for (i
= 0; i
< new_num_devices
; i
++)
2982 current_device
.target_id
= i
;
2983 devices
[num_devices
] = current_device
;
2984 gomp_mutex_init (&devices
[num_devices
].lock
);
2995 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2996 NUM_DEVICES_OPENMP. */
2997 struct gomp_device_descr
*devices_s
2998 = malloc (num_devices
* sizeof (struct gomp_device_descr
));
3005 num_devices_openmp
= 0;
3006 for (i
= 0; i
< num_devices
; i
++)
3007 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3008 devices_s
[num_devices_openmp
++] = devices
[i
];
3009 int num_devices_after_openmp
= num_devices_openmp
;
3010 for (i
= 0; i
< num_devices
; i
++)
3011 if (!(devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3012 devices_s
[num_devices_after_openmp
++] = devices
[i
];
3014 devices
= devices_s
;
3016 for (i
= 0; i
< num_devices
; i
++)
3018 /* The 'devices' array can be moved (by the realloc call) until we have
3019 found all the plugins, so registering with the OpenACC runtime (which
3020 takes a copy of the pointer argument) must be delayed until now. */
3021 if (devices
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3022 goacc_register (&devices
[i
]);
3025 if (atexit (gomp_target_fini
) != 0)
3026 gomp_fatal ("atexit failed");
3029 #else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fallback to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
3036 #endif /* PLUGIN_SUPPORT */