/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;
56
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr {
61 unsigned version;
62 enum offload_target_type type;
63 const void *host_table;
64 const void *target_data;
65 };
66
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr *offload_images;
69
70 /* Total number of offload images. */
71 static int num_offload_images;
72
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr *devices;
75
76 /* Total number of available devices. */
77 static int num_devices;
78
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp;
81
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83
84 static void *
85 gomp_realloc_unlock (void *old, size_t size)
86 {
87 void *ret = realloc (old, size);
88 if (ret == NULL)
89 {
90 gomp_mutex_unlock (&register_lock);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92 }
93 return ret;
94 }
95
96 attribute_hidden void
97 gomp_init_targets_once (void)
98 {
99 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
100 }
101
102 attribute_hidden int
103 gomp_get_num_devices (void)
104 {
105 gomp_init_targets_once ();
106 return num_devices_openmp;
107 }
108
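/* Translate DEVICE_ID (or the default-device ICV when GOMP_DEVICE_ICV is
   passed) into a device descriptor, initializing the device on first use.
   Returns NULL if the ID is out of range or the device has already been
   finalized.  */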
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

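/* Look up KEY in MEM_MAP.  A zero-length key (host_start == host_end) is
   temporarily widened one byte in each direction so that it also matches a
   mapping containing the address, or one ending right at it, before falling
   back to an exact lookup.  */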
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

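/* Like gomp_map_lookup, but for a zero-length key only accept a mapping
   that actually contains the address (used for zero-length array
   sections).  */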
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

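/* Copy SIZE bytes from SRCADDR to DSTADDR through the device plugin hook
   COPY_FUNC; DST and SRC name the two sides ("host"/"dev") for the
   diagnostic that is emitted, after releasing the device lock, if the copy
   fails.  */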
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

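/* Like gomp_device_copy, but enqueue the transfer on the OpenACC
   asynchronous queue AQ instead of performing it synchronously.  */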
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, the number of memory regions
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP (4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.  */
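/* For illustration: adding a 512-byte region at offset 0 and then another
   512-byte region at offset 1024 merges both into a single chunk [0, 1536),
   since the 512-byte gap is below MAX_COALESCE_BUF_GAP; a later region at
   offset 64*1024 then starts a new chunk.  Regions longer than
   MAX_COALESCE_BUF_SIZE are simply skipped, while a region starting before
   the end of the previous chunk disables coalesce buffering altogether.  */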

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

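/* Copy SZ bytes from host address H to device address D.  If the target
   offset falls inside one of CBUF's chunks (found by binary search), the
   data is merely staged into the coalesce buffer; otherwise it is copied
   directly, asynchronously via AQ when one is given.  */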
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                             "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

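/* Initialize the device pointer at TGT->tgt_start + TARGET_OFFSET: a NULL
   host pointer is translated to NULL, anything else must already be mapped
   (after adding BIAS) and is rewritten to the corresponding device address
   minus BIAS.  */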
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

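/* Handle structure element I of a GOMP_MAP_STRUCT mapping whose enclosing
   structure N is already present on the device: record the pre-existing
   mapping for the element, probing the adjacent bytes as well for
   zero-sized entries.  It is an error if the element was not mapped
   together with the rest of the structure.  */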
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                              &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node,
                                      &tgt->list[i], kind & typemask, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

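/* OpenACC 'attach' semantics: bump the attachment counter kept in N->aux
   for the pointer located at ATTACH_TO within the mapped region N.  On the
   0 -> 1 transition, look up the pointer's target (offset by BIAS) and
   write its device address into the device copy of the pointer.  */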
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("pointer target not mapped for attach");
        }

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

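/* Inverse of gomp_attach_pointer: decrement the attachment counter (or force
   it to zero when FINALIZE) and, once it reaches zero, restore the original
   host pointer value in the device copy of the pointer at DETACH_FROM.  */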
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}

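/* Return the device address for list entry I of TGT: either through the
   recorded splay tree key, or for keyless entries according to the special
   OFFSET_* marker stored in the offset field.  */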
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

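/* The workhorse behind gomp_map_vars: a first pass over the MAPNUM mappings
   records already-mapped variables and accumulates the size, alignment and
   coalescable chunks of everything that still needs device memory; one
   device block is then allocated, and a second pass creates the new splay
   tree entries and performs the host to device copies (staged through the
   coalesce buffer where profitable).  For GOMP_MAP_VARS_TARGET the block
   additionally begins with an array of the MAPNUM device addresses handed
   to the target kernel.  */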
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                kind & typemask, NULL);
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily test
                   'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */
                    }
                  else
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  gomp_attach_pointer (devicep, aq, mem_map, n,
                                       (uintptr_t) hostaddrs[i], sizes[i],
                                       cbufp);
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, cbufp);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          tgt->list[j].is_attach = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt, aq,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j], cbufp);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, size_t mapnum,
                     void **hostaddrs, void **devaddrs, size_t *sizes,
                     void *kinds, bool short_mapkind,
                     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

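/* Drop one reference to the target_mem_desc PTR and unmap it (freeing the
   device memory block) once the count reaches zero.  Returns true iff the
   descriptor was actually unmapped.  */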
static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
                          struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
        splay_tree_insert (&devicep->mem_map,
                           (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
        free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
                       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
                          struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
        gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
                             + tgt->list[i].offset,
                             false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
         counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        {
          struct target_mem_desc *k_tgt = k->tgt;
          bool is_tgt_unmapped = gomp_remove_var (devicep, k);
          /* It would be bad if TGT got unmapped while we're still iterating
             over its LIST_COUNT, and also expect to use it in the following
             code.  */
          assert (!is_tgt_unmapped
                  || k_tgt != tgt);
        }
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
                       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

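/* Implement the OpenMP 'target update' semantics: for every non-zero-sized
   list entry that is mapped on DEVICEP, copy the data to and/or from the
   device according to its map kind.  Entries that are not mapped at all are
   silently ignored; partially mapped ones are an error.  */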
1505 static void
1506 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1507 size_t *sizes, void *kinds, bool short_mapkind)
1508 {
1509 size_t i;
1510 struct splay_tree_key_s cur_node;
1511 const int typemask = short_mapkind ? 0xff : 0x7;
1512
1513 if (!devicep)
1514 return;
1515
1516 if (mapnum == 0)
1517 return;
1518
1519 gomp_mutex_lock (&devicep->lock);
1520 if (devicep->state == GOMP_DEVICE_FINALIZED)
1521 {
1522 gomp_mutex_unlock (&devicep->lock);
1523 return;
1524 }
1525
1526 for (i = 0; i < mapnum; i++)
1527 if (sizes[i])
1528 {
1529 cur_node.host_start = (uintptr_t) hostaddrs[i];
1530 cur_node.host_end = cur_node.host_start + sizes[i];
1531 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1532 if (n)
1533 {
1534 int kind = get_kind (short_mapkind, kinds, i);
1535 if (n->host_start > cur_node.host_start
1536 || n->host_end < cur_node.host_end)
1537 {
1538 gomp_mutex_unlock (&devicep->lock);
1539 gomp_fatal ("Trying to update [%p..%p) object when "
1540 "only [%p..%p) is mapped",
1541 (void *) cur_node.host_start,
1542 (void *) cur_node.host_end,
1543 (void *) n->host_start,
1544 (void *) n->host_end);
1545 }
1546
1547
1548 void *hostaddr = (void *) cur_node.host_start;
1549 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1550 + cur_node.host_start - n->host_start);
1551 size_t size = cur_node.host_end - cur_node.host_start;
1552
1553 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1554 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1555 NULL);
1556 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1557 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1558 }
1559 }
1560 gomp_mutex_unlock (&devicep->lock);
1561 }
1562
1563 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1564 And insert to splay tree the mapping between addresses from HOST_TABLE and
1565 from loaded target image. We rely in the host and device compiler
1566 emitting variable and functions in the same order. */
1567
1568 static void
1569 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1570 const void *host_table, const void *target_data,
1571 bool is_register_lock)
1572 {
1573 void **host_func_table = ((void ***) host_table)[0];
1574 void **host_funcs_end = ((void ***) host_table)[1];
1575 void **host_var_table = ((void ***) host_table)[2];
1576 void **host_vars_end = ((void ***) host_table)[3];
1577
1578 /* The func table contains only addresses, the var table contains addresses
1579 and corresponding sizes. */
1580 int num_funcs = host_funcs_end - host_func_table;
1581 int num_vars = (host_vars_end - host_var_table) / 2;
1582
1583 /* Load image to device and get target addresses for the image. */
1584 struct addr_pair *target_table = NULL;
1585 int i, num_target_entries;
1586
1587 num_target_entries
1588 = devicep->load_image_func (devicep->target_id, version,
1589 target_data, &target_table);
1590
1591 if (num_target_entries != num_funcs + num_vars)
1592 {
1593 gomp_mutex_unlock (&devicep->lock);
1594 if (is_register_lock)
1595 gomp_mutex_unlock (&register_lock);
1596 gomp_fatal ("Cannot map target functions or variables"
1597 " (expected %u, have %u)", num_funcs + num_vars,
1598 num_target_entries);
1599 }
1600
1601 /* Insert host-target address mapping into splay tree. */
1602 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1603 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1604 tgt->refcount = REFCOUNT_INFINITY;
1605 tgt->tgt_start = 0;
1606 tgt->tgt_end = 0;
1607 tgt->to_free = NULL;
1608 tgt->prev = NULL;
1609 tgt->list_count = 0;
1610 tgt->device_descr = devicep;
1611 splay_tree_node array = tgt->array;
1612
1613 for (i = 0; i < num_funcs; i++)
1614 {
1615 splay_tree_key k = &array->key;
1616 k->host_start = (uintptr_t) host_func_table[i];
1617 k->host_end = k->host_start + 1;
1618 k->tgt = tgt;
1619 k->tgt_offset = target_table[i].start;
1620 k->refcount = REFCOUNT_INFINITY;
1621 k->dynamic_refcount = 0;
1622 k->aux = NULL;
1623 array->left = NULL;
1624 array->right = NULL;
1625 splay_tree_insert (&devicep->mem_map, array);
1626 array++;
1627 }
1628
1629 /* Most significant bit of the size in host and target tables marks
1630 "omp declare target link" variables. */
1631 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1632 const uintptr_t size_mask = ~link_bit;
1633
1634 for (i = 0; i < num_vars; i++)
1635 {
1636 struct addr_pair *target_var = &target_table[num_funcs + i];
1637 uintptr_t target_size = target_var->end - target_var->start;
1638 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
1639
1640 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
1641 {
1642 gomp_mutex_unlock (&devicep->lock);
1643 if (is_register_lock)
1644 gomp_mutex_unlock (&register_lock);
1645 gomp_fatal ("Cannot map target variables (size mismatch)");
1646 }
1647
1648 splay_tree_key k = &array->key;
1649 k->host_start = (uintptr_t) host_var_table[i * 2];
1650 k->host_end
1651 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1652 k->tgt = tgt;
1653 k->tgt_offset = target_var->start;
1654 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1655 k->dynamic_refcount = 0;
1656 k->aux = NULL;
1657 array->left = NULL;
1658 array->right = NULL;
1659 splay_tree_insert (&devicep->mem_map, array);
1660 array++;
1661 }
1662
1663 free (target_table);
1664 }
1665
1666 /* Unload the mappings described by target_data from device DEVICE_P.
1667 The device must be locked. */
1668
1669 static void
1670 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1671 unsigned version,
1672 const void *host_table, const void *target_data)
1673 {
1674 void **host_func_table = ((void ***) host_table)[0];
1675 void **host_funcs_end = ((void ***) host_table)[1];
1676 void **host_var_table = ((void ***) host_table)[2];
1677 void **host_vars_end = ((void ***) host_table)[3];
1678
1679 /* The func table contains only addresses, the var table contains addresses
1680 and corresponding sizes. */
1681 int num_funcs = host_funcs_end - host_func_table;
1682 int num_vars = (host_vars_end - host_var_table) / 2;
1683
1684 struct splay_tree_key_s k;
1685 splay_tree_key node = NULL;
1686
1687 /* Find mapping at start of node array */
1688 if (num_funcs || num_vars)
1689 {
1690 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1691 : (uintptr_t) host_var_table[0]);
1692 k.host_end = k.host_start + 1;
1693 node = splay_tree_lookup (&devicep->mem_map, &k);
1694 }
1695
1696 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1697 {
1698 gomp_mutex_unlock (&devicep->lock);
1699 gomp_fatal ("image unload fail");
1700 }
1701
1702 /* Remove mappings from splay tree. */
1703 int i;
1704 for (i = 0; i < num_funcs; i++)
1705 {
1706 k.host_start = (uintptr_t) host_func_table[i];
1707 k.host_end = k.host_start + 1;
1708 splay_tree_remove (&devicep->mem_map, &k);
1709 }
1710
1711 /* Most significant bit of the size in host and target tables marks
1712 "omp declare target link" variables. */
1713 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1714 const uintptr_t size_mask = ~link_bit;
1715 bool is_tgt_unmapped = false;
1716
1717 for (i = 0; i < num_vars; i++)
1718 {
1719 k.host_start = (uintptr_t) host_var_table[i * 2];
1720 k.host_end
1721 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1722
1723 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1724 splay_tree_remove (&devicep->mem_map, &k);
1725 else
1726 {
1727 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1728 is_tgt_unmapped = gomp_remove_var (devicep, n);
1729 }
1730 }
1731
1732 if (node && !is_tgt_unmapped)
1733 {
1734 free (node->tgt);
1735 free (node);
1736 }
1737 }
1738
1739 /* This function should be called from every offload image while loading.
1740 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1741 the target, and TARGET_DATA needed by target plugin. */
1742
1743 void
1744 GOMP_offload_register_ver (unsigned version, const void *host_table,
1745 int target_type, const void *target_data)
1746 {
1747 int i;
1748
1749 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1750 gomp_fatal ("Library too old for offload (version %u < %u)",
1751 GOMP_VERSION, GOMP_VERSION_LIB (version));
1752
1753 gomp_mutex_lock (&register_lock);
1754
1755 /* Load image to all initialized devices. */
1756 for (i = 0; i < num_devices; i++)
1757 {
1758 struct gomp_device_descr *devicep = &devices[i];
1759 gomp_mutex_lock (&devicep->lock);
1760 if (devicep->type == target_type
1761 && devicep->state == GOMP_DEVICE_INITIALIZED)
1762 gomp_load_image_to_device (devicep, version,
1763 host_table, target_data, true);
1764 gomp_mutex_unlock (&devicep->lock);
1765 }
1766
1767 /* Insert image to array of pending images. */
1768 offload_images
1769 = gomp_realloc_unlock (offload_images,
1770 (num_offload_images + 1)
1771 * sizeof (struct offload_image_descr));
1772 offload_images[num_offload_images].version = version;
1773 offload_images[num_offload_images].type = target_type;
1774 offload_images[num_offload_images].host_table = host_table;
1775 offload_images[num_offload_images].target_data = target_data;
1776
1777 num_offload_images++;
1778 gomp_mutex_unlock (&register_lock);
1779 }
1780
1781 void
1782 GOMP_offload_register (const void *host_table, int target_type,
1783 const void *target_data)
1784 {
1785 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1786 }
1787
1788 /* This function should be called from every offload image while unloading.
1789 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1790 the target, and TARGET_DATA needed by target plugin. */
1791
1792 void
1793 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1794 int target_type, const void *target_data)
1795 {
1796 int i;
1797
1798 gomp_mutex_lock (&register_lock);
1799
1800 /* Unload image from all initialized devices. */
1801 for (i = 0; i < num_devices; i++)
1802 {
1803 struct gomp_device_descr *devicep = &devices[i];
1804 gomp_mutex_lock (&devicep->lock);
1805 if (devicep->type == target_type
1806 && devicep->state == GOMP_DEVICE_INITIALIZED)
1807 gomp_unload_image_from_device (devicep, version,
1808 host_table, target_data);
1809 gomp_mutex_unlock (&devicep->lock);
1810 }
1811
1812 /* Remove image from array of pending images. */
1813 for (i = 0; i < num_offload_images; i++)
1814 if (offload_images[i].target_data == target_data)
1815 {
1816 offload_images[i] = offload_images[--num_offload_images];
1817 break;
1818 }
1819
1820 gomp_mutex_unlock (&register_lock);
1821 }
1822
1823 void
1824 GOMP_offload_unregister (const void *host_table, int target_type,
1825 const void *target_data)
1826 {
1827 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1828 }
1829
1830 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1831 must be locked on entry, and remains locked on return. */
1832
1833 attribute_hidden void
1834 gomp_init_device (struct gomp_device_descr *devicep)
1835 {
1836 int i;
1837 if (!devicep->init_device_func (devicep->target_id))
1838 {
1839 gomp_mutex_unlock (&devicep->lock);
1840 gomp_fatal ("device initialization failed");
1841 }
1842
1843 /* Load to the device all images registered so far. */
1844 for (i = 0; i < num_offload_images; i++)
1845 {
1846 struct offload_image_descr *image = &offload_images[i];
1847 if (image->type == devicep->type)
1848 gomp_load_image_to_device (devicep, image->version,
1849 image->host_table, image->target_data,
1850 false);
1851 }
1852
1853 /* Initialize OpenACC asynchronous queues. */
1854 goacc_init_asyncqueues (devicep);
1855
1856 devicep->state = GOMP_DEVICE_INITIALIZED;
1857 }
1858
1859 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1860 must be locked on entry, and remains locked on return. */
1861
1862 attribute_hidden bool
1863 gomp_fini_device (struct gomp_device_descr *devicep)
1864 {
1865 bool ret = goacc_fini_asyncqueues (devicep);
1866 ret &= devicep->fini_device_func (devicep->target_id);
1867 devicep->state = GOMP_DEVICE_FINALIZED;
1868 return ret;
1869 }
1870
1871 attribute_hidden void
1872 gomp_unload_device (struct gomp_device_descr *devicep)
1873 {
1874 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1875 {
1876 unsigned i;
1877
1878 /* Unload from the device all images currently registered. */
1879 for (i = 0; i < num_offload_images; i++)
1880 {
1881 struct offload_image_descr *image = &offload_images[i];
1882 if (image->type == devicep->type)
1883 gomp_unload_image_from_device (devicep, image->version,
1884 image->host_table,
1885 image->target_data);
1886 }
1887 }
1888 }
1889
1890 /* Host fallback for GOMP_target{,_ext} routines. */
1891
1892 static void
1893 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1894 {
1895 struct gomp_thread old_thr, *thr = gomp_thread ();
1896 old_thr = *thr;
1897 memset (thr, '\0', sizeof (*thr));
1898 if (gomp_places_list)
1899 {
1900 thr->place = old_thr.place;
1901 thr->ts.place_partition_len = gomp_places_list_len;
1902 }
1903 fn (hostaddrs);
1904 gomp_free_thread (thr);
1905 *thr = old_thr;
1906 }
1907
1908 /* Calculate alignment and size requirements of a private copy of data shared
1909 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1910
1911 static inline void
1912 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1913 unsigned short *kinds, size_t *tgt_align,
1914 size_t *tgt_size)
1915 {
1916 size_t i;
1917 for (i = 0; i < mapnum; i++)
1918 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1919 {
1920 size_t align = (size_t) 1 << (kinds[i] >> 8);
1921 if (*tgt_align < align)
1922 *tgt_align = align;
1923 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1924 *tgt_size += sizes[i];
1925 }
1926 }
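/* A worked instance of the rounding above: with two firstprivate entries
   of sizes 13 and 8 and alignments 8 and 4, the loop yields *tgt_align = 8
   and *tgt_size = 24, since round_up (0, 8) + 13 = 13 and then
   round_up (13, 4) + 8 = 16 + 8 = 24, where
   round_up (x, a) = (x + a - 1) & ~(a - 1).  */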
1927
1928 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
1929
1930 static inline void
1931 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1932 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1933 size_t tgt_size)
1934 {
1935 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1936 if (al)
1937 tgt += tgt_align - al;
1938 tgt_size = 0;
1939 size_t i;
1940 for (i = 0; i < mapnum; i++)
1941 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1942 {
1943 size_t align = (size_t) 1 << (kinds[i] >> 8);
1944 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1945 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1946 hostaddrs[i] = tgt + tgt_size;
1947 tgt_size = tgt_size + sizes[i];
1948 }
1949 }
1950
1951 /* Helper function of GOMP_target{,_ext} routines. */
1952
1953 static void *
1954 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1955 void (*host_fn) (void *))
1956 {
1957 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1958 return (void *) host_fn;
1959 else
1960 {
1961 gomp_mutex_lock (&devicep->lock);
1962 if (devicep->state == GOMP_DEVICE_FINALIZED)
1963 {
1964 gomp_mutex_unlock (&devicep->lock);
1965 return NULL;
1966 }
1967
1968 struct splay_tree_key_s k;
1969 k.host_start = (uintptr_t) host_fn;
1970 k.host_end = k.host_start + 1;
1971 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1972 gomp_mutex_unlock (&devicep->lock);
1973 if (tgt_fn == NULL)
1974 return NULL;
1975
1976 return (void *) tgt_fn->tgt_offset;
1977 }
1978 }
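/* Note: host functions are entered into the splay tree as one-byte
   ranges, which is why the lookup above probes [host_fn, host_fn + 1)
   rather than a single exact address.  */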
1979
1980 /* Called when encountering a target directive. If DEVICE
1981 is GOMP_DEVICE_ICV, use the device-var ICV. If it is
1982 GOMP_DEVICE_HOST_FALLBACK (or any value
1983 larger than the last available hw device), use the host fallback.
1984 FN is the address of the host code; UNUSED is part of the current ABI, but
1985 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1986 with MAPNUM entries, holding the addresses of the host objects, the
1987 sizes of the host objects (resp., for pointer kinds, the pointer bias
1988 with an assumed size of sizeof (void *)) and the map kinds. */
1989
1990 void
1991 GOMP_target (int device, void (*fn) (void *), const void *unused,
1992 size_t mapnum, void **hostaddrs, size_t *sizes,
1993 unsigned char *kinds)
1994 {
1995 struct gomp_device_descr *devicep = resolve_device (device);
1996
1997 void *fn_addr;
1998 if (devicep == NULL
1999 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2000 /* All shared memory devices should use the GOMP_target_ext function. */
2001 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2002 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2003 return gomp_target_fallback (fn, hostaddrs);
2004
2005 struct target_mem_desc *tgt_vars
2006 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2007 GOMP_MAP_VARS_TARGET);
2008 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2009 NULL);
2010 gomp_unmap_vars (tgt_vars, true);
2011 }
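/* A simplified sketch, not the exact lowering: for

     #pragma omp target map(tofrom: x)
     x++;

   older compilers emit an outlined function plus glue roughly of the form

     GOMP_target (GOMP_DEVICE_ICV, foo._omp_fn.0, NULL,
                  1, hostaddrs, sizes, kinds);

   where hostaddrs[0] is &x, sizes[0] is sizeof (x) and kinds[0] encodes
   GOMP_MAP_TOFROM.  Newer compilers call GOMP_target_ext below instead.  */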
2012
2013 static inline unsigned int
2014 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2015 {
2016 /* If we cannot run asynchronously, simply ignore nowait. */
2017 if (devicep != NULL && devicep->async_run_func == NULL)
2018 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2019
2020 return flags;
2021 }
2022
2023 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2024 and several arguments have been added:
2025 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2026 DEPEND is an array of dependencies, see GOMP_task for details.
2027
2028 ARGS is a pointer to an array consisting of a variable number of both
2029 device-independent and device-specific arguments, each of which can take
2030 one or two elements where the first specifies for which device it is
2031 intended, the type and optionally also the value. If the value is not
2032 present in the first one, the whole second element is the actual value.
2033 The last element of the array is a single NULL. Among the
2034 device-independent ones are, for example, NUM_TEAMS and THREAD_LIMIT.
2035
2036 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2037 that value, or 1 if the teams construct is not present, or 0 if the
2038 teams construct does not have a num_teams clause and so the choice is
2039 implementation defined, and -1 if it can't be determined on the host
2040 what value GOMP_teams will have on the device.
2041 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2042 body with that value, or 0 if the teams construct does not have a
2043 thread_limit clause or the teams construct is not present, or -1 if
2044 it can't be determined on the host what value GOMP_teams will have on the device. */
2045
2046 void
2047 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2048 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2049 unsigned int flags, void **depend, void **args)
2050 {
2051 struct gomp_device_descr *devicep = resolve_device (device);
2052 size_t tgt_align = 0, tgt_size = 0;
2053 bool fpc_done = false;
2054
2055 flags = clear_unsupported_flags (devicep, flags);
2056
2057 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2058 {
2059 struct gomp_thread *thr = gomp_thread ();
2060 /* Create a team if we don't have any around, as nowait
2061 target tasks make sense to run asynchronously even when
2062 outside of any parallel. */
2063 if (__builtin_expect (thr->ts.team == NULL, 0))
2064 {
2065 struct gomp_team *team = gomp_new_team (1);
2066 struct gomp_task *task = thr->task;
2067 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2068 team->prev_ts = thr->ts;
2069 thr->ts.team = team;
2070 thr->ts.team_id = 0;
2071 thr->ts.work_share = &team->work_shares[0];
2072 thr->ts.last_work_share = NULL;
2073 #ifdef HAVE_SYNC_BUILTINS
2074 thr->ts.single_count = 0;
2075 #endif
2076 thr->ts.static_trip = 0;
2077 thr->task = &team->implicit_task[0];
2078 gomp_init_task (thr->task, NULL, icv);
2079 if (task)
2080 {
2081 thr->task = task;
2082 gomp_end_task ();
2083 free (task);
2084 thr->task = &team->implicit_task[0];
2085 }
2086 else
2087 pthread_setspecific (gomp_thread_destructor, thr);
2088 }
2089 if (thr->ts.team
2090 && !thr->task->final_task)
2091 {
2092 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2093 sizes, kinds, flags, depend, args,
2094 GOMP_TARGET_TASK_BEFORE_MAP);
2095 return;
2096 }
2097 }
2098
2099 /* If there are depend clauses but nowait is not present
2100 (or we are in a final task), block the parent task until the
2101 dependencies are resolved and then just continue with the rest
2102 of the function as if it were a merged task. */
2103 if (depend != NULL)
2104 {
2105 struct gomp_thread *thr = gomp_thread ();
2106 if (thr->task && thr->task->depend_hash)
2107 {
2108 /* If we might need to wait, copy firstprivate now. */
2109 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2110 &tgt_align, &tgt_size);
2111 if (tgt_align)
2112 {
2113 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2114 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2115 tgt_align, tgt_size);
2116 }
2117 fpc_done = true;
2118 gomp_task_maybe_wait_for_dependencies (depend);
2119 }
2120 }
2121
2122 void *fn_addr;
2123 if (devicep == NULL
2124 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2125 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2126 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2127 {
2128 if (!fpc_done)
2129 {
2130 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2131 &tgt_align, &tgt_size);
2132 if (tgt_align)
2133 {
2134 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2135 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2136 tgt_align, tgt_size);
2137 }
2138 }
2139 gomp_target_fallback (fn, hostaddrs);
2140 return;
2141 }
2142
2143 struct target_mem_desc *tgt_vars;
2144 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2145 {
2146 if (!fpc_done)
2147 {
2148 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2149 &tgt_align, &tgt_size);
2150 if (tgt_align)
2151 {
2152 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2153 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2154 tgt_align, tgt_size);
2155 }
2156 }
2157 tgt_vars = NULL;
2158 }
2159 else
2160 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2161 true, GOMP_MAP_VARS_TARGET);
2162 devicep->run_func (devicep->target_id, fn_addr,
2163 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2164 args);
2165 if (tgt_vars)
2166 gomp_unmap_vars (tgt_vars, true);
2167 }
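/* A minimal sketch of encoding a device-independent NUM_TEAMS argument in
   ARGS as described in the comment above, using the GOMP_TARGET_ARG_*
   constants from gomp-constants.h.  A small value is packed into the first
   element; a value that does not fit in the upper bits would instead set
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and occupy a second element.  */
#if 0
void *args[]
  = { (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
                            | GOMP_TARGET_ARG_NUM_TEAMS
                            | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
      NULL };
#endif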
2168
2169 /* Host fallback for GOMP_target_data{,_ext} routines. */
2170
2171 static void
2172 gomp_target_data_fallback (void)
2173 {
2174 struct gomp_task_icv *icv = gomp_icv (false);
2175 if (icv->target_data)
2176 {
2177 /* Even when doing a host fallback, if there are any active
2178 #pragma omp target data constructs, we need to remember the
2179 new #pragma omp target data, otherwise GOMP_target_end_data
2180 would get out of sync. */
2181 struct target_mem_desc *tgt
2182 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2183 GOMP_MAP_VARS_DATA);
2184 tgt->prev = icv->target_data;
2185 icv->target_data = tgt;
2186 }
2187 }
2188
2189 void
2190 GOMP_target_data (int device, const void *unused, size_t mapnum,
2191 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2192 {
2193 struct gomp_device_descr *devicep = resolve_device (device);
2194
2195 if (devicep == NULL
2196 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2197 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2198 return gomp_target_data_fallback ();
2199
2200 struct target_mem_desc *tgt
2201 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2202 GOMP_MAP_VARS_DATA);
2203 struct gomp_task_icv *icv = gomp_icv (true);
2204 tgt->prev = icv->target_data;
2205 icv->target_data = tgt;
2206 }
2207
2208 void
2209 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2210 size_t *sizes, unsigned short *kinds)
2211 {
2212 struct gomp_device_descr *devicep = resolve_device (device);
2213
2214 if (devicep == NULL
2215 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2217 return gomp_target_data_fallback ();
2218
2219 struct target_mem_desc *tgt
2220 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2221 GOMP_MAP_VARS_DATA);
2222 struct gomp_task_icv *icv = gomp_icv (true);
2223 tgt->prev = icv->target_data;
2224 icv->target_data = tgt;
2225 }
2226
2227 void
2228 GOMP_target_end_data (void)
2229 {
2230 struct gomp_task_icv *icv = gomp_icv (false);
2231 if (icv->target_data)
2232 {
2233 struct target_mem_desc *tgt = icv->target_data;
2234 icv->target_data = tgt->prev;
2235 gomp_unmap_vars (tgt, true);
2236 }
2237 }
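/* A simplified sketch: a '#pragma omp target data' region is lowered to a
   bracketing pair around its body, roughly

     GOMP_target_data_ext (GOMP_DEVICE_ICV, mapnum, hostaddrs, sizes, kinds);
     ... body ...
     GOMP_target_end_data ();

   which is why gomp_target_data_fallback above must keep the descriptor
   stack in ICV->target_data consistent even on the host.  */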
2238
2239 void
2240 GOMP_target_update (int device, const void *unused, size_t mapnum,
2241 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2242 {
2243 struct gomp_device_descr *devicep = resolve_device (device);
2244
2245 if (devicep == NULL
2246 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2247 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2248 return;
2249
2250 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2251 }
2252
2253 void
2254 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2255 size_t *sizes, unsigned short *kinds,
2256 unsigned int flags, void **depend)
2257 {
2258 struct gomp_device_descr *devicep = resolve_device (device);
2259
2260 /* If there are depend clauses but nowait is not present,
2261 block the parent task until the dependencies are resolved
2262 and then just continue with the rest of the function as if it
2263 were a merged task. Until we are able to schedule tasks during
2264 variable mapping or unmapping, ignore nowait if depend clauses
2265 are not present. */
2266 if (depend != NULL)
2267 {
2268 struct gomp_thread *thr = gomp_thread ();
2269 if (thr->task && thr->task->depend_hash)
2270 {
2271 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2272 && thr->ts.team
2273 && !thr->task->final_task)
2274 {
2275 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2276 mapnum, hostaddrs, sizes, kinds,
2277 flags | GOMP_TARGET_FLAG_UPDATE,
2278 depend, NULL, GOMP_TARGET_TASK_DATA))
2279 return;
2280 }
2281 else
2282 {
2283 struct gomp_team *team = thr->ts.team;
2284 /* If parallel or taskgroup has been cancelled, don't start new
2285 tasks. */
2286 if (__builtin_expect (gomp_cancel_var, 0) && team)
2287 {
2288 if (gomp_team_barrier_cancelled (&team->barrier))
2289 return;
2290 if (thr->task->taskgroup)
2291 {
2292 if (thr->task->taskgroup->cancelled)
2293 return;
2294 if (thr->task->taskgroup->workshare
2295 && thr->task->taskgroup->prev
2296 && thr->task->taskgroup->prev->cancelled)
2297 return;
2298 }
2299 }
2300
2301 gomp_task_maybe_wait_for_dependencies (depend);
2302 }
2303 }
2304 }
2305
2306 if (devicep == NULL
2307 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2308 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2309 return;
2310
2311 struct gomp_thread *thr = gomp_thread ();
2312 struct gomp_team *team = thr->ts.team;
2313 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2314 if (__builtin_expect (gomp_cancel_var, 0) && team)
2315 {
2316 if (gomp_team_barrier_cancelled (&team->barrier))
2317 return;
2318 if (thr->task->taskgroup)
2319 {
2320 if (thr->task->taskgroup->cancelled)
2321 return;
2322 if (thr->task->taskgroup->workshare
2323 && thr->task->taskgroup->prev
2324 && thr->task->taskgroup->prev->cancelled)
2325 return;
2326 }
2327 }
2328
2329 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2330 }
2331
2332 static void
2333 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2334 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2335 {
2336 const int typemask = 0xff;
2337 size_t i;
2338 gomp_mutex_lock (&devicep->lock);
2339 if (devicep->state == GOMP_DEVICE_FINALIZED)
2340 {
2341 gomp_mutex_unlock (&devicep->lock);
2342 return;
2343 }
2344
2345 for (i = 0; i < mapnum; i++)
2346 {
2347 struct splay_tree_key_s cur_node;
2348 unsigned char kind = kinds[i] & typemask;
2349 switch (kind)
2350 {
2351 case GOMP_MAP_FROM:
2352 case GOMP_MAP_ALWAYS_FROM:
2353 case GOMP_MAP_DELETE:
2354 case GOMP_MAP_RELEASE:
2355 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2356 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2357 cur_node.host_start = (uintptr_t) hostaddrs[i];
2358 cur_node.host_end = cur_node.host_start + sizes[i];
2359 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2360 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2361 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2362 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2363 if (!k)
2364 continue;
2365
2366 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2367 k->refcount--;
2368 if ((kind == GOMP_MAP_DELETE
2369 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2370 && k->refcount != REFCOUNT_INFINITY)
2371 k->refcount = 0;
2372
2373 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2374 || kind == GOMP_MAP_ALWAYS_FROM)
2375 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2376 (void *) (k->tgt->tgt_start + k->tgt_offset
2377 + cur_node.host_start
2378 - k->host_start),
2379 cur_node.host_end - cur_node.host_start);
2380 if (k->refcount == 0)
2381 gomp_remove_var (devicep, k);
2382
2383 break;
2384 default:
2385 gomp_mutex_unlock (&devicep->lock);
2386 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2387 kind);
2388 }
2389 }
2390
2391 gomp_mutex_unlock (&devicep->lock);
2392 }
2393
2394 void
2395 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2396 size_t *sizes, unsigned short *kinds,
2397 unsigned int flags, void **depend)
2398 {
2399 struct gomp_device_descr *devicep = resolve_device (device);
2400
2401 /* If there are depend clauses but nowait is not present,
2402 block the parent task until the dependencies are resolved
2403 and then just continue with the rest of the function as if it
2404 were a merged task. Until we are able to schedule tasks during
2405 variable mapping or unmapping, ignore nowait if depend clauses
2406 are not present. */
2407 if (depend != NULL)
2408 {
2409 struct gomp_thread *thr = gomp_thread ();
2410 if (thr->task && thr->task->depend_hash)
2411 {
2412 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2413 && thr->ts.team
2414 && !thr->task->final_task)
2415 {
2416 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2417 mapnum, hostaddrs, sizes, kinds,
2418 flags, depend, NULL,
2419 GOMP_TARGET_TASK_DATA))
2420 return;
2421 }
2422 else
2423 {
2424 struct gomp_team *team = thr->ts.team;
2425 /* If parallel or taskgroup has been cancelled, don't start new
2426 tasks. */
2427 if (__builtin_expect (gomp_cancel_var, 0) && team)
2428 {
2429 if (gomp_team_barrier_cancelled (&team->barrier))
2430 return;
2431 if (thr->task->taskgroup)
2432 {
2433 if (thr->task->taskgroup->cancelled)
2434 return;
2435 if (thr->task->taskgroup->workshare
2436 && thr->task->taskgroup->prev
2437 && thr->task->taskgroup->prev->cancelled)
2438 return;
2439 }
2440 }
2441
2442 gomp_task_maybe_wait_for_dependencies (depend);
2443 }
2444 }
2445 }
2446
2447 if (devicep == NULL
2448 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2449 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2450 return;
2451
2452 struct gomp_thread *thr = gomp_thread ();
2453 struct gomp_team *team = thr->ts.team;
2454 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2455 if (__builtin_expect (gomp_cancel_var, 0) && team)
2456 {
2457 if (gomp_team_barrier_cancelled (&team->barrier))
2458 return;
2459 if (thr->task->taskgroup)
2460 {
2461 if (thr->task->taskgroup->cancelled)
2462 return;
2463 if (thr->task->taskgroup->workshare
2464 && thr->task->taskgroup->prev
2465 && thr->task->taskgroup->prev->cancelled)
2466 return;
2467 }
2468 }
2469
2470 /* The variables are mapped separately such that they can be released
2471 independently. */
2472 size_t i, j;
2473 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2474 for (i = 0; i < mapnum; i++)
2475 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2476 {
2477 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2478 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2479 i += sizes[i];
2480 }
2481 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2482 {
2483 for (j = i + 1; j < mapnum; j++)
2484 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
2485 break;
2486 gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
2487 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2488 i += j - i - 1;
2489 }
2490 else
2491 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2492 true, GOMP_MAP_VARS_ENTER_DATA);
2493 else
2494 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2495 }
2496
2497 bool
2498 gomp_target_task_fn (void *data)
2499 {
2500 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2501 struct gomp_device_descr *devicep = ttask->devicep;
2502
2503 if (ttask->fn != NULL)
2504 {
2505 void *fn_addr;
2506 if (devicep == NULL
2507 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2508 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2509 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2510 {
2511 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2512 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2513 return false;
2514 }
2515
2516 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2517 {
2518 if (ttask->tgt)
2519 gomp_unmap_vars (ttask->tgt, true);
2520 return false;
2521 }
2522
2523 void *actual_arguments;
2524 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2525 {
2526 ttask->tgt = NULL;
2527 actual_arguments = ttask->hostaddrs;
2528 }
2529 else
2530 {
2531 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2532 NULL, ttask->sizes, ttask->kinds, true,
2533 GOMP_MAP_VARS_TARGET);
2534 actual_arguments = (void *) ttask->tgt->tgt_start;
2535 }
2536 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2537
2538 assert (devicep->async_run_func);
2539 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2540 ttask->args, (void *) ttask);
2541 return true;
2542 }
2543 else if (devicep == NULL
2544 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2545 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2546 return false;
2547
2548 size_t i;
2549 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2550 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2551 ttask->kinds, true);
2552 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2553 for (i = 0; i < ttask->mapnum; i++)
2554 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2555 {
2556 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2557 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2558 GOMP_MAP_VARS_ENTER_DATA);
2559 i += ttask->sizes[i];
2560 }
2561 else
2562 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2563 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2564 else
2565 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2566 ttask->kinds);
2567 return false;
2568 }
2569
2570 void
2571 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2572 {
2573 if (thread_limit)
2574 {
2575 struct gomp_task_icv *icv = gomp_icv (true);
2576 icv->thread_limit_var
2577 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2578 }
2579 (void) num_teams;
2580 }
2581
2582 void *
2583 omp_target_alloc (size_t size, int device_num)
2584 {
2585 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2586 return malloc (size);
2587
2588 if (device_num < 0)
2589 return NULL;
2590
2591 struct gomp_device_descr *devicep = resolve_device (device_num);
2592 if (devicep == NULL)
2593 return NULL;
2594
2595 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2596 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2597 return malloc (size);
2598
2599 gomp_mutex_lock (&devicep->lock);
2600 void *ret = devicep->alloc_func (devicep->target_id, size);
2601 gomp_mutex_unlock (&devicep->lock);
2602 return ret;
2603 }
2604
2605 void
2606 omp_target_free (void *device_ptr, int device_num)
2607 {
2608 if (device_ptr == NULL)
2609 return;
2610
2611 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2612 {
2613 free (device_ptr);
2614 return;
2615 }
2616
2617 if (device_num < 0)
2618 return;
2619
2620 struct gomp_device_descr *devicep = resolve_device (device_num);
2621 if (devicep == NULL)
2622 return;
2623
2624 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2625 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2626 {
2627 free (device_ptr);
2628 return;
2629 }
2630
2631 gomp_mutex_lock (&devicep->lock);
2632 gomp_free_device_memory (devicep, device_ptr);
2633 gomp_mutex_unlock (&devicep->lock);
2634 }
2635
2636 int
2637 omp_target_is_present (const void *ptr, int device_num)
2638 {
2639 if (ptr == NULL)
2640 return 1;
2641
2642 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2643 return 1;
2644
2645 if (device_num < 0)
2646 return 0;
2647
2648 struct gomp_device_descr *devicep = resolve_device (device_num);
2649 if (devicep == NULL)
2650 return 0;
2651
2652 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2653 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2654 return 1;
2655
2656 gomp_mutex_lock (&devicep->lock);
2657 struct splay_tree_s *mem_map = &devicep->mem_map;
2658 struct splay_tree_key_s cur_node;
2659
2660 cur_node.host_start = (uintptr_t) ptr;
2661 cur_node.host_end = cur_node.host_start;
2662 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2663 int ret = n != NULL;
2664 gomp_mutex_unlock (&devicep->lock);
2665 return ret;
2666 }
2667
2668 int
2669 omp_target_memcpy (void *dst, const void *src, size_t length,
2670 size_t dst_offset, size_t src_offset, int dst_device_num,
2671 int src_device_num)
2672 {
2673 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2674 bool ret;
2675
2676 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2677 {
2678 if (dst_device_num < 0)
2679 return EINVAL;
2680
2681 dst_devicep = resolve_device (dst_device_num);
2682 if (dst_devicep == NULL)
2683 return EINVAL;
2684
2685 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2686 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2687 dst_devicep = NULL;
2688 }
2689 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2690 {
2691 if (src_device_num < 0)
2692 return EINVAL;
2693
2694 src_devicep = resolve_device (src_device_num);
2695 if (src_devicep == NULL)
2696 return EINVAL;
2697
2698 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2699 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2700 src_devicep = NULL;
2701 }
2702 if (src_devicep == NULL && dst_devicep == NULL)
2703 {
2704 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2705 return 0;
2706 }
2707 if (src_devicep == NULL)
2708 {
2709 gomp_mutex_lock (&dst_devicep->lock);
2710 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2711 (char *) dst + dst_offset,
2712 (char *) src + src_offset, length);
2713 gomp_mutex_unlock (&dst_devicep->lock);
2714 return (ret ? 0 : EINVAL);
2715 }
2716 if (dst_devicep == NULL)
2717 {
2718 gomp_mutex_lock (&src_devicep->lock);
2719 ret = src_devicep->dev2host_func (src_devicep->target_id,
2720 (char *) dst + dst_offset,
2721 (char *) src + src_offset, length);
2722 gomp_mutex_unlock (&src_devicep->lock);
2723 return (ret ? 0 : EINVAL);
2724 }
2725 if (src_devicep == dst_devicep)
2726 {
2727 gomp_mutex_lock (&src_devicep->lock);
2728 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2729 (char *) dst + dst_offset,
2730 (char *) src + src_offset, length);
2731 gomp_mutex_unlock (&src_devicep->lock);
2732 return (ret ? 0 : EINVAL);
2733 }
2734 return EINVAL;
2735 }
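/* A minimal usage sketch for the two routines above.  It assumes a
   conforming libgomp where omp_get_initial_device () is accepted as the
   host device number; error handling is elided.  */
#if 0
int host_src[64], host_dst[64];
int dev = omp_get_default_device ();
int host = omp_get_initial_device ();
void *dev_buf = omp_target_alloc (sizeof host_src, dev);
/* Host -> device, then device -> host; offsets are 0 on both sides.  */
omp_target_memcpy (dev_buf, host_src, sizeof host_src, 0, 0, dev, host);
omp_target_memcpy (host_dst, dev_buf, sizeof host_src, 0, 0, host, dev);
omp_target_free (dev_buf, dev);
#endif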
2736
2737 static int
2738 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2739 int num_dims, const size_t *volume,
2740 const size_t *dst_offsets,
2741 const size_t *src_offsets,
2742 const size_t *dst_dimensions,
2743 const size_t *src_dimensions,
2744 struct gomp_device_descr *dst_devicep,
2745 struct gomp_device_descr *src_devicep)
2746 {
2747 size_t dst_slice = element_size;
2748 size_t src_slice = element_size;
2749 size_t j, dst_off, src_off, length;
2750 int i, ret;
2751
2752 if (num_dims == 1)
2753 {
2754 if (__builtin_mul_overflow (element_size, volume[0], &length)
2755 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2756 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2757 return EINVAL;
2758 if (dst_devicep == NULL && src_devicep == NULL)
2759 {
2760 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2761 length);
2762 ret = 1;
2763 }
2764 else if (src_devicep == NULL)
2765 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2766 (char *) dst + dst_off,
2767 (const char *) src + src_off,
2768 length);
2769 else if (dst_devicep == NULL)
2770 ret = src_devicep->dev2host_func (src_devicep->target_id,
2771 (char *) dst + dst_off,
2772 (const char *) src + src_off,
2773 length);
2774 else if (src_devicep == dst_devicep)
2775 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2776 (char *) dst + dst_off,
2777 (const char *) src + src_off,
2778 length);
2779 else
2780 ret = 0;
2781 return ret ? 0 : EINVAL;
2782 }
2783
2784 /* FIXME: it would be nice to have some plugin function to handle
2785 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2786 be handled in the generic recursion below, and for host-host it
2787 should be used even for any num_dims >= 2. */
2788
2789 for (i = 1; i < num_dims; i++)
2790 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2791 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2792 return EINVAL;
2793 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2794 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2795 return EINVAL;
2796 for (j = 0; j < volume[0]; j++)
2797 {
2798 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2799 (const char *) src + src_off,
2800 element_size, num_dims - 1,
2801 volume + 1, dst_offsets + 1,
2802 src_offsets + 1, dst_dimensions + 1,
2803 src_dimensions + 1, dst_devicep,
2804 src_devicep);
2805 if (ret)
2806 return ret;
2807 dst_off += dst_slice;
2808 src_off += src_slice;
2809 }
2810 return 0;
2811 }
2812
2813 int
2814 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2815 int num_dims, const size_t *volume,
2816 const size_t *dst_offsets,
2817 const size_t *src_offsets,
2818 const size_t *dst_dimensions,
2819 const size_t *src_dimensions,
2820 int dst_device_num, int src_device_num)
2821 {
2822 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2823
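/* Per OpenMP, a call with both DST and SRC NULL queries the maximum
   number of dimensions supported; this implementation places no fixed
   limit, hence INT_MAX.  */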
2824 if (!dst && !src)
2825 return INT_MAX;
2826
2827 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2828 {
2829 if (dst_device_num < 0)
2830 return EINVAL;
2831
2832 dst_devicep = resolve_device (dst_device_num);
2833 if (dst_devicep == NULL)
2834 return EINVAL;
2835
2836 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2837 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2838 dst_devicep = NULL;
2839 }
2840 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2841 {
2842 if (src_device_num < 0)
2843 return EINVAL;
2844
2845 src_devicep = resolve_device (src_device_num);
2846 if (src_devicep == NULL)
2847 return EINVAL;
2848
2849 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2850 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2851 src_devicep = NULL;
2852 }
2853
2854 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2855 return EINVAL;
2856
2857 if (src_devicep)
2858 gomp_mutex_lock (&src_devicep->lock);
2859 else if (dst_devicep)
2860 gomp_mutex_lock (&dst_devicep->lock);
2861 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2862 volume, dst_offsets, src_offsets,
2863 dst_dimensions, src_dimensions,
2864 dst_devicep, src_devicep);
2865 if (src_devicep)
2866 gomp_mutex_unlock (&src_devicep->lock);
2867 else if (dst_devicep)
2868 gomp_mutex_unlock (&dst_devicep->lock);
2869 return ret;
2870 }
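/* A minimal sketch: copying a 2x3 block starting at src[1][2] of a 4x5
   host matrix into dst[0][0] of a 2x3 host matrix.  VOLUME gives the block
   extent per dimension; offsets and dimensions are per array, in elements.
   As above, omp_get_initial_device () denoting the host is an assumption
   of the sketch.  */
#if 0
double src[4][5], dst[2][3];
size_t volume[2] = { 2, 3 };
size_t src_offsets[2] = { 1, 2 };
size_t dst_offsets[2] = { 0, 0 };
size_t src_dims[2] = { 4, 5 };
size_t dst_dims[2] = { 2, 3 };
int host = omp_get_initial_device ();
omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume, dst_offsets,
                        src_offsets, dst_dims, src_dims, host, host);
#endif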
2871
2872 int
2873 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2874 size_t size, size_t device_offset, int device_num)
2875 {
2876 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2877 return EINVAL;
2878
2879 if (device_num < 0)
2880 return EINVAL;
2881
2882 struct gomp_device_descr *devicep = resolve_device (device_num);
2883 if (devicep == NULL)
2884 return EINVAL;
2885
2886 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2887 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2888 return EINVAL;
2889
2890 gomp_mutex_lock (&devicep->lock);
2891
2892 struct splay_tree_s *mem_map = &devicep->mem_map;
2893 struct splay_tree_key_s cur_node;
2894 int ret = EINVAL;
2895
2896 cur_node.host_start = (uintptr_t) host_ptr;
2897 cur_node.host_end = cur_node.host_start + size;
2898 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2899 if (n)
2900 {
2901 if (n->tgt->tgt_start + n->tgt_offset
2902 == (uintptr_t) device_ptr + device_offset
2903 && n->host_start <= cur_node.host_start
2904 && n->host_end >= cur_node.host_end)
2905 ret = 0;
2906 }
2907 else
2908 {
2909 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2910 tgt->array = gomp_malloc (sizeof (*tgt->array));
2911 tgt->refcount = 1;
2912 tgt->tgt_start = 0;
2913 tgt->tgt_end = 0;
2914 tgt->to_free = NULL;
2915 tgt->prev = NULL;
2916 tgt->list_count = 0;
2917 tgt->device_descr = devicep;
2918 splay_tree_node array = tgt->array;
2919 splay_tree_key k = &array->key;
2920 k->host_start = cur_node.host_start;
2921 k->host_end = cur_node.host_end;
2922 k->tgt = tgt;
2923 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2924 k->refcount = REFCOUNT_INFINITY;
2925 k->dynamic_refcount = 0;
2926 k->aux = NULL;
2927 array->left = NULL;
2928 array->right = NULL;
2929 splay_tree_insert (&devicep->mem_map, array);
2930 ret = 0;
2931 }
2932 gomp_mutex_unlock (&devicep->lock);
2933 return ret;
2934 }
2935
2936 int
2937 omp_target_disassociate_ptr (const void *ptr, int device_num)
2938 {
2939 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2940 return EINVAL;
2941
2942 if (device_num < 0)
2943 return EINVAL;
2944
2945 struct gomp_device_descr *devicep = resolve_device (device_num);
2946 if (devicep == NULL)
2947 return EINVAL;
2948
2949 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2950 return EINVAL;
2951
2952 gomp_mutex_lock (&devicep->lock);
2953
2954 struct splay_tree_s *mem_map = &devicep->mem_map;
2955 struct splay_tree_key_s cur_node;
2956 int ret = EINVAL;
2957
2958 cur_node.host_start = (uintptr_t) ptr;
2959 cur_node.host_end = cur_node.host_start;
2960 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2961 if (n
2962 && n->host_start == cur_node.host_start
2963 && n->refcount == REFCOUNT_INFINITY
2964 && n->tgt->tgt_start == 0
2965 && n->tgt->to_free == NULL
2966 && n->tgt->refcount == 1
2967 && n->tgt->list_count == 0)
2968 {
2969 splay_tree_remove (&devicep->mem_map, n);
2970 gomp_unmap_tgt (n->tgt);
2971 ret = 0;
2972 }
2973
2974 gomp_mutex_unlock (&devicep->lock);
2975 return ret;
2976 }
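/* A minimal sketch pairing the two routines above, reusing device memory
   from omp_target_alloc as the mapping for a host range.  DEV is assumed
   to be a non-shared-memory offload device; error handling is elided.  */
#if 0
int dev = omp_get_default_device ();
char host_buf[256];
void *dev_mem = omp_target_alloc (sizeof host_buf, dev);
if (omp_target_associate_ptr (host_buf, dev_mem, sizeof host_buf,
                              0 /* device_offset */, dev) == 0)
  {
    /* host_buf is now "present" on DEV; mapping it reuses dev_mem.  */
    omp_target_disassociate_ptr (host_buf, dev);
  }
omp_target_free (dev_mem, dev);
#endif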
2977
2978 int
2979 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2980 {
2981 (void) kind;
2982 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2983 return gomp_pause_host ();
2984 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2985 return -1;
2986 /* Do nothing for target devices for now. */
2987 return 0;
2988 }
2989
2990 int
2991 omp_pause_resource_all (omp_pause_resource_t kind)
2992 {
2993 (void) kind;
2994 if (gomp_pause_host ())
2995 return -1;
2996 /* Do nothing for target devices for now. */
2997 return 0;
2998 }
2999
3000 ialias (omp_pause_resource)
3001 ialias (omp_pause_resource_all)
3002
3003 #ifdef PLUGIN_SUPPORT
3004
3005 /* This function tries to load a plugin for DEVICE. The name of the plugin
3006 is passed in PLUGIN_NAME.
3007 The handles of the functions found are stored in the corresponding fields
3008 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3009
3010 static bool
3011 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3012 const char *plugin_name)
3013 {
3014 const char *err = NULL, *last_missing = NULL;
3015
3016 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3017 if (!plugin_handle)
3018 goto dl_fail;
3019
3020 /* Check if all required functions are available in the plugin and store
3021 their handles. None of the symbols can legitimately be NULL,
3022 so we don't need to check dlerror all the time. */
3023 #define DLSYM(f) \
3024 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3025 goto dl_fail
3026 /* Similar, but a missing function is not an error. The expression
3027 evaluates to false if the lookup failed, true otherwise. */
3028 #define DLSYM_OPT(f, n) \
3029 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3030 || (last_missing = #n, 0))
3031
3032 DLSYM (version);
3033 if (device->version_func () != GOMP_VERSION)
3034 {
3035 err = "plugin version mismatch";
3036 goto fail;
3037 }
3038
3039 DLSYM (get_name);
3040 DLSYM (get_caps);
3041 DLSYM (get_type);
3042 DLSYM (get_num_devices);
3043 DLSYM (init_device);
3044 DLSYM (fini_device);
3045 DLSYM (load_image);
3046 DLSYM (unload_image);
3047 DLSYM (alloc);
3048 DLSYM (free);
3049 DLSYM (dev2host);
3050 DLSYM (host2dev);
3051 device->capabilities = device->get_caps_func ();
3052 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3053 {
3054 DLSYM (run);
3055 DLSYM_OPT (async_run, async_run);
3056 DLSYM_OPT (can_run, can_run);
3057 DLSYM (dev2dev);
3058 }
3059 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3060 {
3061 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3062 || !DLSYM_OPT (openacc.create_thread_data,
3063 openacc_create_thread_data)
3064 || !DLSYM_OPT (openacc.destroy_thread_data,
3065 openacc_destroy_thread_data)
3066 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3067 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3068 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3069 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3070 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3071 || !DLSYM_OPT (openacc.async.queue_callback,
3072 openacc_async_queue_callback)
3073 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3074 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3075 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3076 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3077 {
3078 /* Require all the OpenACC handlers if we have
3079 GOMP_OFFLOAD_CAP_OPENACC_200. */
3080 err = "plugin missing OpenACC handler function";
3081 goto fail;
3082 }
3083
3084 unsigned cuda = 0;
3085 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3086 openacc_cuda_get_current_device);
3087 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3088 openacc_cuda_get_current_context);
3089 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3090 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3091 if (cuda && cuda != 4)
3092 {
3093 /* Make sure all the CUDA functions are there if any of them are. */
3094 err = "plugin missing OpenACC CUDA handler function";
3095 goto fail;
3096 }
3097 }
3098 #undef DLSYM
3099 #undef DLSYM_OPT
3100
3101 return 1;
3102
3103 dl_fail:
3104 err = dlerror ();
3105 fail:
3106 gomp_error ("while loading %s: %s", plugin_name, err);
3107 if (last_missing)
3108 gomp_error ("missing function was %s", last_missing);
3109 if (plugin_handle)
3110 dlclose (plugin_handle);
3111
3112 return 0;
3113 }
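/* Note: the DLSYM machinery above relies on the GOMP_OFFLOAD_* naming
   convention.  For instance, DLSYM (alloc) expands (roughly) to

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   so a plugin must export one GOMP_OFFLOAD_<name> symbol per required
   hook.  */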
3114
3115 /* This function finalizes all initialized devices. */
3116
3117 static void
3118 gomp_target_fini (void)
3119 {
3120 int i;
3121 for (i = 0; i < num_devices; i++)
3122 {
3123 bool ret = true;
3124 struct gomp_device_descr *devicep = &devices[i];
3125 gomp_mutex_lock (&devicep->lock);
3126 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3127 ret = gomp_fini_device (devicep);
3128 gomp_mutex_unlock (&devicep->lock);
3129 if (!ret)
3130 gomp_fatal ("device finalization failed");
3131 }
3132 }
3133
3134 /* This function initializes the runtime for offloading.
3135 It parses the list of offload plugins and tries to load them.
3136 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3137 will be set, and the array DEVICES initialized, containing descriptors for
3138 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3139 followed by the others. */
3140
3141 static void
3142 gomp_target_init (void)
3143 {
3144 const char *prefix = "libgomp-plugin-";
3145 const char *suffix = SONAME_SUFFIX (1);
3146 const char *cur, *next;
3147 char *plugin_name;
3148 int i, new_num_devices;
3149
3150 num_devices = 0;
3151 devices = NULL;
3152
3153 cur = OFFLOAD_PLUGINS;
3154 if (*cur)
3155 do
3156 {
3157 struct gomp_device_descr current_device;
3158 size_t prefix_len, suffix_len, cur_len;
3159
3160 next = strchr (cur, ',');
3161
3162 prefix_len = strlen (prefix);
3163 cur_len = next ? next - cur : strlen (cur);
3164 suffix_len = strlen (suffix);
3165
3166 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3167 if (!plugin_name)
3168 {
3169 num_devices = 0;
3170 break;
3171 }
3172
3173 memcpy (plugin_name, prefix, prefix_len);
3174 memcpy (plugin_name + prefix_len, cur, cur_len);
3175 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3176
3177 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3178 {
3179 new_num_devices = current_device.get_num_devices_func ();
3180 if (new_num_devices >= 1)
3181 {
3182 /* Augment DEVICES and NUM_DEVICES. */
3183
3184 devices = realloc (devices, (num_devices + new_num_devices)
3185 * sizeof (struct gomp_device_descr));
3186 if (!devices)
3187 {
3188 num_devices = 0;
3189 free (plugin_name);
3190 break;
3191 }
3192
3193 current_device.name = current_device.get_name_func ();
3194 /* current_device.capabilities has already been set. */
3195 current_device.type = current_device.get_type_func ();
3196 current_device.mem_map.root = NULL;
3197 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3198 for (i = 0; i < new_num_devices; i++)
3199 {
3200 current_device.target_id = i;
3201 devices[num_devices] = current_device;
3202 gomp_mutex_init (&devices[num_devices].lock);
3203 num_devices++;
3204 }
3205 }
3206 }
3207
3208 free (plugin_name);
3209 cur = next + 1;
3210 }
3211 while (next);
3212
3213 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3214 NUM_DEVICES_OPENMP. */
3215 struct gomp_device_descr *devices_s
3216 = malloc (num_devices * sizeof (struct gomp_device_descr));
3217 if (!devices_s)
3218 {
3219 num_devices = 0;
3220 free (devices);
3221 devices = NULL;
3222 }
3223 num_devices_openmp = 0;
3224 for (i = 0; i < num_devices; i++)
3225 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3226 devices_s[num_devices_openmp++] = devices[i];
3227 int num_devices_after_openmp = num_devices_openmp;
3228 for (i = 0; i < num_devices; i++)
3229 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3230 devices_s[num_devices_after_openmp++] = devices[i];
3231 free (devices);
3232 devices = devices_s;
3233
3234 for (i = 0; i < num_devices; i++)
3235 {
3236 /* The 'devices' array can be moved (by the realloc call) until we have
3237 found all the plugins, so registering with the OpenACC runtime (which
3238 takes a copy of the pointer argument) must be delayed until now. */
3239 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3240 goacc_register (&devices[i]);
3241 }
3242
3243 if (atexit (gomp_target_fini) != 0)
3244 gomp_fatal ("atexit failed");
3245 }
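/* Note: OFFLOAD_PLUGINS is a comma-separated list fixed at configure time.
   For example, with OFFLOAD_PLUGINS = "nvptx,gcn" and SONAME_SUFFIX (1)
   yielding ".so.1" on ELF targets, gomp_target_init above constructs and
   dlopens "libgomp-plugin-nvptx.so.1" and then "libgomp-plugin-gcn.so.1".  */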
3246
3247 #else /* PLUGIN_SUPPORT */
3248 /* If dlfcn.h is unavailable, we always fall back to host execution.
3249 The GOMP_target* routines are just stubs in this case. */
3250 static void
3251 gomp_target_init (void)
3252 {
3253 }
3254 #endif /* PLUGIN_SUPPORT */