Use gomp_map_val for OpenACC host-to-device address translation
[gcc.git] / libgomp / target.c
/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
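
/* Illustrative note on the two lookup helpers above: a zero-sized key
   [p, p) can never intersect a mapped interval, so gomp_map_0len_lookup
   temporarily widens it to [p, p + 1) to accept pointers that point into
   a mapping, and gomp_map_lookup additionally retries with [p - 1, p) so
   that a pointer one past the end of a mapped array section still
   resolves.  A hypothetical caller would do:

     struct splay_tree_key_s probe;
     probe.host_start = probe.host_end = (uintptr_t) host_ptr;
     splay_tree_key n = gomp_map_lookup (mem_map, &probe);
     if (n)
       ... host_ptr lies within (or one past) the mapped interval
	   starting at n->host_start ...  */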

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host-to-device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add a region with device tgt_start-relative offset START and length LEN
   to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
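
/* Illustrative sketch (kept out of the build) of the merging rules above,
   starting from an empty CBUF.  */
#if 0
gomp_coalesce_buf_add (&cbuf, 0, 256);	  /* New chunk [0, 256), use_cnt 1.  */
gomp_coalesce_buf_add (&cbuf, 512, 128);  /* Gap 256 < MAX_COALESCE_BUF_GAP,
					     so the chunk grows to [0, 640)
					     and use_cnt becomes 2.  */
gomp_coalesce_buf_add (&cbuf, 8192, 64);  /* Gap too large; the previous chunk
					     has use_cnt > 1, so it is kept
					     and a new chunk [8192, 8256)
					     is started.  */
/* Regions larger than MAX_COALESCE_BUF_SIZE are never added and are
   transferred to the device directly.  */
#endif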

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
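
/* Illustrative note on the BIAS handling above: for an array section such
   as the Fortran A(10:19) mapped together with a pointer to A, HOST_PTR is
   the pointer's value and BIAS the byte offset of the section start within
   the pointed-to object.  With N the mapping found for HOST_PTR + BIAS,
   the value written to the device is, in pseudo-arithmetic,

     n->tgt->tgt_start + n->tgt_offset	     (device base of the mapping)
     + (host_ptr + bias - n->host_start)     (offset of the section in it)
     - bias,				     (undo the bias)

   i.e. the device analogue of the original host pointer value.  */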

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
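
/* Illustrative note: gomp_map_val is the single place that translates the
   host address of the I-th map clause into a device address, and per this
   change's title the OpenACC host-to-device address translation goes
   through it as well.  After gomp_map_vars has filled in TGT, a caller
   simply does

     uintptr_t devaddr = gomp_map_val (tgt, hostaddrs, i);

   which uniformly covers already-mapped keys (with a per-entry offset),
   inlined firstprivate values, struct members expressed relative to the
   following sibling entry, and fresh allocations at tgt->tgt_start.  */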

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
		   || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n == NULL)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      cur_node.host_start -= n->host_start;
	      hostaddrs[i]
		= (void *) (n->tgt->tgt_start + n->tgt_offset
			    + cur_node.host_start);
	      tgt->list[i].offset = ~(uintptr_t) 0;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence skip the entry, including its
		 MAP_POINTER, if any.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate a block of tgt_size bytes aligned to tgt_align.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n == NULL)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    cur_node.host_start -= n->host_start;
		    hostaddrs[i]
		      = (void *) (n->tgt->tgt_start + n->tgt_offset
				  + cur_node.host_start);
		    tgt->list[i].offset = ~(uintptr_t) 0;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->virtual_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
       || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
      && tgt->refcount == 0)
    {
      /* If we're about to discard a target_mem_desc with no "structural"
	 references (tgt->refcount == 0), any splay keys linked in the tgt's
	 list must have their virtual refcount incremented to represent that
	 "lost" reference in order to implement the semantics of the OpenACC
	 "present increment" operation properly.  */
      if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
	for (i = 0; i < tgt->list_count; i++)
	  if (tgt->list[i].key)
	    tgt->list[i].key->virtual_refcount++;

      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
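
/* Illustrative sketch of the device block gomp_map_vars_internal builds for
   pragma_kind == GOMP_MAP_VARS_TARGET:

     tgt_start:	mapnum device pointers, one per clause, filled in by the
		gomp_map_val loop near the end of the function;
     then:	the aligned copies/allocations backing the clauses;
     tgt_end.

   The device kernel is then launched (see GOMP_target below) with tgt_start
   as its argument block, so it can fetch each clause's device address from
   that pointer array.  */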

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (&devicep->mem_map,
			   (splay_tree_node) k->aux->link_key);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->tgt == tgt
	  && k->virtual_refcount > 0
	  && k->refcount != REFCOUNT_INFINITY)
	{
	  k->virtual_refcount--;
	  k->refcount--;
	}
      else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
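
/* Illustrative note (an assumption about the compiler side, not shown in
   this file): gomp_update is the engine behind "#pragma omp target update".
   For, e.g.,

     #pragma omp target update to(x) from(y)

   the compiler emits a GOMP_target_update-style call whose kind array
   requests a copy-to for x and a copy-from for y; the loop above then
   performs a host-to-device copy for x and a device-to-host copy for y,
   provided each is fully covered by an existing mapping.  */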

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP.  Insert into the splay tree the mappings between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->virtual_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->virtual_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
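
/* Illustrative sketch (kept out of the build) of the HOST_TABLE layout the
   void *** accesses above rely on; the struct and field names are
   hypothetical, the compiler simply emits four consecutive pointers per
   offload image.  */
#if 0
struct host_table_sketch
{
  void **funcs_begin;	/* Addresses of the offloaded functions.  */
  void **funcs_end;
  void **vars_begin;	/* (address, size) pairs for the variables; the MSB
			   of each size marks "omp declare target link".  */
  void **vars_end;
};
#endif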

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find the mapping at the start of the node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
			 : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TYPE of the target, and the TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TYPE of the target, and the TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to the device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from the device all images registered so far.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
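
/* Illustrative sketch (kept out of the build) of how the two helpers above
   cooperate: size a host-side staging block first, then pack the private
   copies into it, redirecting hostaddrs[] at the copies.  */
#if 0
size_t tgt_align = 0, tgt_size = 0;
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
if (tgt_align)
  {
    char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
    copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			    tgt_align, tgt_size);
    /* hostaddrs[i] now points at the private copy for each
       GOMP_MAP_FIRSTPRIVATE entry.  */
  }
#endif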

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
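
/* Illustrative note: the function lookup above works because
   gomp_load_image_to_device registers every host function under the
   degenerate range [host_fn, host_fn + 1), so probing with
   k.host_end = k.host_start + 1 matches exactly one entry, whose
   tgt_offset holds the device-side address of the function.  */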
1786
1787 /* Called when encountering a target directive. If DEVICE
1788 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1789 GOMP_DEVICE_HOST_FALLBACK (or any value
1790 larger than last available hw device), use host fallback.
1791 FN is address of host code, UNUSED is part of the current ABI, but
1792 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1793 with MAPNUM entries, with addresses of the host objects,
1794 sizes of the host objects (resp. for pointer kind pointer bias
1795 and assumed sizeof (void *) size) and kinds. */
1796
1797 void
1798 GOMP_target (int device, void (*fn) (void *), const void *unused,
1799 size_t mapnum, void **hostaddrs, size_t *sizes,
1800 unsigned char *kinds)
1801 {
1802 struct gomp_device_descr *devicep = resolve_device (device);
1803
1804 void *fn_addr;
1805 if (devicep == NULL
1806 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1807 /* All shared memory devices should use the GOMP_target_ext function. */
1808 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1809 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1810 return gomp_target_fallback (fn, hostaddrs);
1811
1812 struct target_mem_desc *tgt_vars
1813 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1814 GOMP_MAP_VARS_TARGET);
1815 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1816 NULL);
1817 gomp_unmap_vars (tgt_vars, true);
1818 }
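
/* Illustrative sketch (names and lowering details assumed, not taken from
   this file): for a construct like

     #pragma omp target map(tofrom: x)
       x++;

   the compiler outlines the body into a function, say main._omp_fn.0
   (hypothetical name), and emits roughly

     GOMP_target (GOMP_DEVICE_ICV, main._omp_fn.0, NULL,
                  1, hostaddrs, sizes, kinds);

   with NULL filling the unused ABI slot, hostaddrs[0] = &x,
   sizes[0] = sizeof (x) and kinds[0] = GOMP_MAP_TOFROM.  */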
1819
1820 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1821 and several arguments have been added:
1822 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1823 DEPEND is an array of dependencies, see GOMP_task for details.
1824 
1825 ARGS is a pointer to an array consisting of a variable number of both
1826 device-independent and device-specific arguments, each of which can take
1827 one or two elements, where the first element specifies for which device
1828 it is intended, the type and optionally also the value.  If the value is
1829 not present in the first element, the whole second element is the actual
1830 value.  The last element of the array is a single NULL.  Among the
1831 device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
1832 
1833 NUM_TEAMS is positive if GOMP_teams will be called in the body with
1834 that value, or 1 if the teams construct is not present, or 0 if the
1835 teams construct does not have a num_teams clause and so the choice is
1836 implementation defined, and -1 if it can't be determined on the host
1837 what value GOMP_teams will have on the device.
1838 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1839 body with that value, or 0 if the teams construct does not have a
1840 thread_limit clause or is not present, and -1 if it can't be determined
1841 on the host what value GOMP_teams will have on the device.  */
1842
1843 void
1844 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1845 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1846 unsigned int flags, void **depend, void **args)
1847 {
1848 struct gomp_device_descr *devicep = resolve_device (device);
1849 size_t tgt_align = 0, tgt_size = 0;
1850 bool fpc_done = false;
1851
1852 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1853 {
1854 struct gomp_thread *thr = gomp_thread ();
1855 /* Create a team if we don't have any around, as nowait
1856 target tasks make sense to run asynchronously even when
1857 outside of any parallel. */
1858 if (__builtin_expect (thr->ts.team == NULL, 0))
1859 {
1860 struct gomp_team *team = gomp_new_team (1);
1861 struct gomp_task *task = thr->task;
1862 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1863 team->prev_ts = thr->ts;
1864 thr->ts.team = team;
1865 thr->ts.team_id = 0;
1866 thr->ts.work_share = &team->work_shares[0];
1867 thr->ts.last_work_share = NULL;
1868 #ifdef HAVE_SYNC_BUILTINS
1869 thr->ts.single_count = 0;
1870 #endif
1871 thr->ts.static_trip = 0;
1872 thr->task = &team->implicit_task[0];
1873 gomp_init_task (thr->task, NULL, icv);
1874 if (task)
1875 {
1876 thr->task = task;
1877 gomp_end_task ();
1878 free (task);
1879 thr->task = &team->implicit_task[0];
1880 }
1881 else
1882 pthread_setspecific (gomp_thread_destructor, thr);
1883 }
1884 if (thr->ts.team
1885 && !thr->task->final_task)
1886 {
1887 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1888 sizes, kinds, flags, depend, args,
1889 GOMP_TARGET_TASK_BEFORE_MAP);
1890 return;
1891 }
1892 }
1893
1894 /* If there are depend clauses, but nowait is not present
1895 (or we are in a final task), block the parent task until the
1896 dependencies are resolved and then just continue with the rest
1897 of the function as if it is a merged task. */
1898 if (depend != NULL)
1899 {
1900 struct gomp_thread *thr = gomp_thread ();
1901 if (thr->task && thr->task->depend_hash)
1902 {
1903 /* If we might need to wait, copy firstprivate now. */
1904 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1905 &tgt_align, &tgt_size);
1906 if (tgt_align)
1907 {
1908 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1909 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1910 tgt_align, tgt_size);
1911 }
1912 fpc_done = true;
1913 gomp_task_maybe_wait_for_dependencies (depend);
1914 }
1915 }
1916
1917 void *fn_addr;
1918 if (devicep == NULL
1919 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1920 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1921 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1922 {
1923 if (!fpc_done)
1924 {
1925 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1926 &tgt_align, &tgt_size);
1927 if (tgt_align)
1928 {
1929 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1930 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1931 tgt_align, tgt_size);
1932 }
1933 }
1934 gomp_target_fallback (fn, hostaddrs);
1935 return;
1936 }
1937
1938 struct target_mem_desc *tgt_vars;
1939 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1940 {
1941 if (!fpc_done)
1942 {
1943 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1944 &tgt_align, &tgt_size);
1945 if (tgt_align)
1946 {
1947 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1948 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1949 tgt_align, tgt_size);
1950 }
1951 }
1952 tgt_vars = NULL;
1953 }
1954 else
1955 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1956 true, GOMP_MAP_VARS_TARGET);
1957 devicep->run_func (devicep->target_id, fn_addr,
1958 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1959 args);
1960 if (tgt_vars)
1961 gomp_unmap_vars (tgt_vars, true);
1962 }
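
/* Illustrative sketch (directive lowering assumed): a dependent nowait
   target region such as

     #pragma omp target nowait depend(inout: x) map(tofrom: x)
       x++;

   reaches this function with GOMP_TARGET_FLAG_NOWAIT set in FLAGS and a
   non-NULL DEPEND array; the code above then either defers the region as a
   target task (when a team exists and we are not in a final task) or
   copies the firstprivate data, waits for the dependencies and runs the
   region synchronously.  */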
1963
1964 /* Host fallback for GOMP_target_data{,_ext} routines. */
1965
1966 static void
1967 gomp_target_data_fallback (void)
1968 {
1969 struct gomp_task_icv *icv = gomp_icv (false);
1970 if (icv->target_data)
1971 {
1972 /* Even when doing a host fallback, if there are any active
1973 #pragma omp target data constructs, we need to remember the
1974 new #pragma omp target data, otherwise GOMP_target_end_data
1975 would get out of sync.  */
1976 struct target_mem_desc *tgt
1977 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1978 GOMP_MAP_VARS_DATA);
1979 tgt->prev = icv->target_data;
1980 icv->target_data = tgt;
1981 }
1982 }
1983
1984 void
1985 GOMP_target_data (int device, const void *unused, size_t mapnum,
1986 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1987 {
1988 struct gomp_device_descr *devicep = resolve_device (device);
1989
1990 if (devicep == NULL
1991 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1992 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1993 return gomp_target_data_fallback ();
1994
1995 struct target_mem_desc *tgt
1996 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1997 GOMP_MAP_VARS_DATA);
1998 struct gomp_task_icv *icv = gomp_icv (true);
1999 tgt->prev = icv->target_data;
2000 icv->target_data = tgt;
2001 }
2002
2003 void
2004 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2005 size_t *sizes, unsigned short *kinds)
2006 {
2007 struct gomp_device_descr *devicep = resolve_device (device);
2008
2009 if (devicep == NULL
2010 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2011 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2012 return gomp_target_data_fallback ();
2013
2014 struct target_mem_desc *tgt
2015 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2016 GOMP_MAP_VARS_DATA);
2017 struct gomp_task_icv *icv = gomp_icv (true);
2018 tgt->prev = icv->target_data;
2019 icv->target_data = tgt;
2020 }
2021
2022 void
2023 GOMP_target_end_data (void)
2024 {
2025 struct gomp_task_icv *icv = gomp_icv (false);
2026 if (icv->target_data)
2027 {
2028 struct target_mem_desc *tgt = icv->target_data;
2029 icv->target_data = tgt->prev;
2030 gomp_unmap_vars (tgt, true);
2031 }
2032 }
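
/* Illustrative sketch: a data region such as

     #pragma omp target data map(to: a[0:n])
       {
         ...
       }

   is bracketed by one GOMP_target_data_ext call, which maps A and pushes
   the new descriptor onto the ICV's target_data chain, and one
   GOMP_target_end_data call, which pops the chain and unmaps, so nested
   data regions unwind in construct order.  */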
2033
2034 void
2035 GOMP_target_update (int device, const void *unused, size_t mapnum,
2036 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2037 {
2038 struct gomp_device_descr *devicep = resolve_device (device);
2039
2040 if (devicep == NULL
2041 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2042 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2043 return;
2044
2045 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2046 }
2047
2048 void
2049 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2050 size_t *sizes, unsigned short *kinds,
2051 unsigned int flags, void **depend)
2052 {
2053 struct gomp_device_descr *devicep = resolve_device (device);
2054
2055 /* If there are depend clauses, but nowait is not present,
2056 block the parent task until the dependencies are resolved
2057 and then just continue with the rest of the function as if it
2058 is a merged task.  Until we are able to schedule tasks during
2059 variable mapping or unmapping, ignore nowait if depend clauses
2060 are not present. */
2061 if (depend != NULL)
2062 {
2063 struct gomp_thread *thr = gomp_thread ();
2064 if (thr->task && thr->task->depend_hash)
2065 {
2066 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2067 && thr->ts.team
2068 && !thr->task->final_task)
2069 {
2070 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2071 mapnum, hostaddrs, sizes, kinds,
2072 flags | GOMP_TARGET_FLAG_UPDATE,
2073 depend, NULL, GOMP_TARGET_TASK_DATA))
2074 return;
2075 }
2076 else
2077 {
2078 struct gomp_team *team = thr->ts.team;
2079 /* If parallel or taskgroup has been cancelled, don't start new
2080 tasks. */
2081 if (__builtin_expect (gomp_cancel_var, 0) && team)
2082 {
2083 if (gomp_team_barrier_cancelled (&team->barrier))
2084 return;
2085 if (thr->task->taskgroup)
2086 {
2087 if (thr->task->taskgroup->cancelled)
2088 return;
2089 if (thr->task->taskgroup->workshare
2090 && thr->task->taskgroup->prev
2091 && thr->task->taskgroup->prev->cancelled)
2092 return;
2093 }
2094 }
2095
2096 gomp_task_maybe_wait_for_dependencies (depend);
2097 }
2098 }
2099 }
2100
2101 if (devicep == NULL
2102 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2103 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2104 return;
2105
2106 struct gomp_thread *thr = gomp_thread ();
2107 struct gomp_team *team = thr->ts.team;
2108 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2109 if (__builtin_expect (gomp_cancel_var, 0) && team)
2110 {
2111 if (gomp_team_barrier_cancelled (&team->barrier))
2112 return;
2113 if (thr->task->taskgroup)
2114 {
2115 if (thr->task->taskgroup->cancelled)
2116 return;
2117 if (thr->task->taskgroup->workshare
2118 && thr->task->taskgroup->prev
2119 && thr->task->taskgroup->prev->cancelled)
2120 return;
2121 }
2122 }
2123
2124 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2125 }
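
/* Illustrative sketch (map kind assumed): a directive like

     #pragma omp target update from(a[0:n])

   calls GOMP_target_update_ext with a single map entry of kind
   GOMP_MAP_FROM, and gomp_update copies the device copy of A back to the
   host; for host fallback or shared-memory devices the early returns above
   make the update a no-op, since both sides already see the same
   memory.  */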
2126
2127 static void
2128 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2129 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2130 {
2131 const int typemask = 0xff;
2132 size_t i;
2133 gomp_mutex_lock (&devicep->lock);
2134 if (devicep->state == GOMP_DEVICE_FINALIZED)
2135 {
2136 gomp_mutex_unlock (&devicep->lock);
2137 return;
2138 }
2139
2140 for (i = 0; i < mapnum; i++)
2141 {
2142 struct splay_tree_key_s cur_node;
2143 unsigned char kind = kinds[i] & typemask;
2144 switch (kind)
2145 {
2146 case GOMP_MAP_FROM:
2147 case GOMP_MAP_ALWAYS_FROM:
2148 case GOMP_MAP_DELETE:
2149 case GOMP_MAP_RELEASE:
2150 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2151 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2152 cur_node.host_start = (uintptr_t) hostaddrs[i];
2153 cur_node.host_end = cur_node.host_start + sizes[i];
2154 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2155 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2156 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2157 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2158 if (!k)
2159 continue;
2160
2161 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2162 k->refcount--;
2163 if ((kind == GOMP_MAP_DELETE
2164 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2165 && k->refcount != REFCOUNT_INFINITY)
2166 k->refcount = 0;
2167
2168 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2169 || kind == GOMP_MAP_ALWAYS_FROM)
2170 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2171 (void *) (k->tgt->tgt_start + k->tgt_offset
2172 + cur_node.host_start
2173 - k->host_start),
2174 cur_node.host_end - cur_node.host_start);
2175 if (k->refcount == 0)
2176 gomp_remove_var (devicep, k);
2177
2178 break;
2179 default:
2180 gomp_mutex_unlock (&devicep->lock);
2181 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2182 kind);
2183 }
2184 }
2185
2186 gomp_mutex_unlock (&devicep->lock);
2187 }
2188
2189 void
2190 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2191 size_t *sizes, unsigned short *kinds,
2192 unsigned int flags, void **depend)
2193 {
2194 struct gomp_device_descr *devicep = resolve_device (device);
2195
2196 /* If there are depend clauses, but nowait is not present,
2197 block the parent task until the dependencies are resolved
2198 and then just continue with the rest of the function as if it
2199 is a merged task.  Until we are able to schedule tasks during
2200 variable mapping or unmapping, ignore nowait if depend clauses
2201 are not present. */
2202 if (depend != NULL)
2203 {
2204 struct gomp_thread *thr = gomp_thread ();
2205 if (thr->task && thr->task->depend_hash)
2206 {
2207 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2208 && thr->ts.team
2209 && !thr->task->final_task)
2210 {
2211 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2212 mapnum, hostaddrs, sizes, kinds,
2213 flags, depend, NULL,
2214 GOMP_TARGET_TASK_DATA))
2215 return;
2216 }
2217 else
2218 {
2219 struct gomp_team *team = thr->ts.team;
2220 /* If parallel or taskgroup has been cancelled, don't start new
2221 tasks. */
2222 if (__builtin_expect (gomp_cancel_var, 0) && team)
2223 {
2224 if (gomp_team_barrier_cancelled (&team->barrier))
2225 return;
2226 if (thr->task->taskgroup)
2227 {
2228 if (thr->task->taskgroup->cancelled)
2229 return;
2230 if (thr->task->taskgroup->workshare
2231 && thr->task->taskgroup->prev
2232 && thr->task->taskgroup->prev->cancelled)
2233 return;
2234 }
2235 }
2236
2237 gomp_task_maybe_wait_for_dependencies (depend);
2238 }
2239 }
2240 }
2241
2242 if (devicep == NULL
2243 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2244 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2245 return;
2246
2247 struct gomp_thread *thr = gomp_thread ();
2248 struct gomp_team *team = thr->ts.team;
2249 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2250 if (__builtin_expect (gomp_cancel_var, 0) && team)
2251 {
2252 if (gomp_team_barrier_cancelled (&team->barrier))
2253 return;
2254 if (thr->task->taskgroup)
2255 {
2256 if (thr->task->taskgroup->cancelled)
2257 return;
2258 if (thr->task->taskgroup->workshare
2259 && thr->task->taskgroup->prev
2260 && thr->task->taskgroup->prev->cancelled)
2261 return;
2262 }
2263 }
2264
2265 size_t i;
2266 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2267 for (i = 0; i < mapnum; i++)
2268 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2269 {
2270 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2271 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2272 i += sizes[i];
2273 }
2274 else
2275 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2276 true, GOMP_MAP_VARS_ENTER_DATA);
2277 else
2278 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2279 }
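
/* Illustrative sketch:

     #pragma omp target enter data map(to: a[0:n])

   arrives here without GOMP_TARGET_FLAG_EXIT_DATA and maps each clause
   individually through gomp_map_vars (a GOMP_MAP_STRUCT entry brings its
   SIZES[i] trailing component entries along), whereas

     #pragma omp target exit data map(from: a[0:n])

   sets the flag and goes through gomp_exit_data above, which decrements
   reference counts and copies back GOMP_MAP_FROM entries whose count has
   dropped to zero.  */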
2280
2281 bool
2282 gomp_target_task_fn (void *data)
2283 {
2284 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2285 struct gomp_device_descr *devicep = ttask->devicep;
2286
2287 if (ttask->fn != NULL)
2288 {
2289 void *fn_addr;
2290 if (devicep == NULL
2291 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2292 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2293 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2294 {
2295 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2296 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2297 return false;
2298 }
2299
2300 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2301 {
2302 if (ttask->tgt)
2303 gomp_unmap_vars (ttask->tgt, true);
2304 return false;
2305 }
2306
2307 void *actual_arguments;
2308 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2309 {
2310 ttask->tgt = NULL;
2311 actual_arguments = ttask->hostaddrs;
2312 }
2313 else
2314 {
2315 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2316 NULL, ttask->sizes, ttask->kinds, true,
2317 GOMP_MAP_VARS_TARGET);
2318 actual_arguments = (void *) ttask->tgt->tgt_start;
2319 }
2320 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2321
2322 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2323 ttask->args, (void *) ttask);
2324 return true;
2325 }
2326 else if (devicep == NULL
2327 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2328 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2329 return false;
2330
2331 size_t i;
2332 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2333 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2334 ttask->kinds, true);
2335 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2336 for (i = 0; i < ttask->mapnum; i++)
2337 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2338 {
2339 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2340 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2341 GOMP_MAP_VARS_ENTER_DATA);
2342 i += ttask->sizes[i];
2343 }
2344 else
2345 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2346 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2347 else
2348 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2349 ttask->kinds);
2350 return false;
2351 }
2352
2353 void
2354 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2355 {
2356 if (thread_limit)
2357 {
2358 struct gomp_task_icv *icv = gomp_icv (true);
2359 icv->thread_limit_var
2360 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2361 }
2362 (void) num_teams;
2363 }
2364
2365 void *
2366 omp_target_alloc (size_t size, int device_num)
2367 {
2368 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2369 return malloc (size);
2370
2371 if (device_num < 0)
2372 return NULL;
2373
2374 struct gomp_device_descr *devicep = resolve_device (device_num);
2375 if (devicep == NULL)
2376 return NULL;
2377
2378 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2379 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2380 return malloc (size);
2381
2382 gomp_mutex_lock (&devicep->lock);
2383 void *ret = devicep->alloc_func (devicep->target_id, size);
2384 gomp_mutex_unlock (&devicep->lock);
2385 return ret;
2386 }
2387
2388 void
2389 omp_target_free (void *device_ptr, int device_num)
2390 {
2391 if (device_ptr == NULL)
2392 return;
2393
2394 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2395 {
2396 free (device_ptr);
2397 return;
2398 }
2399
2400 if (device_num < 0)
2401 return;
2402
2403 struct gomp_device_descr *devicep = resolve_device (device_num);
2404 if (devicep == NULL)
2405 return;
2406
2407 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2408 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2409 {
2410 free (device_ptr);
2411 return;
2412 }
2413
2414 gomp_mutex_lock (&devicep->lock);
2415 gomp_free_device_memory (devicep, device_ptr);
2416 gomp_mutex_unlock (&devicep->lock);
2417 }
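
/* Illustrative usage of the two routines above (a minimal sketch; DEV is
   assumed to name an existing non-shared-memory device):

     int dev = omp_get_default_device ();
     double *p = (double *) omp_target_alloc (n * sizeof (double), dev);
     if (p)
       {
         ... use P in target regions via an is_device_ptr clause ...
         omp_target_free (p, dev);
       }

   On host fallback and shared-memory devices both routines degenerate to
   plain malloc/free, as the capability checks above show.  */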
2418
2419 int
2420 omp_target_is_present (const void *ptr, int device_num)
2421 {
2422 if (ptr == NULL)
2423 return 1;
2424
2425 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2426 return 1;
2427
2428 if (device_num < 0)
2429 return 0;
2430
2431 struct gomp_device_descr *devicep = resolve_device (device_num);
2432 if (devicep == NULL)
2433 return 0;
2434
2435 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2436 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2437 return 1;
2438
2439 gomp_mutex_lock (&devicep->lock);
2440 struct splay_tree_s *mem_map = &devicep->mem_map;
2441 struct splay_tree_key_s cur_node;
2442
2443 cur_node.host_start = (uintptr_t) ptr;
2444 cur_node.host_end = cur_node.host_start;
2445 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2446 int ret = n != NULL;
2447 gomp_mutex_unlock (&devicep->lock);
2448 return ret;
2449 }
2450
2451 int
2452 omp_target_memcpy (void *dst, const void *src, size_t length,
2453 size_t dst_offset, size_t src_offset, int dst_device_num,
2454 int src_device_num)
2455 {
2456 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2457 bool ret;
2458
2459 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2460 {
2461 if (dst_device_num < 0)
2462 return EINVAL;
2463
2464 dst_devicep = resolve_device (dst_device_num);
2465 if (dst_devicep == NULL)
2466 return EINVAL;
2467
2468 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2469 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2470 dst_devicep = NULL;
2471 }
2472 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2473 {
2474 if (src_device_num < 0)
2475 return EINVAL;
2476
2477 src_devicep = resolve_device (src_device_num);
2478 if (src_devicep == NULL)
2479 return EINVAL;
2480
2481 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2482 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2483 src_devicep = NULL;
2484 }
2485 if (src_devicep == NULL && dst_devicep == NULL)
2486 {
2487 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2488 return 0;
2489 }
2490 if (src_devicep == NULL)
2491 {
2492 gomp_mutex_lock (&dst_devicep->lock);
2493 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2494 (char *) dst + dst_offset,
2495 (char *) src + src_offset, length);
2496 gomp_mutex_unlock (&dst_devicep->lock);
2497 return (ret ? 0 : EINVAL);
2498 }
2499 if (dst_devicep == NULL)
2500 {
2501 gomp_mutex_lock (&src_devicep->lock);
2502 ret = src_devicep->dev2host_func (src_devicep->target_id,
2503 (char *) dst + dst_offset,
2504 (char *) src + src_offset, length);
2505 gomp_mutex_unlock (&src_devicep->lock);
2506 return (ret ? 0 : EINVAL);
2507 }
2508 if (src_devicep == dst_devicep)
2509 {
2510 gomp_mutex_lock (&src_devicep->lock);
2511 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2512 (char *) dst + dst_offset,
2513 (char *) src + src_offset, length);
2514 gomp_mutex_unlock (&src_devicep->lock);
2515 return (ret ? 0 : EINVAL);
2516 }
2517 return EINVAL;
2518 }
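
/* Sketch of how the branches above are selected (a host side is denoted by
   GOMP_DEVICE_HOST_FALLBACK; shared-memory devices are canonicalized to
   NULL above and behave the same):

     dst on host, src on host    -> plain memcpy
     dst on device, src on host  -> dst_devicep->host2dev_func
     dst on host, src on device  -> src_devicep->dev2host_func
     dst and src on same device  -> src_devicep->dev2dev_func

   A copy between two different offload devices is not supported and
   returns EINVAL.  */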
2519
2520 static int
2521 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2522 int num_dims, const size_t *volume,
2523 const size_t *dst_offsets,
2524 const size_t *src_offsets,
2525 const size_t *dst_dimensions,
2526 const size_t *src_dimensions,
2527 struct gomp_device_descr *dst_devicep,
2528 struct gomp_device_descr *src_devicep)
2529 {
2530 size_t dst_slice = element_size;
2531 size_t src_slice = element_size;
2532 size_t j, dst_off, src_off, length;
2533 int i, ret;
2534
2535 if (num_dims == 1)
2536 {
2537 if (__builtin_mul_overflow (element_size, volume[0], &length)
2538 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2539 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2540 return EINVAL;
2541 if (dst_devicep == NULL && src_devicep == NULL)
2542 {
2543 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2544 length);
2545 ret = 1;
2546 }
2547 else if (src_devicep == NULL)
2548 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2549 (char *) dst + dst_off,
2550 (const char *) src + src_off,
2551 length);
2552 else if (dst_devicep == NULL)
2553 ret = src_devicep->dev2host_func (src_devicep->target_id,
2554 (char *) dst + dst_off,
2555 (const char *) src + src_off,
2556 length);
2557 else if (src_devicep == dst_devicep)
2558 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2559 (char *) dst + dst_off,
2560 (const char *) src + src_off,
2561 length);
2562 else
2563 ret = 0;
2564 return ret ? 0 : EINVAL;
2565 }
2566
2567 /* FIXME: it would be nice to have some plugin function to handle
2568 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2569 be handled in the generic recursion below, and for host-host it
2570 should be used even for any num_dims >= 2. */
2571
2572 for (i = 1; i < num_dims; i++)
2573 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2574 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2575 return EINVAL;
2576 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2577 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2578 return EINVAL;
2579 for (j = 0; j < volume[0]; j++)
2580 {
2581 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2582 (const char *) src + src_off,
2583 element_size, num_dims - 1,
2584 volume + 1, dst_offsets + 1,
2585 src_offsets + 1, dst_dimensions + 1,
2586 src_dimensions + 1, dst_devicep,
2587 src_devicep);
2588 if (ret)
2589 return ret;
2590 dst_off += dst_slice;
2591 src_off += src_slice;
2592 }
2593 return 0;
2594 }
2595
2596 int
2597 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2598 int num_dims, const size_t *volume,
2599 const size_t *dst_offsets,
2600 const size_t *src_offsets,
2601 const size_t *dst_dimensions,
2602 const size_t *src_dimensions,
2603 int dst_device_num, int src_device_num)
2604 {
2605 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2606
2607 if (!dst && !src)
2608 return INT_MAX;
2609
2610 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2611 {
2612 if (dst_device_num < 0)
2613 return EINVAL;
2614
2615 dst_devicep = resolve_device (dst_device_num);
2616 if (dst_devicep == NULL)
2617 return EINVAL;
2618
2619 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2620 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2621 dst_devicep = NULL;
2622 }
2623 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2624 {
2625 if (src_device_num < 0)
2626 return EINVAL;
2627
2628 src_devicep = resolve_device (src_device_num);
2629 if (src_devicep == NULL)
2630 return EINVAL;
2631
2632 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2633 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2634 src_devicep = NULL;
2635 }
2636
2637 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2638 return EINVAL;
2639
2640 if (src_devicep)
2641 gomp_mutex_lock (&src_devicep->lock);
2642 else if (dst_devicep)
2643 gomp_mutex_lock (&dst_devicep->lock);
2644 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2645 volume, dst_offsets, src_offsets,
2646 dst_dimensions, src_dimensions,
2647 dst_devicep, src_devicep);
2648 if (src_devicep)
2649 gomp_mutex_unlock (&src_devicep->lock);
2650 else if (dst_devicep)
2651 gomp_mutex_unlock (&dst_devicep->lock);
2652 return ret;
2653 }
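
/* Worked host-to-host example (a sketch; matrices assumed contiguous):
   copy a 2x3 block of doubles starting at row 1, column 2 of a 4x8 source
   into the top-left corner of a 2x3 destination:

     size_t volume[2]   = { 2, 3 };
     size_t dst_off[2]  = { 0, 0 };
     size_t src_off[2]  = { 1, 2 };
     size_t dst_dims[2] = { 2, 3 };
     size_t src_dims[2] = { 4, 8 };
     int err = omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                                       dst_off, src_off, dst_dims, src_dims,
                                       GOMP_DEVICE_HOST_FALLBACK,
                                       GOMP_DEVICE_HOST_FALLBACK);

   The worker above computes src_slice = 8 * sizeof (double), advances
   src_off to the start of row 1, and performs two recursive
   num_dims == 1 calls, each copying one contiguous run of 3 elements.  */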
2654
2655 int
2656 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2657 size_t size, size_t device_offset, int device_num)
2658 {
2659 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2660 return EINVAL;
2661
2662 if (device_num < 0)
2663 return EINVAL;
2664
2665 struct gomp_device_descr *devicep = resolve_device (device_num);
2666 if (devicep == NULL)
2667 return EINVAL;
2668
2669 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2670 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2671 return EINVAL;
2672
2673 gomp_mutex_lock (&devicep->lock);
2674
2675 struct splay_tree_s *mem_map = &devicep->mem_map;
2676 struct splay_tree_key_s cur_node;
2677 int ret = EINVAL;
2678
2679 cur_node.host_start = (uintptr_t) host_ptr;
2680 cur_node.host_end = cur_node.host_start + size;
2681 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2682 if (n)
2683 {
2684 if (n->tgt->tgt_start + n->tgt_offset
2685 == (uintptr_t) device_ptr + device_offset
2686 && n->host_start <= cur_node.host_start
2687 && n->host_end >= cur_node.host_end)
2688 ret = 0;
2689 }
2690 else
2691 {
2692 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2693 tgt->array = gomp_malloc (sizeof (*tgt->array));
2694 tgt->refcount = 1;
2695 tgt->tgt_start = 0;
2696 tgt->tgt_end = 0;
2697 tgt->to_free = NULL;
2698 tgt->prev = NULL;
2699 tgt->list_count = 0;
2700 tgt->device_descr = devicep;
2701 splay_tree_node array = tgt->array;
2702 splay_tree_key k = &array->key;
2703 k->host_start = cur_node.host_start;
2704 k->host_end = cur_node.host_end;
2705 k->tgt = tgt;
2706 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2707 k->refcount = REFCOUNT_INFINITY;
2708 k->virtual_refcount = 0;
2709 k->aux = NULL;
2710 array->left = NULL;
2711 array->right = NULL;
2712 splay_tree_insert (&devicep->mem_map, array);
2713 ret = 0;
2714 }
2715 gomp_mutex_unlock (&devicep->lock);
2716 return ret;
2717 }
2718
2719 int
2720 omp_target_disassociate_ptr (const void *ptr, int device_num)
2721 {
2722 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2723 return EINVAL;
2724
2725 if (device_num < 0)
2726 return EINVAL;
2727
2728 struct gomp_device_descr *devicep = resolve_device (device_num);
2729 if (devicep == NULL)
2730 return EINVAL;
2731
2732 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2733 return EINVAL;
2734
2735 gomp_mutex_lock (&devicep->lock);
2736
2737 struct splay_tree_s *mem_map = &devicep->mem_map;
2738 struct splay_tree_key_s cur_node;
2739 int ret = EINVAL;
2740
2741 cur_node.host_start = (uintptr_t) ptr;
2742 cur_node.host_end = cur_node.host_start;
2743 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2744 if (n
2745 && n->host_start == cur_node.host_start
2746 && n->refcount == REFCOUNT_INFINITY
2747 && n->tgt->tgt_start == 0
2748 && n->tgt->to_free == NULL
2749 && n->tgt->refcount == 1
2750 && n->tgt->list_count == 0)
2751 {
2752 splay_tree_remove (&devicep->mem_map, n);
2753 gomp_unmap_tgt (n->tgt);
2754 ret = 0;
2755 }
2756
2757 gomp_mutex_unlock (&devicep->lock);
2758 return ret;
2759 }
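
/* Illustrative usage of the association API above (a sketch; DEV assumed
   valid and BUF a host object):

     void *dp = omp_target_alloc (sizeof (buf), dev);
     omp_target_associate_ptr (buf, dp, sizeof (buf), 0, dev);
     assert (omp_target_is_present (buf, dev));
     ... map clauses for BUF now reuse DP instead of allocating ...
     omp_target_disassociate_ptr (buf, dev);
     omp_target_free (dp, dev);

   The association is entered into the device's splay tree with
   REFCOUNT_INFINITY, so mapping constructs will neither copy it out nor
   free it behind the user's back; only an explicit disassociation
   removes it.  */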
2760
2761 int
2762 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2763 {
2764 (void) kind;
2765 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2766 return gomp_pause_host ();
2767 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2768 return -1;
2769 /* Do nothing for target devices for now. */
2770 return 0;
2771 }
2772
2773 int
2774 omp_pause_resource_all (omp_pause_resource_t kind)
2775 {
2776 (void) kind;
2777 if (gomp_pause_host ())
2778 return -1;
2779 /* Do nothing for target devices for now. */
2780 return 0;
2781 }
2782
2783 ialias (omp_pause_resource)
2784 ialias (omp_pause_resource_all)
2785
2786 #ifdef PLUGIN_SUPPORT
2787
2788 /* This function tries to load a plugin for DEVICE.  The name of the plugin
2789 is passed in PLUGIN_NAME.
2790 The handles of the found functions are stored in the corresponding fields
2791 of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2792
2793 static bool
2794 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2795 const char *plugin_name)
2796 {
2797 const char *err = NULL, *last_missing = NULL;
2798
2799 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2800 if (!plugin_handle)
2801 goto dl_fail;
2802
2803 /* Check if all required functions are available in the plugin and store
2804 their handles.  None of the symbols can legitimately be NULL,
2805 so we don't need to check dlerror all the time. */
2806 #define DLSYM(f) \
2807 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2808 goto dl_fail
2809 /* Similar, but missing functions are not an error. Return false if
2810 failed, true otherwise. */
2811 #define DLSYM_OPT(f, n) \
2812 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2813 || (last_missing = #n, 0))
2814
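/* For instance, DLSYM (alloc) looks up the symbol "GOMP_OFFLOAD_alloc" in
   the plugin and stores it in device->alloc_func, jumping to dl_fail if it
   is absent; DLSYM_OPT instead records the name of a missing optional
   function in last_missing and evaluates to false.  */
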
2815 DLSYM (version);
2816 if (device->version_func () != GOMP_VERSION)
2817 {
2818 err = "plugin version mismatch";
2819 goto fail;
2820 }
2821
2822 DLSYM (get_name);
2823 DLSYM (get_caps);
2824 DLSYM (get_type);
2825 DLSYM (get_num_devices);
2826 DLSYM (init_device);
2827 DLSYM (fini_device);
2828 DLSYM (load_image);
2829 DLSYM (unload_image);
2830 DLSYM (alloc);
2831 DLSYM (free);
2832 DLSYM (dev2host);
2833 DLSYM (host2dev);
2834 device->capabilities = device->get_caps_func ();
2835 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2836 {
2837 DLSYM (run);
2838 DLSYM (async_run);
2839 DLSYM_OPT (can_run, can_run);
2840 DLSYM (dev2dev);
2841 }
2842 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2843 {
2844 if (!DLSYM_OPT (openacc.exec, openacc_exec)
2845 || !DLSYM_OPT (openacc.create_thread_data,
2846 openacc_create_thread_data)
2847 || !DLSYM_OPT (openacc.destroy_thread_data,
2848 openacc_destroy_thread_data)
2849 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
2850 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
2851 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
2852 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
2853 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
2854 || !DLSYM_OPT (openacc.async.queue_callback,
2855 openacc_async_queue_callback)
2856 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
2857 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
2858 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
2859 {
2860 /* Require all the OpenACC handlers if we have
2861 GOMP_OFFLOAD_CAP_OPENACC_200. */
2862 err = "plugin missing OpenACC handler function";
2863 goto fail;
2864 }
2865
2866 unsigned cuda = 0;
2867 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2868 openacc_cuda_get_current_device);
2869 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2870 openacc_cuda_get_current_context);
2871 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2872 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2873 if (cuda && cuda != 4)
2874 {
2875 /* Make sure all the CUDA functions are there if any of them are. */
2876 err = "plugin missing OpenACC CUDA handler function";
2877 goto fail;
2878 }
2879 }
2880 #undef DLSYM
2881 #undef DLSYM_OPT
2882
2883 return 1;
2884
2885 dl_fail:
2886 err = dlerror ();
2887 fail:
2888 gomp_error ("while loading %s: %s", plugin_name, err);
2889 if (last_missing)
2890 gomp_error ("missing function was %s", last_missing);
2891 if (plugin_handle)
2892 dlclose (plugin_handle);
2893
2894 return 0;
2895 }
2896
2897 /* This function finalizes all initialized devices. */
2898
2899 static void
2900 gomp_target_fini (void)
2901 {
2902 int i;
2903 for (i = 0; i < num_devices; i++)
2904 {
2905 bool ret = true;
2906 struct gomp_device_descr *devicep = &devices[i];
2907 gomp_mutex_lock (&devicep->lock);
2908 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2909 ret = gomp_fini_device (devicep);
2910 gomp_mutex_unlock (&devicep->lock);
2911 if (!ret)
2912 gomp_fatal ("device finalization failed");
2913 }
2914 }
2915
2916 /* This function initializes the runtime for offloading.
2917 It parses the list of offload plugins and tries to load each of them.
2918 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2919 will be set, and the array DEVICES initialized, containing descriptors for
2920 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
2921 followed by the others.  */
2922
2923 static void
2924 gomp_target_init (void)
2925 {
2926 const char *prefix = "libgomp-plugin-";
2927 const char *suffix = SONAME_SUFFIX (1);
2928 const char *cur, *next;
2929 char *plugin_name;
2930 int i, new_num_devices;
2931
2932 num_devices = 0;
2933 devices = NULL;
2934
2935 cur = OFFLOAD_PLUGINS;
2936 if (*cur)
2937 do
2938 {
2939 struct gomp_device_descr current_device;
2940 size_t prefix_len, suffix_len, cur_len;
2941
2942 next = strchr (cur, ',');
2943
2944 prefix_len = strlen (prefix);
2945 cur_len = next ? next - cur : strlen (cur);
2946 suffix_len = strlen (suffix);
2947
2948 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2949 if (!plugin_name)
2950 {
2951 num_devices = 0;
2952 break;
2953 }
2954
2955 memcpy (plugin_name, prefix, prefix_len);
2956 memcpy (plugin_name + prefix_len, cur, cur_len);
2957 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2958
2959 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2960 {
2961 new_num_devices = current_device.get_num_devices_func ();
2962 if (new_num_devices >= 1)
2963 {
2964 /* Augment DEVICES and NUM_DEVICES. */
2965
2966 devices = realloc (devices, (num_devices + new_num_devices)
2967 * sizeof (struct gomp_device_descr));
2968 if (!devices)
2969 {
2970 num_devices = 0;
2971 free (plugin_name);
2972 break;
2973 }
2974
2975 current_device.name = current_device.get_name_func ();
2976 /* current_device.capabilities has already been set. */
2977 current_device.type = current_device.get_type_func ();
2978 current_device.mem_map.root = NULL;
2979 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2980 for (i = 0; i < new_num_devices; i++)
2981 {
2982 current_device.target_id = i;
2983 devices[num_devices] = current_device;
2984 gomp_mutex_init (&devices[num_devices].lock);
2985 num_devices++;
2986 }
2987 }
2988 }
2989
2990 free (plugin_name);
2991 cur = next + 1;
2992 }
2993 while (next);
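
/* For example, with OFFLOAD_PLUGINS set to "nvptx,hsa" (an assumed
   configuration), the loop above attempts to dlopen
   "libgomp-plugin-nvptx" and "libgomp-plugin-hsa", each with the
   SONAME_SUFFIX appended, i.e. typically "libgomp-plugin-nvptx.so.1"
   and "libgomp-plugin-hsa.so.1" on ELF targets.  */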
2994
2995 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2996 NUM_DEVICES_OPENMP. */
2997 struct gomp_device_descr *devices_s
2998 = malloc (num_devices * sizeof (struct gomp_device_descr));
2999 if (!devices_s)
3000 {
3001 num_devices = 0;
3002 free (devices);
3003 devices = NULL;
3004 }
3005 num_devices_openmp = 0;
3006 for (i = 0; i < num_devices; i++)
3007 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3008 devices_s[num_devices_openmp++] = devices[i];
3009 int num_devices_after_openmp = num_devices_openmp;
3010 for (i = 0; i < num_devices; i++)
3011 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3012 devices_s[num_devices_after_openmp++] = devices[i];
3013 free (devices);
3014 devices = devices_s;
3015
3016 for (i = 0; i < num_devices; i++)
3017 {
3018 /* The 'devices' array can be moved (by the realloc call) until we have
3019 found all the plugins, so registering with the OpenACC runtime (which
3020 takes a copy of the pointer argument) must be delayed until now. */
3021 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3022 goacc_register (&devices[i]);
3023 }
3024
3025 if (atexit (gomp_target_fini) != 0)
3026 gomp_fatal ("atexit failed");
3027 }
3028
3029 #else /* PLUGIN_SUPPORT */
3030 /* If dlfcn.h is unavailable we always fall back to host execution.
3031 The GOMP_target* routines are just stubs for this case.  */
3032 static void
3033 gomp_target_init (void)
3034 {
3035 }
3036 #endif /* PLUGIN_SUPPORT */