/* Copyright (C) 2013-2017 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
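
/* Worked example (annotation, not in the original source): assume the splay
   tree holds a mapping for the host range [0x1000, 0x1100).  A zero-length
   key such as [0x1100, 0x1100), as produced for a zero-length array section
   at the end of that range, matches as follows: gomp_map_lookup first widens
   the key to [0x1100, 0x1101), which lies outside the mapping, and then
   tries [0x10ff, 0x1100), which lies inside it, so the mapping is found.
   gomp_map_0len_lookup performs only the first widening, so it matches
   zero-length keys at the start or interior of a mapping, but not at its
   end.  */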

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    void *d, const void *h, size_t sz)
{
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}
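
/* Illustrative sketch (not in the original source, identifiers
   hypothetical): with short map kinds, the low byte encodes the GOMP_MAP_*
   type and the high bits the log2 of the required alignment, matching the
   rshift/typemask constants used by gomp_map_vars below.  */
#if 0
/* Not compiled; for exposition only.  */
static void
example_decode_kind (void)
{
  unsigned short kind = (3 << 8) | GOMP_MAP_TO;  /* 8-byte-aligned "to" map. */
  int type = kind & 0xff;                        /* GOMP_MAP_TO */
  size_t align = (size_t) 1 << (kind >> 8);      /* 8 */
  (void) type; (void) align;
}
#endif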

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
                  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      gomp_copy_host2dev (devicep,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *));
}
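
/* Worked example (annotation, not in the original source): suppose a host
   array was mapped with host_start 0x1000 and device address
   tgt_start + tgt_offset = 0x9000, and the pointer being mapped holds host
   value 0xff8 with BIAS 8 (e.g. an array section starting two 4-byte
   elements before the mapped range).  Adding the bias gives 0x1000, which
   the lookup resolves to device address 0x9000; subtracting the bias again
   stores 0x8ff8 on the device, so pointer arithmetic on the device matches
   the host.  */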

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
                              &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                      kind & typemask);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
                                  kind & typemask);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
           + tgt->list[i + 1].key->tgt_offset
           + tgt->list[i + 1].offset
           + (uintptr_t) hostaddrs[i]
           - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
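
/* Annotation (not in the original source): when no splay-tree key is
   recorded, tgt->list[i].offset doubles as a sentinel, as set up in
   gomp_map_vars below:
     ~0  pass the host address through unchanged (NULL host addresses,
         GOMP_MAP_FIRSTPRIVATE_INT, use_device_ptr);
     ~1  pass a zero / NULL device address;
     ~2  GOMP_MAP_STRUCT entry, addressed relative to the following list
         entry.  */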

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
        {
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start;
          splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("use_device_ptr pointer wasn't mapped");
            }
          cur_node.host_start -= n->host_start;
          hostaddrs[i]
            = (void *) (n->tgt->tgt_start + n->tgt_offset
                        + cur_node.host_start);
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 2;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first]
                          - (uintptr_t) hostaddrs[i];
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                tgt->list[i].key = NULL;
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                      sizes, kinds);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = ~(uintptr_t) 1;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = ~(uintptr_t) 1;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
                                kind & typemask);
      else
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_USE_DEVICE_PTR:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
                                            sizes, kinds);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *));
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
                                      kind & typemask);
            else
              {
                k->link_key = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != ~(size_t) 0)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = ~(size_t) 0;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i]);
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start);

                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
                                                         j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j].key = k;
                          tgt->list[j].copy_from = false;
                          tgt->list[j].always_copy_from = false;
                          if (k->refcount != REFCOUNT_INFINITY)
                            k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
                                            k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start),
                                            sizes[j]);
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) n->tgt_offset,
                                            &tgt_addr, sizeof (void *));
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          /* FIXME: see above FIXME comment.  */
          gomp_copy_host2dev (devicep,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *));
        }
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
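
/* Illustrative sketch (not in the original source, identifier hypothetical):
   the recurring expression (tgt_size + align - 1) & ~(align - 1) used above
   rounds tgt_size up to the next multiple of align, a power of two.  */
#if 0
/* Not compiled; for exposition only.  */
static void
example_round_up (void)
{
  size_t tgt_size = 13, align = 8;
  tgt_size = (tgt_size + align - 1) & ~(align - 1);  /* now 16 */
  (void) tgt_size;
}
#endif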

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        {
          splay_tree_remove (&devicep->mem_map, k);
          if (k->link_key)
            splay_tree_insert (&devicep->mem_map,
                               (splay_tree_node) k->link_key);
          if (k->tgt->refcount > 1)
            k->tgt->refcount--;
          else
            gomp_unmap_tgt (k->tgt);
        }
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
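
/* Worked example (annotation, not in the original source): if the same host
   object is mapped by two nested "target data" regions, the first
   gomp_map_vars allocates device memory and sets the key's refcount to 1,
   and the second finds it via gomp_map_vars_existing and bumps the refcount
   to 2.  The inner gomp_unmap_vars then only decrements the refcount; the
   outer one sees refcount 1, performs any requested copy-back, and removes
   the mapping.  */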

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
             void **hostaddrs, size_t *sizes, void *kinds,
             bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
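
/* Illustrative sketch (not in the original source; foo, bar and
   example_host_table are hypothetical): the host table decoded above is four
   pointers bracketing a function-address array and an (address, size)
   variable array.  For "omp declare target link" variables the most
   significant bit of the size entry would additionally be set and is masked
   off with size_mask when computing host_end.  */
#if 0
/* Not compiled; for exposition only.  */
extern void foo (void);        /* hypothetical offloaded function */
extern int bar[32];            /* hypothetical "declare target" variable */
static void *funcs[] = { (void *) foo };
static void *vars[] = { (void *) bar, (void *) sizeof (bar) };
static void **example_host_table[] = { &funcs[0], &funcs[1],
                                       &vars[0], &vars[2] };
#endif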

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find the mapping at the start of the node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                                : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          splay_tree_remove (&devicep->mem_map, n);
          if (n->link_key)
            {
              if (n->tgt->refcount > 1)
                n->tgt->refcount--;
              else
                {
                  is_tgt_unmapped = true;
                  gomp_unmap_tgt (n->tgt);
                }
            }
        }
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to the device all images registered up to this point.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from the device all images currently registered.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}
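
/* Worked example (annotation, not in the original source): for two
   firstprivate objects of sizes 4 and 8 with alignments 4 and 8,
   calculate_firstprivate_requirements yields tgt_align = 8 and
   tgt_size = 16 (offset 0 for the first object, padding up to offset 8 for
   the second).  copy_firstprivate_data then replays the same arithmetic to
   place the private copies and redirects hostaddrs[] at them.  */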

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
                         void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
        {
          gomp_mutex_unlock (&devicep->lock);
          return NULL;
        }

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
        return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, holding the addresses of the host objects, the
   sizes of the host objects (respectively, for pointer kinds, the pointer
   bias, with the size assumed to be sizeof (void *)), and the map kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
                     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
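
/* Illustrative sketch (not in the original source; fn, x and
   example_target_call are hypothetical): for a directive like
       #pragma omp target map(tofrom: x)
   a compiler using this entry point emits roughly the following call, with
   the outlined region body reading x through the address array it
   receives.  */
#if 0
/* Not compiled; for exposition only.  */
extern void fn (void *);       /* hypothetical outlined target region body */
static void
example_target_call (void)
{
  static int x;
  void *hostaddrs[] = { &x };
  size_t sizes[] = { sizeof (x) };
  unsigned char kinds[] = { GOMP_MAP_TOFROM };
  GOMP_target (GOMP_DEVICE_ICV, fn, NULL, 1, hostaddrs, sizes, kinds);
}
#endif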

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type, and optionally also the value.  If the value is not present in
   the first element, the whole second element is the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0, if
   the teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          if (task)
            {
              thr->task = task;
              gomp_end_task ();
              free (task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                              true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
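
/* Annotation (not in the original source): icv->target_data chains the
   active "target data" regions through tgt->prev, forming a stack.
   GOMP_target_data{,_ext} push a descriptor and GOMP_target_end_data pops
   and unmaps one; the host-fallback path pushes an empty descriptor (when a
   region is already active) so pushes and pops still pair up.  */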

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (team
                  && (gomp_team_barrier_cancelled (&team->barrier)
                      || (thr->task->taskgroup
                          && thr->task->taskgroup->cancelled)))
                return;

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
          || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
            k->refcount--;
          if ((kind == GOMP_MAP_DELETE
               || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
              && k->refcount != REFCOUNT_INFINITY)
            k->refcount = 0;

          if ((kind == GOMP_MAP_FROM && k->refcount == 0)
              || kind == GOMP_MAP_ALWAYS_FROM)
            gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
                                (void *) (k->tgt->tgt_start + k->tgt_offset
                                          + cur_node.host_start
                                          - k->host_start),
                                cur_node.host_end - cur_node.host_start);
          if (k->refcount == 0)
            {
              splay_tree_remove (&devicep->mem_map, k);
              if (k->link_key)
                splay_tree_insert (&devicep->mem_map,
                                   (splay_tree_node) k->link_key);
              if (k->tgt->refcount > 1)
                k->tgt->refcount--;
              else
                gomp_unmap_tgt (k->tgt);
            }

          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  gomp_mutex_unlock (&devicep->lock);
}
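
/* Worked example (annotation, not in the original source): for a mapping
   with refcount 2, "omp target exit data map(from: x)" merely decrements
   the refcount and copies nothing, while map(delete: x) forces the refcount
   to 0 and removes the mapping without a copy; map(always, from: x) copies
   back regardless of the remaining refcount.  */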
1797
1798 void
1799 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1800 size_t *sizes, unsigned short *kinds,
1801 unsigned int flags, void **depend)
1802 {
1803 struct gomp_device_descr *devicep = resolve_device (device);
1804
1805 /* If there are depend clauses, but nowait is not present,
1806 block the parent task until the dependencies are resolved
1807 and then just continue with the rest of the function as if it
1808 were a merged task. Until we are able to schedule tasks during
1809 variable mapping or unmapping, ignore nowait if depend clauses
1810 are not present. */
1811 if (depend != NULL)
1812 {
1813 struct gomp_thread *thr = gomp_thread ();
1814 if (thr->task && thr->task->depend_hash)
1815 {
1816 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1817 && thr->ts.team
1818 && !thr->task->final_task)
1819 {
1820 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1821 mapnum, hostaddrs, sizes, kinds,
1822 flags, depend, NULL,
1823 GOMP_TARGET_TASK_DATA))
1824 return;
1825 }
1826 else
1827 {
1828 struct gomp_team *team = thr->ts.team;
1829 /* If parallel or taskgroup has been cancelled, don't start new
1830 tasks. */
1831 if (team
1832 && (gomp_team_barrier_cancelled (&team->barrier)
1833 || (thr->task->taskgroup
1834 && thr->task->taskgroup->cancelled)))
1835 return;
1836
1837 gomp_task_maybe_wait_for_dependencies (depend);
1838 }
1839 }
1840 }
1841
1842 if (devicep == NULL
1843 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1844 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1845 return;
1846
1847 struct gomp_thread *thr = gomp_thread ();
1848 struct gomp_team *team = thr->ts.team;
1849 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1850 if (team
1851 && (gomp_team_barrier_cancelled (&team->barrier)
1852 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1853 return;
1854
1855 size_t i;
1856 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1857 for (i = 0; i < mapnum; i++)
1858 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1859 {
1860 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
1861 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1862 i += sizes[i];
1863 }
1864 else
1865 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
1866 true, GOMP_MAP_VARS_ENTER_DATA);
1867 else
1868 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
1869 }
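/* Usage sketch (illustrative): the compiler lowers

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   into calls to this entry point, distinguishing the exit-data form by
   GOMP_TARGET_FLAG_EXIT_DATA in FLAGS. A GOMP_MAP_STRUCT entry at index
   i covers itself plus the following sizes[i] member mappings, which is
   why the loop above maps sizes[i] + 1 entries at once and then advances
   i by sizes[i]. */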
1870
1871 bool
1872 gomp_target_task_fn (void *data)
1873 {
1874 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
1875 struct gomp_device_descr *devicep = ttask->devicep;
1876
1877 if (ttask->fn != NULL)
1878 {
1879 void *fn_addr;
1880 if (devicep == NULL
1881 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1882 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
1883 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1884 {
1885 ttask->state = GOMP_TARGET_TASK_FALLBACK;
1886 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
1887 return false;
1888 }
1889
1890 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1891 {
1892 if (ttask->tgt)
1893 gomp_unmap_vars (ttask->tgt, true);
1894 return false;
1895 }
1896
1897 void *actual_arguments;
1898 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1899 {
1900 ttask->tgt = NULL;
1901 actual_arguments = ttask->hostaddrs;
1902 }
1903 else
1904 {
1905 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
1906 NULL, ttask->sizes, ttask->kinds, true,
1907 GOMP_MAP_VARS_TARGET);
1908 actual_arguments = (void *) ttask->tgt->tgt_start;
1909 }
1910 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
1911
1912 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
1913 ttask->args, (void *) ttask);
1914 return true;
1915 }
1916 else if (devicep == NULL
1917 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1918 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1919 return false;
1920
1921 size_t i;
1922 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
1923 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1924 ttask->kinds, true);
1925 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1926 for (i = 0; i < ttask->mapnum; i++)
1927 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1928 {
1929 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
1930 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
1931 GOMP_MAP_VARS_ENTER_DATA);
1932 i += ttask->sizes[i];
1933 }
1934 else
1935 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
1936 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1937 else
1938 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1939 ttask->kinds);
1940 return false;
1941 }
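/* Summary of the target-task states handled above: a fallback run sets
   GOMP_TARGET_TASK_FALLBACK and completes on the host; a device run maps
   the arguments, sets GOMP_TARGET_TASK_READY_TO_RUN, hands the kernel to
   async_run_func and returns true to signal that the task is still
   pending; when the function is invoked again with state
   GOMP_TARGET_TASK_FINISHED it unmaps the variables and returns false.
   Tasks without a FN (target update / enter data / exit data) perform
   their data movement synchronously and return false immediately. */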
1942
1943 void
1944 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1945 {
1946 if (thread_limit)
1947 {
1948 struct gomp_task_icv *icv = gomp_icv (true);
1949 icv->thread_limit_var
1950 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1951 }
1952 (void) num_teams;
1953 }
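/* Illustrative lowering: on the host,

     #pragma omp teams num_teams(4) thread_limit(16)

   becomes GOMP_teams (4, 16). As the body above shows, the host
   implementation merely records THREAD_LIMIT in the ICV (values above
   INT_MAX are stored as UINT_MAX) and ignores NUM_TEAMS. */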
1954
1955 void *
1956 omp_target_alloc (size_t size, int device_num)
1957 {
1958 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1959 return malloc (size);
1960
1961 if (device_num < 0)
1962 return NULL;
1963
1964 struct gomp_device_descr *devicep = resolve_device (device_num);
1965 if (devicep == NULL)
1966 return NULL;
1967
1968 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1969 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1970 return malloc (size);
1971
1972 gomp_mutex_lock (&devicep->lock);
1973 void *ret = devicep->alloc_func (devicep->target_id, size);
1974 gomp_mutex_unlock (&devicep->lock);
1975 return ret;
1976 }
1977
1978 void
1979 omp_target_free (void *device_ptr, int device_num)
1980 {
1981 if (device_ptr == NULL)
1982 return;
1983
1984 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1985 {
1986 free (device_ptr);
1987 return;
1988 }
1989
1990 if (device_num < 0)
1991 return;
1992
1993 struct gomp_device_descr *devicep = resolve_device (device_num);
1994 if (devicep == NULL)
1995 return;
1996
1997 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1998 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1999 {
2000 free (device_ptr);
2001 return;
2002 }
2003
2004 gomp_mutex_lock (&devicep->lock);
2005 gomp_free_device_memory (devicep, device_ptr);
2006 gomp_mutex_unlock (&devicep->lock);
2007 }
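/* Usage sketch for omp_target_alloc/omp_target_free above (illustrative;
   a hypothetical helper, not part of libgomp):

     #include <stddef.h>
     #include <omp.h>

     void
     example_alloc_free (size_t nbytes)
     {
       int dev = omp_get_default_device ();
       void *p = omp_target_alloc (nbytes, dev);
       if (p != NULL)
         omp_target_free (p, dev);
     }

   For the host fallback device (and shared-memory devices) both calls
   degenerate to plain malloc/free, as the code above shows. */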
2008
2009 int
2010 omp_target_is_present (void *ptr, int device_num)
2011 {
2012 if (ptr == NULL)
2013 return 1;
2014
2015 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2016 return 1;
2017
2018 if (device_num < 0)
2019 return 0;
2020
2021 struct gomp_device_descr *devicep = resolve_device (device_num);
2022 if (devicep == NULL)
2023 return 0;
2024
2025 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2026 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2027 return 1;
2028
2029 gomp_mutex_lock (&devicep->lock);
2030 struct splay_tree_s *mem_map = &devicep->mem_map;
2031 struct splay_tree_key_s cur_node;
2032
2033 cur_node.host_start = (uintptr_t) ptr;
2034 cur_node.host_end = cur_node.host_start;
2035 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2036 int ret = n != NULL;
2037 gomp_mutex_unlock (&devicep->lock);
2038 return ret;
2039 }
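/* Illustrative check of a mapping created by enter data:

     double x[100];
     int dev = omp_get_default_device ();
     #pragma omp target enter data map(to: x)
     int present = omp_target_is_present (x, dev);   // 1 while mapped

   Note the special cases above: a NULL pointer, the host fallback device
   and shared-memory devices always report "present". */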
2040
2041 int
2042 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2043 size_t src_offset, int dst_device_num, int src_device_num)
2044 {
2045 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2046 bool ret;
2047
2048 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2049 {
2050 if (dst_device_num < 0)
2051 return EINVAL;
2052
2053 dst_devicep = resolve_device (dst_device_num);
2054 if (dst_devicep == NULL)
2055 return EINVAL;
2056
2057 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2058 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2059 dst_devicep = NULL;
2060 }
2061 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2062 {
2063 if (src_device_num < 0)
2064 return EINVAL;
2065
2066 src_devicep = resolve_device (src_device_num);
2067 if (src_devicep == NULL)
2068 return EINVAL;
2069
2070 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2071 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2072 src_devicep = NULL;
2073 }
2074 if (src_devicep == NULL && dst_devicep == NULL)
2075 {
2076 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2077 return 0;
2078 }
2079 if (src_devicep == NULL)
2080 {
2081 gomp_mutex_lock (&dst_devicep->lock);
2082 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2083 (char *) dst + dst_offset,
2084 (char *) src + src_offset, length);
2085 gomp_mutex_unlock (&dst_devicep->lock);
2086 return (ret ? 0 : EINVAL);
2087 }
2088 if (dst_devicep == NULL)
2089 {
2090 gomp_mutex_lock (&src_devicep->lock);
2091 ret = src_devicep->dev2host_func (src_devicep->target_id,
2092 (char *) dst + dst_offset,
2093 (char *) src + src_offset, length);
2094 gomp_mutex_unlock (&src_devicep->lock);
2095 return (ret ? 0 : EINVAL);
2096 }
2097 if (src_devicep == dst_devicep)
2098 {
2099 gomp_mutex_lock (&src_devicep->lock);
2100 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2101 (char *) dst + dst_offset,
2102 (char *) src + src_offset, length);
2103 gomp_mutex_unlock (&src_devicep->lock);
2104 return (ret ? 0 : EINVAL);
2105 }
2106 return EINVAL;
2107 }
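/* Usage sketch (illustrative; a hypothetical helper, not part of
   libgomp): copying N host bytes into device memory:

     #include <stddef.h>
     #include <omp.h>

     int
     copy_to_device (void *dst_dev, void *src_host, size_t n, int dev)
     {
       return omp_target_memcpy (dst_dev, src_host, n, 0, 0,
                                 dev, omp_get_initial_device ());
     }

   A zero return means success; EINVAL covers invalid device numbers,
   distinct non-host device pairs and failed plugin copies alike. */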
2108
2109 static int
2110 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2111 int num_dims, const size_t *volume,
2112 const size_t *dst_offsets,
2113 const size_t *src_offsets,
2114 const size_t *dst_dimensions,
2115 const size_t *src_dimensions,
2116 struct gomp_device_descr *dst_devicep,
2117 struct gomp_device_descr *src_devicep)
2118 {
2119 size_t dst_slice = element_size;
2120 size_t src_slice = element_size;
2121 size_t j, dst_off, src_off, length;
2122 int i, ret;
2123
2124 if (num_dims == 1)
2125 {
2126 if (__builtin_mul_overflow (element_size, volume[0], &length)
2127 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2128 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2129 return EINVAL;
2130 if (dst_devicep == NULL && src_devicep == NULL)
2131 {
2132 memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2133 ret = 1;
2134 }
2135 else if (src_devicep == NULL)
2136 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2137 (char *) dst + dst_off,
2138 (char *) src + src_off, length);
2139 else if (dst_devicep == NULL)
2140 ret = src_devicep->dev2host_func (src_devicep->target_id,
2141 (char *) dst + dst_off,
2142 (char *) src + src_off, length);
2143 else if (src_devicep == dst_devicep)
2144 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2145 (char *) dst + dst_off,
2146 (char *) src + src_off, length);
2147 else
2148 ret = 0;
2149 return ret ? 0 : EINVAL;
2150 }
2151
2152 /* FIXME: it would be nice to have some plugin function to handle
2153 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2154 be handled by the generic recursion below, and for host-to-host copies
2155 the recursion should be used even for num_dims >= 2. */
2156
2157 for (i = 1; i < num_dims; i++)
2158 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2159 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2160 return EINVAL;
2161 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2162 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2163 return EINVAL;
2164 for (j = 0; j < volume[0]; j++)
2165 {
2166 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2167 (char *) src + src_off,
2168 element_size, num_dims - 1,
2169 volume + 1, dst_offsets + 1,
2170 src_offsets + 1, dst_dimensions + 1,
2171 src_dimensions + 1, dst_devicep,
2172 src_devicep);
2173 if (ret)
2174 return ret;
2175 dst_off += dst_slice;
2176 src_off += src_slice;
2177 }
2178 return 0;
2179 }
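/* Worked example of the slice arithmetic above (illustrative): with
   element_size == 8 (a double), num_dims == 2, src_dimensions {10, 10},
   volume {2, 3} and src_offsets {4, 5}, the code computes
   src_slice = 8 * 10 = 80 bytes and src_off = 4 * 80 = 320 bytes; each
   of the two recursive calls then copies one row of
   length = 3 * 8 = 24 bytes at inner offset 5 * 8 = 40, i.e. source
   bytes 360..383 and 440..463. */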
2180
2181 int
2182 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2183 int num_dims, const size_t *volume,
2184 const size_t *dst_offsets,
2185 const size_t *src_offsets,
2186 const size_t *dst_dimensions,
2187 const size_t *src_dimensions,
2188 int dst_device_num, int src_device_num)
2189 {
2190 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2191
2192 if (!dst && !src)
2193 return INT_MAX;
2194
2195 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2196 {
2197 if (dst_device_num < 0)
2198 return EINVAL;
2199
2200 dst_devicep = resolve_device (dst_device_num);
2201 if (dst_devicep == NULL)
2202 return EINVAL;
2203
2204 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2205 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2206 dst_devicep = NULL;
2207 }
2208 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2209 {
2210 if (src_device_num < 0)
2211 return EINVAL;
2212
2213 src_devicep = resolve_device (src_device_num);
2214 if (src_devicep == NULL)
2215 return EINVAL;
2216
2217 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2218 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2219 src_devicep = NULL;
2220 }
2221
2222 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2223 return EINVAL;
2224
2225 if (src_devicep)
2226 gomp_mutex_lock (&src_devicep->lock);
2227 else if (dst_devicep)
2228 gomp_mutex_lock (&dst_devicep->lock);
2229 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2230 volume, dst_offsets, src_offsets,
2231 dst_dimensions, src_dimensions,
2232 dst_devicep, src_devicep);
2233 if (src_devicep)
2234 gomp_mutex_unlock (&src_devicep->lock);
2235 else if (dst_devicep)
2236 gomp_mutex_unlock (&dst_devicep->lock);
2237 return ret;
2238 }
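/* Usage sketch (illustrative): pulling the 2x3 block of doubles from the
   worked example above out of a 10x10 device matrix D (device number
   dev) into a dense 2x3 host matrix H:

     size_t volume[2]   = { 2, 3 };
     size_t dst_off[2]  = { 0, 0 };
     size_t src_off[2]  = { 4, 5 };
     size_t dst_dims[2] = { 2, 3 };
     size_t src_dims[2] = { 10, 10 };
     int err = omp_target_memcpy_rect (H, D, sizeof (double), 2, volume,
                                       dst_off, src_off, dst_dims,
                                       src_dims,
                                       omp_get_initial_device (), dev);
*/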
2239
2240 int
2241 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2242 size_t device_offset, int device_num)
2243 {
2244 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2245 return EINVAL;
2246
2247 if (device_num < 0)
2248 return EINVAL;
2249
2250 struct gomp_device_descr *devicep = resolve_device (device_num);
2251 if (devicep == NULL)
2252 return EINVAL;
2253
2254 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2255 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2256 return EINVAL;
2257
2258 gomp_mutex_lock (&devicep->lock);
2259
2260 struct splay_tree_s *mem_map = &devicep->mem_map;
2261 struct splay_tree_key_s cur_node;
2262 int ret = EINVAL;
2263
2264 cur_node.host_start = (uintptr_t) host_ptr;
2265 cur_node.host_end = cur_node.host_start + size;
2266 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2267 if (n)
2268 {
2269 if (n->tgt->tgt_start + n->tgt_offset
2270 == (uintptr_t) device_ptr + device_offset
2271 && n->host_start <= cur_node.host_start
2272 && n->host_end >= cur_node.host_end)
2273 ret = 0;
2274 }
2275 else
2276 {
2277 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2278 tgt->array = gomp_malloc (sizeof (*tgt->array));
2279 tgt->refcount = 1;
2280 tgt->tgt_start = 0;
2281 tgt->tgt_end = 0;
2282 tgt->to_free = NULL;
2283 tgt->prev = NULL;
2284 tgt->list_count = 0;
2285 tgt->device_descr = devicep;
2286 splay_tree_node array = tgt->array;
2287 splay_tree_key k = &array->key;
2288 k->host_start = cur_node.host_start;
2289 k->host_end = cur_node.host_end;
2290 k->tgt = tgt;
2291 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2292 k->refcount = REFCOUNT_INFINITY;
2293 array->left = NULL;
2294 array->right = NULL;
2295 splay_tree_insert (&devicep->mem_map, array);
2296 ret = 0;
2297 }
2298 gomp_mutex_unlock (&devicep->lock);
2299 return ret;
2300 }
2301
2302 int
2303 omp_target_disassociate_ptr (void *ptr, int device_num)
2304 {
2305 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2306 return EINVAL;
2307
2308 if (device_num < 0)
2309 return EINVAL;
2310
2311 struct gomp_device_descr *devicep = resolve_device (device_num);
2312 if (devicep == NULL)
2313 return EINVAL;
2314
2315 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2316 return EINVAL;
2317
2318 gomp_mutex_lock (&devicep->lock);
2319
2320 struct splay_tree_s *mem_map = &devicep->mem_map;
2321 struct splay_tree_key_s cur_node;
2322 int ret = EINVAL;
2323
2324 cur_node.host_start = (uintptr_t) ptr;
2325 cur_node.host_end = cur_node.host_start;
2326 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2327 if (n
2328 && n->host_start == cur_node.host_start
2329 && n->refcount == REFCOUNT_INFINITY
2330 && n->tgt->tgt_start == 0
2331 && n->tgt->to_free == NULL
2332 && n->tgt->refcount == 1
2333 && n->tgt->list_count == 0)
2334 {
2335 splay_tree_remove (&devicep->mem_map, n);
2336 gomp_unmap_tgt (n->tgt);
2337 ret = 0;
2338 }
2339
2340 gomp_mutex_unlock (&devicep->lock);
2341 return ret;
2342 }
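/* Usage sketch for the associate/disassociate pair above (illustrative):

     double buf[100];
     int dev = omp_get_default_device ();
     void *d = omp_target_alloc (sizeof (buf), dev);
     if (d != NULL
         && omp_target_associate_ptr (buf, d, sizeof (buf), 0, dev) == 0)
       {
         // buf is now mapped to d with REFCOUNT_INFINITY; target regions
         // referencing buf reuse d instead of allocating a new mapping
         omp_target_disassociate_ptr (buf, dev);
       }
     omp_target_free (d, dev);
*/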
2343
2344 #ifdef PLUGIN_SUPPORT
2345
2346 /* This function tries to load a plugin for DEVICE. The name of the
2347 plugin is passed in PLUGIN_NAME.
2348 The handles of the functions found are stored in the corresponding fields
2349 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2350
2351 static bool
2352 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2353 const char *plugin_name)
2354 {
2355 const char *err = NULL, *last_missing = NULL;
2356
2357 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2358 if (!plugin_handle)
2359 goto dl_fail;
2360
2361 /* Check if all required functions are available in the plugin and store
2362 their handles. None of the symbols can legitimately be NULL,
2363 so we don't need to check dlerror all the time. */
2364 #define DLSYM(f) \
2365 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2366 goto dl_fail
2367 /* Similar, but missing functions are not an error; the expression
2368 evaluates to false (recording LAST_MISSING) if the lookup failed. */
2369 #define DLSYM_OPT(f, n) \
2370 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2371 || (last_missing = #n, 0))
2372
2373 DLSYM (version);
2374 if (device->version_func () != GOMP_VERSION)
2375 {
2376 err = "plugin version mismatch";
2377 goto fail;
2378 }
2379
2380 DLSYM (get_name);
2381 DLSYM (get_caps);
2382 DLSYM (get_type);
2383 DLSYM (get_num_devices);
2384 DLSYM (init_device);
2385 DLSYM (fini_device);
2386 DLSYM (load_image);
2387 DLSYM (unload_image);
2388 DLSYM (alloc);
2389 DLSYM (free);
2390 DLSYM (dev2host);
2391 DLSYM (host2dev);
2392 device->capabilities = device->get_caps_func ();
2393 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2394 {
2395 DLSYM (run);
2396 DLSYM (async_run);
2397 DLSYM_OPT (can_run, can_run);
2398 DLSYM (dev2dev);
2399 }
2400 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2401 {
2402 if (!DLSYM_OPT (openacc.exec, openacc_parallel)
2403 || !DLSYM_OPT (openacc.register_async_cleanup,
2404 openacc_register_async_cleanup)
2405 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2406 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2407 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2408 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2409 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2410 || !DLSYM_OPT (openacc.async_wait_all_async,
2411 openacc_async_wait_all_async)
2412 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2413 || !DLSYM_OPT (openacc.create_thread_data,
2414 openacc_create_thread_data)
2415 || !DLSYM_OPT (openacc.destroy_thread_data,
2416 openacc_destroy_thread_data))
2417 {
2418 /* Require all the OpenACC handlers if we have
2419 GOMP_OFFLOAD_CAP_OPENACC_200. */
2420 err = "plugin missing OpenACC handler function";
2421 goto fail;
2422 }
2423
2424 unsigned cuda = 0;
2425 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2426 openacc_get_current_cuda_device);
2427 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2428 openacc_get_current_cuda_context);
2429 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
2430 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
2431 if (cuda && cuda != 4)
2432 {
2433 /* Make sure all the CUDA functions are there if any of them are. */
2434 err = "plugin missing OpenACC CUDA handler function";
2435 goto fail;
2436 }
2437 }
2438 #undef DLSYM
2439 #undef DLSYM_OPT
2440
2441 return 1;
2442
2443 dl_fail:
2444 err = dlerror ();
2445 fail:
2446 gomp_error ("while loading %s: %s", plugin_name, err);
2447 if (last_missing)
2448 gomp_error ("missing function was %s", last_missing);
2449 if (plugin_handle)
2450 dlclose (plugin_handle);
2451
2452 return 0;
2453 }
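/* Summarizing the DLSYM list above: a minimal plugin must export
   GOMP_OFFLOAD_version (returning GOMP_VERSION), get_name, get_caps,
   get_type, get_num_devices, init_device, fini_device, load_image,
   unload_image, alloc, free, dev2host and host2dev, each prefixed with
   "GOMP_OFFLOAD_". Plugins advertising GOMP_OFFLOAD_CAP_OPENMP_400 must
   additionally export run, async_run and dev2dev (can_run is optional),
   and GOMP_OFFLOAD_CAP_OPENACC_200 plugins the full set of openacc_*
   handlers checked above. */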
2454
2455 /* This function finalizes all initialized devices. */
2456
2457 static void
2458 gomp_target_fini (void)
2459 {
2460 int i;
2461 for (i = 0; i < num_devices; i++)
2462 {
2463 bool ret = true;
2464 struct gomp_device_descr *devicep = &devices[i];
2465 gomp_mutex_lock (&devicep->lock);
2466 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2467 {
2468 ret = devicep->fini_device_func (devicep->target_id);
2469 devicep->state = GOMP_DEVICE_FINALIZED;
2470 }
2471 gomp_mutex_unlock (&devicep->lock);
2472 if (!ret)
2473 gomp_fatal ("device finalization failed");
2474 }
2475 }
2476
2477 /* This function initializes the runtime needed for offloading.
2478 It parses the list of offload targets and tries to load the plugins for
2479 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2480 will be set, and the array DEVICES initialized, containing descriptors for
2481 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2482 by the others. */
2483
2484 static void
2485 gomp_target_init (void)
2486 {
2487 const char *prefix = "libgomp-plugin-";
2488 const char *suffix = SONAME_SUFFIX (1);
2489 const char *cur, *next;
2490 char *plugin_name;
2491 int i, new_num_devices;
2492
2493 num_devices = 0;
2494 devices = NULL;
2495
2496 cur = OFFLOAD_TARGETS;
2497 if (*cur)
2498 do
2499 {
2500 struct gomp_device_descr current_device;
2501
2502 next = strchr (cur, ',');
2503
2504 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2505 + strlen (prefix) + strlen (suffix));
2506 if (!plugin_name)
2507 {
2508 num_devices = 0;
2509 break;
2510 }
2511
2512 strcpy (plugin_name, prefix);
2513 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2514 strcat (plugin_name, suffix);
2515
2516 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2517 {
2518 new_num_devices = current_device.get_num_devices_func ();
2519 if (new_num_devices >= 1)
2520 {
2521 /* Augment DEVICES and NUM_DEVICES. */
2522
2523 devices = realloc (devices, (num_devices + new_num_devices)
2524 * sizeof (struct gomp_device_descr));
2525 if (!devices)
2526 {
2527 num_devices = 0;
2528 free (plugin_name);
2529 break;
2530 }
2531
2532 current_device.name = current_device.get_name_func ();
2533 /* current_device.capabilities has already been set. */
2534 current_device.type = current_device.get_type_func ();
2535 current_device.mem_map.root = NULL;
2536 current_device.state = GOMP_DEVICE_UNINITIALIZED;
2537 current_device.openacc.data_environ = NULL;
2538 for (i = 0; i < new_num_devices; i++)
2539 {
2540 current_device.target_id = i;
2541 devices[num_devices] = current_device;
2542 gomp_mutex_init (&devices[num_devices].lock);
2543 num_devices++;
2544 }
2545 }
2546 }
2547
2548 free (plugin_name);
2549 cur = next + 1;
2550 }
2551 while (next);
2552
2553 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2554 NUM_DEVICES_OPENMP. */
2555 struct gomp_device_descr *devices_s
2556 = malloc (num_devices * sizeof (struct gomp_device_descr));
2557 if (!devices_s)
2558 {
2559 num_devices = 0;
2560 free (devices);
2561 devices = NULL;
2562 }
2563 num_devices_openmp = 0;
2564 for (i = 0; i < num_devices; i++)
2565 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2566 devices_s[num_devices_openmp++] = devices[i];
2567 int num_devices_after_openmp = num_devices_openmp;
2568 for (i = 0; i < num_devices; i++)
2569 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2570 devices_s[num_devices_after_openmp++] = devices[i];
2571 free (devices);
2572 devices = devices_s;
2573
2574 for (i = 0; i < num_devices; i++)
2575 {
2576 /* The 'devices' array can be moved (by the realloc call) until we have
2577 found all the plugins, so registering with the OpenACC runtime (which
2578 takes a copy of the pointer argument) must be delayed until now. */
2579 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2580 goacc_register (&devices[i]);
2581 }
2582
2583 if (atexit (gomp_target_fini) != 0)
2584 gomp_fatal ("atexit failed");
2585 }
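/* Illustrative example of the name construction above: if
   OFFLOAD_TARGETS were "nvptx,intelmic" (the exact strings are chosen at
   configure time), the loop would try to dlopen "libgomp-plugin-nvptx"
   and "libgomp-plugin-intelmic", each completed with the versioned
   shared-object suffix from SONAME_SUFFIX (1), e.g. ".so.1" on ELF
   targets. */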
2586
2587 #else /* PLUGIN_SUPPORT */
2588 /* If dlfcn.h is unavailable we always fall back to host execution.
2589 The GOMP_target* routines are just stubs in this case. */
2590 static void
2591 gomp_target_init (void)
2592 {
2593 }
2594 #endif /* PLUGIN_SUPPORT */