* tree-vect-loop-manip.c (create_intersect_range_checks_index)
[gcc.git] / libgomp / oacc-mem.c
1 /* OpenACC Runtime initialization routines
2
3 Copyright (C) 2013-2017 Free Software Foundation, Inc.
4
5 Contributed by Mentor Embedded.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "openacc.h"
30 #include "config.h"
31 #include "libgomp.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #include <stdint.h>
35 #include <string.h>
36 #include <assert.h>
37
38 /* Return block containing [H->S), or NULL if not contained. The device lock
39 for DEV must be locked on entry, and remains locked on exit. */
40
41 static splay_tree_key
42 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
43 {
44 struct splay_tree_key_s node;
45 splay_tree_key key;
46
47 node.host_start = (uintptr_t) h;
48 node.host_end = (uintptr_t) h + s;
49
50 key = splay_tree_lookup (&dev->mem_map, &node);
51
52 return key;
53 }
54
55 /* Return block containing [D->S), or NULL if not contained.
56 The list isn't ordered by device address, so we have to iterate
57 over the whole array. This is not expected to be a common
58 operation. The device lock associated with TGT must be locked on entry, and
59 remains locked on exit. */
60
61 static splay_tree_key
62 lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
63 {
64 int i;
65 struct target_mem_desc *t;
66
67 if (!tgt)
68 return NULL;
69
70 for (t = tgt; t != NULL; t = t->prev)
71 {
72 if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
73 break;
74 }
75
76 if (!t)
77 return NULL;
78
79 for (i = 0; i < t->list_count; i++)
80 {
81 void * offset;
82
83 splay_tree_key k = &t->array[i].key;
84 offset = d - t->tgt_start + k->tgt_offset;
85
86 if (k->host_start + offset <= (void *) k->host_end)
87 return k;
88 }
89
90 return NULL;
91 }
92
93 /* OpenACC is silent on how memory exhaustion is indicated. We return
94 NULL. */
95
96 void *
97 acc_malloc (size_t s)
98 {
99 if (!s)
100 return NULL;
101
102 goacc_lazy_initialize ();
103
104 struct goacc_thread *thr = goacc_thread ();
105
106 assert (thr->dev);
107
108 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
109 return malloc (s);
110
111 return thr->dev->alloc_func (thr->dev->target_id, s);
112 }
113
114 /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
115 the device address is mapped. We choose to check if it mapped,
116 and if it is, to unmap it. */
117 void
118 acc_free (void *d)
119 {
120 splay_tree_key k;
121
122 if (!d)
123 return;
124
125 struct goacc_thread *thr = goacc_thread ();
126
127 assert (thr && thr->dev);
128
129 struct gomp_device_descr *acc_dev = thr->dev;
130
131 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
132 return free (d);
133
134 gomp_mutex_lock (&acc_dev->lock);
135
136 /* We don't have to call lazy open here, as the ptr value must have
137 been returned by acc_malloc. It's not permitted to pass NULL in
138 (unless you got that null from acc_malloc). */
139 if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
140 {
141 void *offset;
142
143 offset = d - k->tgt->tgt_start + k->tgt_offset;
144
145 gomp_mutex_unlock (&acc_dev->lock);
146
147 acc_unmap_data ((void *)(k->host_start + offset));
148 }
149 else
150 gomp_mutex_unlock (&acc_dev->lock);
151
152 if (!acc_dev->free_func (acc_dev->target_id, d))
153 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
154 }
155
156 void
157 acc_memcpy_to_device (void *d, void *h, size_t s)
158 {
159 /* No need to call lazy open here, as the device pointer must have
160 been obtained from a routine that did that. */
161 struct goacc_thread *thr = goacc_thread ();
162
163 assert (thr && thr->dev);
164
165 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
166 {
167 memmove (d, h, s);
168 return;
169 }
170
171 if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
172 gomp_fatal ("error in %s", __FUNCTION__);
173 }
174
175 void
176 acc_memcpy_from_device (void *h, void *d, size_t s)
177 {
178 /* No need to call lazy open here, as the device pointer must have
179 been obtained from a routine that did that. */
180 struct goacc_thread *thr = goacc_thread ();
181
182 assert (thr && thr->dev);
183
184 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
185 {
186 memmove (h, d, s);
187 return;
188 }
189
190 if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
191 gomp_fatal ("error in %s", __FUNCTION__);
192 }
193
194 /* Return the device pointer that corresponds to host data H. Or NULL
195 if no mapping. */
196
197 void *
198 acc_deviceptr (void *h)
199 {
200 splay_tree_key n;
201 void *d;
202 void *offset;
203
204 goacc_lazy_initialize ();
205
206 struct goacc_thread *thr = goacc_thread ();
207 struct gomp_device_descr *dev = thr->dev;
208
209 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
210 return h;
211
212 gomp_mutex_lock (&dev->lock);
213
214 n = lookup_host (dev, h, 1);
215
216 if (!n)
217 {
218 gomp_mutex_unlock (&dev->lock);
219 return NULL;
220 }
221
222 offset = h - n->host_start;
223
224 d = n->tgt->tgt_start + n->tgt_offset + offset;
225
226 gomp_mutex_unlock (&dev->lock);
227
228 return d;
229 }
230
231 /* Return the host pointer that corresponds to device data D. Or NULL
232 if no mapping. */
233
234 void *
235 acc_hostptr (void *d)
236 {
237 splay_tree_key n;
238 void *h;
239 void *offset;
240
241 goacc_lazy_initialize ();
242
243 struct goacc_thread *thr = goacc_thread ();
244 struct gomp_device_descr *acc_dev = thr->dev;
245
246 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
247 return d;
248
249 gomp_mutex_lock (&acc_dev->lock);
250
251 n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
252
253 if (!n)
254 {
255 gomp_mutex_unlock (&acc_dev->lock);
256 return NULL;
257 }
258
259 offset = d - n->tgt->tgt_start + n->tgt_offset;
260
261 h = n->host_start + offset;
262
263 gomp_mutex_unlock (&acc_dev->lock);
264
265 return h;
266 }
267
268 /* Return 1 if host data [H,+S] is present on the device. */
269
270 int
271 acc_is_present (void *h, size_t s)
272 {
273 splay_tree_key n;
274
275 if (!s || !h)
276 return 0;
277
278 goacc_lazy_initialize ();
279
280 struct goacc_thread *thr = goacc_thread ();
281 struct gomp_device_descr *acc_dev = thr->dev;
282
283 if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
284 return h != NULL;
285
286 gomp_mutex_lock (&acc_dev->lock);
287
288 n = lookup_host (acc_dev, h, s);
289
290 if (n && ((uintptr_t)h < n->host_start
291 || (uintptr_t)h + s > n->host_end
292 || s > n->host_end - n->host_start))
293 n = NULL;
294
295 gomp_mutex_unlock (&acc_dev->lock);
296
297 return n != NULL;
298 }
299
300 /* Create a mapping for host [H,+S] -> device [D,+S] */
301
302 void
303 acc_map_data (void *h, void *d, size_t s)
304 {
305 struct target_mem_desc *tgt = NULL;
306 size_t mapnum = 1;
307 void *hostaddrs = h;
308 void *devaddrs = d;
309 size_t sizes = s;
310 unsigned short kinds = GOMP_MAP_ALLOC;
311
312 goacc_lazy_initialize ();
313
314 struct goacc_thread *thr = goacc_thread ();
315 struct gomp_device_descr *acc_dev = thr->dev;
316
317 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
318 {
319 if (d != h)
320 gomp_fatal ("cannot map data on shared-memory system");
321 }
322 else
323 {
324 struct goacc_thread *thr = goacc_thread ();
325
326 if (!d || !h || !s)
327 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
328 (void *)h, (int)s, (void *)d, (int)s);
329
330 gomp_mutex_lock (&acc_dev->lock);
331
332 if (lookup_host (acc_dev, h, s))
333 {
334 gomp_mutex_unlock (&acc_dev->lock);
335 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
336 (int)s);
337 }
338
339 if (lookup_dev (thr->dev->openacc.data_environ, d, s))
340 {
341 gomp_mutex_unlock (&acc_dev->lock);
342 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
343 (int)s);
344 }
345
346 gomp_mutex_unlock (&acc_dev->lock);
347
348 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
349 &kinds, true, GOMP_MAP_VARS_OPENACC);
350 }
351
352 gomp_mutex_lock (&acc_dev->lock);
353 tgt->prev = acc_dev->openacc.data_environ;
354 acc_dev->openacc.data_environ = tgt;
355 gomp_mutex_unlock (&acc_dev->lock);
356 }
357
358 void
359 acc_unmap_data (void *h)
360 {
361 struct goacc_thread *thr = goacc_thread ();
362 struct gomp_device_descr *acc_dev = thr->dev;
363
364 /* No need to call lazy open, as the address must have been mapped. */
365
366 /* This is a no-op on shared-memory targets. */
367 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
368 return;
369
370 size_t host_size;
371
372 gomp_mutex_lock (&acc_dev->lock);
373
374 splay_tree_key n = lookup_host (acc_dev, h, 1);
375 struct target_mem_desc *t;
376
377 if (!n)
378 {
379 gomp_mutex_unlock (&acc_dev->lock);
380 gomp_fatal ("%p is not a mapped block", (void *)h);
381 }
382
383 host_size = n->host_end - n->host_start;
384
385 if (n->host_start != (uintptr_t) h)
386 {
387 gomp_mutex_unlock (&acc_dev->lock);
388 gomp_fatal ("[%p,%d] surrounds %p",
389 (void *) n->host_start, (int) host_size, (void *) h);
390 }
391
392 t = n->tgt;
393
394 if (t->refcount == 2)
395 {
396 struct target_mem_desc *tp;
397
398 /* This is the last reference, so pull the descriptor off the
399 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
400 freeing the device memory. */
401 t->tgt_end = 0;
402 t->to_free = 0;
403
404 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
405 tp = t, t = t->prev)
406 if (n->tgt == t)
407 {
408 if (tp)
409 tp->prev = t->prev;
410 else
411 acc_dev->openacc.data_environ = t->prev;
412
413 break;
414 }
415 }
416
417 gomp_mutex_unlock (&acc_dev->lock);
418
419 gomp_unmap_vars (t, true);
420 }
421
422 #define FLAG_PRESENT (1 << 0)
423 #define FLAG_CREATE (1 << 1)
424 #define FLAG_COPY (1 << 2)
425
426 static void *
427 present_create_copy (unsigned f, void *h, size_t s)
428 {
429 void *d;
430 splay_tree_key n;
431
432 if (!h || !s)
433 gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
434
435 goacc_lazy_initialize ();
436
437 struct goacc_thread *thr = goacc_thread ();
438 struct gomp_device_descr *acc_dev = thr->dev;
439
440 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
441 return h;
442
443 gomp_mutex_lock (&acc_dev->lock);
444
445 n = lookup_host (acc_dev, h, s);
446 if (n)
447 {
448 /* Present. */
449 d = (void *) (n->tgt->tgt_start + n->tgt_offset);
450
451 if (!(f & FLAG_PRESENT))
452 {
453 gomp_mutex_unlock (&acc_dev->lock);
454 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
455 (void *)h, (int)s, (void *)d, (int)s);
456 }
457 if ((h + s) > (void *)n->host_end)
458 {
459 gomp_mutex_unlock (&acc_dev->lock);
460 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
461 }
462
463 gomp_mutex_unlock (&acc_dev->lock);
464 }
465 else if (!(f & FLAG_CREATE))
466 {
467 gomp_mutex_unlock (&acc_dev->lock);
468 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
469 }
470 else
471 {
472 struct target_mem_desc *tgt;
473 size_t mapnum = 1;
474 unsigned short kinds;
475 void *hostaddrs = h;
476
477 if (f & FLAG_COPY)
478 kinds = GOMP_MAP_TO;
479 else
480 kinds = GOMP_MAP_ALLOC;
481
482 gomp_mutex_unlock (&acc_dev->lock);
483
484 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
485 GOMP_MAP_VARS_OPENACC);
486
487 gomp_mutex_lock (&acc_dev->lock);
488
489 d = tgt->to_free;
490 tgt->prev = acc_dev->openacc.data_environ;
491 acc_dev->openacc.data_environ = tgt;
492
493 gomp_mutex_unlock (&acc_dev->lock);
494 }
495
496 return d;
497 }
498
499 void *
500 acc_create (void *h, size_t s)
501 {
502 return present_create_copy (FLAG_CREATE, h, s);
503 }
504
505 void *
506 acc_copyin (void *h, size_t s)
507 {
508 return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
509 }
510
511 void *
512 acc_present_or_create (void *h, size_t s)
513 {
514 return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
515 }
516
517 /* acc_pcreate is acc_present_or_create by a different name. */
518 #ifdef HAVE_ATTRIBUTE_ALIAS
519 strong_alias (acc_present_or_create, acc_pcreate)
520 #else
521 void *
522 acc_pcreate (void *h, size_t s)
523 {
524 return acc_present_or_create (h, s);
525 }
526 #endif
527
528 void *
529 acc_present_or_copyin (void *h, size_t s)
530 {
531 return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
532 }
533
534 /* acc_pcopyin is acc_present_or_copyin by a different name. */
535 #ifdef HAVE_ATTRIBUTE_ALIAS
536 strong_alias (acc_present_or_copyin, acc_pcopyin)
537 #else
538 void *
539 acc_pcopyin (void *h, size_t s)
540 {
541 return acc_present_or_copyin (h, s);
542 }
543 #endif
544
545 #define FLAG_COPYOUT (1 << 0)
546
547 static void
548 delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
549 {
550 size_t host_size;
551 splay_tree_key n;
552 void *d;
553 struct goacc_thread *thr = goacc_thread ();
554 struct gomp_device_descr *acc_dev = thr->dev;
555
556 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
557 return;
558
559 gomp_mutex_lock (&acc_dev->lock);
560
561 n = lookup_host (acc_dev, h, s);
562
563 /* No need to call lazy open, as the data must already have been
564 mapped. */
565
566 if (!n)
567 {
568 gomp_mutex_unlock (&acc_dev->lock);
569 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
570 }
571
572 d = (void *) (n->tgt->tgt_start + n->tgt_offset
573 + (uintptr_t) h - n->host_start);
574
575 host_size = n->host_end - n->host_start;
576
577 if (n->host_start != (uintptr_t) h || host_size != s)
578 {
579 gomp_mutex_unlock (&acc_dev->lock);
580 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
581 (void *) n->host_start, (int) host_size, (void *) h, (int) s);
582 }
583
584 gomp_mutex_unlock (&acc_dev->lock);
585
586 if (f & FLAG_COPYOUT)
587 acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
588
589 acc_unmap_data (h);
590
591 if (!acc_dev->free_func (acc_dev->target_id, d))
592 gomp_fatal ("error in freeing device memory in %s", libfnname);
593 }
594
/* Unmap host block [H,+S) and free its device memory, without copying
   anything back (no FLAG_COPYOUT).  */
void
acc_delete (void *h, size_t s)
{
  const unsigned flags = 0;

  delete_copyout (flags, h, s, __FUNCTION__);
}
600
601 void
602 acc_copyout (void *h, size_t s)
603 {
604 delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
605 }
606
607 static void
608 update_dev_host (int is_dev, void *h, size_t s)
609 {
610 splay_tree_key n;
611 void *d;
612
613 goacc_lazy_initialize ();
614
615 struct goacc_thread *thr = goacc_thread ();
616 struct gomp_device_descr *acc_dev = thr->dev;
617
618 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
619 return;
620
621 gomp_mutex_lock (&acc_dev->lock);
622
623 n = lookup_host (acc_dev, h, s);
624
625 if (!n)
626 {
627 gomp_mutex_unlock (&acc_dev->lock);
628 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
629 }
630
631 d = (void *) (n->tgt->tgt_start + n->tgt_offset
632 + (uintptr_t) h - n->host_start);
633
634 if (is_dev)
635 acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
636 else
637 acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
638
639 gomp_mutex_unlock (&acc_dev->lock);
640 }
641
/* Refresh the device copy of mapped host range [H,+S) from the host.  */
void
acc_update_device (void *h, size_t s)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s);
}
647
/* Refresh mapped host range [H,+S) from its device copy.  */
void
acc_update_self (void *h, size_t s)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s);
}
653
654 void
655 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
656 void *kinds)
657 {
658 struct target_mem_desc *tgt;
659 struct goacc_thread *thr = goacc_thread ();
660 struct gomp_device_descr *acc_dev = thr->dev;
661
662 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
663 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
664 NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
665 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
666
667 gomp_mutex_lock (&acc_dev->lock);
668 tgt->prev = acc_dev->openacc.data_environ;
669 acc_dev->openacc.data_environ = tgt;
670 gomp_mutex_unlock (&acc_dev->lock);
671 }
672
673 void
674 gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
675 {
676 struct goacc_thread *thr = goacc_thread ();
677 struct gomp_device_descr *acc_dev = thr->dev;
678 splay_tree_key n;
679 struct target_mem_desc *t;
680 int minrefs = (mapnum == 1) ? 2 : 3;
681
682 gomp_mutex_lock (&acc_dev->lock);
683
684 n = lookup_host (acc_dev, h, 1);
685
686 if (!n)
687 {
688 gomp_mutex_unlock (&acc_dev->lock);
689 gomp_fatal ("%p is not a mapped block", (void *)h);
690 }
691
692 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
693
694 t = n->tgt;
695
696 struct target_mem_desc *tp;
697
698 if (t->refcount == minrefs)
699 {
700 /* This is the last reference, so pull the descriptor off the
701 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
702 freeing the device memory. */
703 t->tgt_end = 0;
704 t->to_free = 0;
705
706 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
707 tp = t, t = t->prev)
708 {
709 if (n->tgt == t)
710 {
711 if (tp)
712 tp->prev = t->prev;
713 else
714 acc_dev->openacc.data_environ = t->prev;
715 break;
716 }
717 }
718 }
719
720 if (force_copyfrom)
721 t->list[0].copy_from = 1;
722
723 gomp_mutex_unlock (&acc_dev->lock);
724
725 /* If running synchronously, unmap immediately. */
726 if (async < acc_async_noval)
727 gomp_unmap_vars (t, true);
728 else
729 t->device_descr->openacc.register_async_cleanup_func (t, async);
730
731 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
732 }