libgomp/oacc-mem.c
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2016 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Return block containing [H,H+S), or NULL if not contained.  The device
   lock for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return block containing [D,D+S), or NULL if not contained.  The list
   isn't ordered by device address, so we have to iterate over the whole
   array.  This is not expected to be a common operation.  The device lock
   associated with TGT must be locked on entry, and remains locked on
   exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  /* Find the descriptor whose device range covers [D,D+S).  */
  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  /* Within that descriptor, return the first key whose host range D
     translates back into.  */
  for (i = 0; i < t->list_count; i++)
    {
      void *offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}

/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check whether it is
   mapped, and, if so, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      memmove (d, h, s);
      return;
    }

  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
}

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      memmove (h, d, s);
      return;
    }

  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
}
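
/* A minimal usage sketch of the raw-buffer routines above.  It is
   illustrative only and compiled out: the example function is
   hypothetical, not part of libgomp.  It allocates device memory,
   copies a host array in and back out, and frees the buffer.  */
#if 0
#include <openacc.h>
#include <assert.h>
#include <string.h>

static void
example_round_trip (void)
{
  int in[4] = { 1, 2, 3, 4 }, out[4];

  /* acc_malloc returns NULL on exhaustion (see above), so check.  */
  void *d = acc_malloc (sizeof in);
  assert (d);

  acc_memcpy_to_device (d, in, sizeof in);
  acc_memcpy_from_device (out, d, sizeof out);
  assert (memcmp (in, out, sizeof in) == 0);

  acc_free (d);
}
#endif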

/* Return the device pointer that corresponds to host data H, or NULL
   if it is not mapped.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D, or NULL
   if it is not mapped.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
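
/* An illustrative sketch, compiled out; the example function is
   hypothetical and not part of libgomp.  For a freshly created single
   mapping, acc_deviceptr and acc_hostptr act as inverses: translating
   the host address to the device and back yields the original.  */
#if 0
#include <openacc.h>
#include <assert.h>

static void
example_translate (void)
{
  float a[8];

  acc_copyin (a, sizeof a);

  void *d = acc_deviceptr (a);
  assert (d != NULL);
  assert (acc_hostptr (d) == (void *) a);

  acc_delete (a, sizeof a);
}
#endif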

/* Return 1 if host data [H,+S] is present on the device.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* Treat the block as absent unless [H,+S] lies entirely inside it.  */
  if (n && ((uintptr_t)h < n->host_start
            || (uintptr_t)h + s > n->host_end
            || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
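
/* A sketch of the presence test, compiled out; the example function is
   hypothetical.  acc_is_present answers for the whole mapping and for
   subranges of it, but not for unmapped data.  */
#if 0
#include <openacc.h>
#include <assert.h>

static void
example_presence (void)
{
  double v[16];

  assert (!acc_is_present (v, sizeof v));
  acc_create (v, sizeof v);
  assert (acc_is_present (v, sizeof v));
  assert (acc_is_present (&v[4], 4 * sizeof (double)));  /* subrange */
  acc_delete (v, sizeof v);
}
#endif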

/* Create a mapping for host [H,+S] -> device [D,+S].  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");

      /* Nothing to record on a shared-memory system.  */
      return;
    }

  if (!d || !h || !s)
    gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                (void *)h, (int)s, (void *)d, (int)s);

  gomp_mutex_lock (&acc_dev->lock);

  if (lookup_host (acc_dev, h, s))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
                  (int)s);
    }

  if (lookup_dev (acc_dev->openacc.data_environ, d, s))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
                  (int)s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
                       &kinds, true, GOMP_MAP_VARS_OPENACC);

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
                  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        if (n->tgt == t)
          {
            if (tp)
              tp->prev = t->prev;
            else
              acc_dev->openacc.data_environ = t->prev;

            break;
          }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
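
/* A usage sketch for acc_map_data/acc_unmap_data, compiled out; the
   example function and buffer are hypothetical.  The caller supplies
   both halves of the mapping, here with device memory obtained from
   acc_malloc.  */
#if 0
#include <openacc.h>

static void
example_map (void)
{
  static char host_buf[256];
  void *dev_buf = acc_malloc (sizeof host_buf);

  /* Record host_buf <-> dev_buf; per the checks above, neither half
     may already be mapped.  */
  acc_map_data (host_buf, dev_buf, sizeof host_buf);

  /* ... host_buf is now treated as present on the device ... */

  /* acc_unmap_data removes the mapping without freeing the device
     memory, so the buffer must still be released explicitly.  */
  acc_unmap_data (host_buf);
  acc_free (dev_buf);
}
#endif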

#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
                      (void *)h, (int)s, (void *)d, (int)s);
        }
      if ((h + s) > (void *)n->host_end)
        {
          gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
        }

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
        kinds = GOMP_MAP_TO;
      else
        kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
                           GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE, h, s);
}

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
}

void *
acc_present_or_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}

void *
acc_present_or_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}
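
/* How the wrappers above differ, as a compiled-out sketch with a
   hypothetical example function: acc_create/acc_copyin abort if the
   range is already mapped, whereas the present_or_ variants return
   the existing device address instead.  */
#if 0
#include <openacc.h>
#include <assert.h>

static void
example_present_or (void)
{
  int x[32];

  acc_copyin (x, sizeof x);	/* map and copy to device */

  /* Already present: returns the existing device address, where a
     second acc_copyin or acc_create would be fatal.  */
  void *d = acc_present_or_create (x, sizeof x);
  assert (d == acc_deviceptr (x));

  acc_delete (x, sizeof x);
}
#endif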

#define FLAG_COPYOUT (1 << 0)

static void
delete_copyout (unsigned f, void *h, size_t s)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  /* No need to call lazy open, as the data must already have been
     mapped.  */
  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds [%p,+%d]",
                  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  acc_dev->free_func (acc_dev->target_id, d);
}

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s);
}
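
/* The typical data lifecycle built from these routines, as a
   compiled-out sketch; the example function is hypothetical.
   delete_copyout above insists on the exact range that was mapped,
   so the copyin and copyout sizes must match.  */
#if 0
#include <openacc.h>

static void
example_lifecycle (float *a, size_t n)
{
  acc_copyin (a, n * sizeof *a);	/* host -> device, mapping created */

  /* ... compute regions operate on the device copy here ... */

  /* Copy the device data back and remove the mapping; acc_delete
     would remove it without copying back.  */
  acc_copyout (a, n * sizeof *a);
}
#endif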

static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
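
/* An update sketch, compiled out; the example function is
   hypothetical.  While a mapping is live, acc_update_device pushes
   fresh host values to the device and acc_update_self pulls device
   values back, without remapping anything.  */
#if 0
#include <openacc.h>

static void
example_update (int *flags, size_t n)
{
  acc_create (flags, n * sizeof *flags);

  flags[0] = 1;
  acc_update_device (flags, n * sizeof *flags);	/* host -> device */

  /* ... device code may modify flags here ... */

  acc_update_self (flags, n * sizeof *flags);	/* device -> host */
  acc_delete (flags, n * sizeof *flags);
}
#endif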

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
                         void *kinds)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
                       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
         chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
           tp = t, t = t->prev)
        {
          if (n->tgt == t)
            {
              if (tp)
                tp->prev = t->prev;
              else
                acc_dev->openacc.data_environ = t->prev;
              break;
            }
        }
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
}