Fortran] Use proper type for hidden is-present argument
[gcc.git] / liboffloadmic / runtime / offload_omp_host.cpp
1 /*
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31 #include <omp.h>
32 //#include <stdlib.h>
33 //#include "offload.h"
34 #include "compiler_if_host.h"
35
36
37 // OpenMP API
38
39 void omp_set_default_device(int num) __GOMP_NOTHROW
40 {
41 if (num >= 0) {
42 __omp_device_num = num;
43 }
44 }
45
46 int omp_get_default_device(void) __GOMP_NOTHROW
47 {
48 return __omp_device_num;
49 }
50
51 int omp_get_num_devices() __GOMP_NOTHROW
52 {
53 __offload_init_library();
54 return mic_engines_total;
55 }
56
57 // OpenMP 4.5 APIs
58
// Highest array rank accepted by the rectangular copy routines; COI
// multi-dimensional transfers support at most three dimensions.
#define MAX_ARRAY_RANK 3
61
62 int omp_get_initial_device(
63 void
64 ) __GOMP_NOTHROW
65 {
66 return -1;
67 }
68
69 void* omp_target_alloc(
70 size_t size,
71 int device_num
72 ) __GOMP_NOTHROW
73 {
74 __offload_init_library();
75
76 OFFLOAD_TRACE(2, "omp_target_alloc(%lld, %d)\n", size, device_num);
77
78 if (device_num < -1) {
79 LIBOFFLOAD_ERROR(c_invalid_device_number);
80 exit(1);
81 }
82
83 void* result = 0;
84
85 // malloc on CPU
86 if (device_num == -1) {
87 // We do not check for malloc returning NULL because the
88 // specification of this API includes the possibility of failure.
89 // The user will check the returned result
90 result = malloc(size);
91 return result;
92 }
93
94 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
95 TARGET_MIC, device_num, 0, NULL, __func__, 0);
96 if (ofld != 0) {
97 VarDesc vars[2] = {0};
98
99 vars[0].type.src = c_data;
100 vars[0].type.dst = c_data;
101 vars[0].direction.bits = c_parameter_in;
102 vars[0].size = sizeof(size);
103 vars[0].count = 1;
104 vars[0].ptr = &size;
105
106 vars[1].type.src = c_data;
107 vars[1].type.dst = c_data;
108 vars[1].direction.bits = c_parameter_out;
109 vars[1].size = sizeof(result);
110 vars[1].count = 1;
111 vars[1].ptr = &result;
112
113 OFFLOAD_OFFLOAD(ofld, "omp_target_alloc_target",
114 0, 2, vars, NULL, 0, 0, 0);
115 }
116 return result;
117 }
118
119 void omp_target_free(
120 void *device_ptr,
121 int device_num
122 ) __GOMP_NOTHROW
123 {
124 __offload_init_library();
125
126 OFFLOAD_TRACE(2, "omp_target_free(%p, %d)\n", device_ptr, device_num);
127
128 if (device_num < -1) {
129 LIBOFFLOAD_ERROR(c_invalid_device_number);
130 exit(1);
131 }
132
133 // free on CPU
134 if (device_num == -1) {
135 free(device_ptr);
136 return;
137 }
138
139 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
140 TARGET_MIC, device_num, 0, NULL, __func__, 0);
141 if (ofld) {
142 VarDesc vars[1] = {0};
143
144 vars[0].type.src = c_data;
145 vars[0].type.dst = c_data;
146 vars[0].direction.bits = c_parameter_in;
147 vars[0].size = sizeof(device_ptr);
148 vars[0].count = 1;
149 vars[0].ptr = &device_ptr;
150
151 OFFLOAD_OFFLOAD(ofld, "omp_target_free_target",
152 0, 1, vars, NULL, 0, 0, 0);
153 }
154 }
155
156 int omp_target_is_present(
157 void *ptr,
158 int device_num
159 ) __GOMP_NOTHROW
160 {
161 __offload_init_library();
162
163 OFFLOAD_TRACE(2, "omp_target_is_present(%p, %d)\n", ptr, device_num);
164
165 if (device_num < -1) {
166 LIBOFFLOAD_ERROR(c_invalid_device_number);
167 exit(1);
168 }
169
170 if (device_num == -1) {
171 return false;
172 }
173
174 // If OpenMP allows wrap-around for device numbers, enable next line
175 //device_num %= mic_engines_total;
176
177 // lookup existing association in pointer table
178 PtrData* ptr_data = mic_engines[device_num].find_ptr_data(ptr);
179 if (ptr_data == 0) {
180 OFFLOAD_TRACE(3, "Address %p is not mapped on device %d\n",
181 ptr, device_num);
182 return false;
183 }
184
185 OFFLOAD_TRACE(3, "Address %p found mapped on device %d\n",
186 ptr, device_num);
187 return true;
188 }
189
190 int omp_target_memcpy(
191 void *dst,
192 void *src,
193 size_t length,
194 size_t dst_offset,
195 size_t src_offset,
196 int dst_device,
197 int src_device
198 ) __GOMP_NOTHROW
199 {
200 __offload_init_library();
201
202 OFFLOAD_TRACE(2, "omp_target_memcpy(%p, %p, %lld, %lld, %lld, %d, %d)\n",
203 dst, src, length, dst_offset, src_offset, dst_device, src_device);
204
205 if (dst_device < -1 || src_device < -1) {
206 LIBOFFLOAD_ERROR(c_invalid_device_number);
207 exit(1);
208 }
209
210 char* srcp = (char *)src + src_offset;
211 char* dstp = (char *)dst + dst_offset;
212
213 if (src_device == -1) {
214 // Source is CPU
215 if (dst_device == -1) {
216 // CPU -> CPU
217 memcpy(dstp, srcp, length);
218 return 0;
219 } else {
220 // CPU -> MIC
221 // COIBufferWrite
222 // If OpenMP allows wrap-around for device numbers, enable next line
223 //dst_device %= mic_engines_total;
224
225 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", dstp);
226 COIBUFFER mic_buf;
227 COIRESULT res = COI::BufferCreateFromMemory(length,
228 COI_BUFFER_NORMAL, COI_SINK_MEMORY, dstp,
229 1, &mic_engines[dst_device].get_process(),
230 &mic_buf);
231 if (res != COI_SUCCESS) {
232 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
233 return 1;
234 }
235 res = COI::BufferWrite(mic_buf, 0, srcp, length,
236 COI_COPY_UNSPECIFIED, 0, 0, 0);
237 if (res != COI_SUCCESS) {
238 LIBOFFLOAD_ERROR(c_buf_write, res);
239 return 1;
240 }
241 res = COI::BufferDestroy(mic_buf);
242 if (res != COI_SUCCESS) {
243 LIBOFFLOAD_ERROR(c_buf_destroy, res);
244 return 1;
245 }
246 return 0;
247 }
248 } else {
249 // Source is device
250 if (dst_device == -1) {
251 // MIC -> CPU
252 // COIBufferRead
253
254 // If OpenMP allows wrap-around for device numbers, enable next line
255 //src_device %= mic_engines_total;
256
257 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", srcp);
258 COIBUFFER mic_buf;
259 COIRESULT res = COI::BufferCreateFromMemory(length,
260 COI_BUFFER_NORMAL, COI_SINK_MEMORY, srcp,
261 1, &mic_engines[src_device].get_process(),
262 &mic_buf);
263 if (res != COI_SUCCESS) {
264 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
265 return 1;
266 }
267 res = COI::BufferRead(mic_buf, 0, dstp, length,
268 COI_COPY_UNSPECIFIED, 0, 0, 0);
269 if (res != COI_SUCCESS) {
270 LIBOFFLOAD_ERROR(c_buf_read, res);
271 return 1;
272 }
273 res = COI::BufferDestroy(mic_buf);
274 if (res != COI_SUCCESS) {
275 LIBOFFLOAD_ERROR(c_buf_destroy, res);
276 return 1;
277 }
278 return 0;
279 } else {
280 // some MIC -> some MIC
281 if (src_device == dst_device) {
282 // MIC local copy will be done as remote memcpy
283
284 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
285 0, NULL, __func__, 0);
286 if (ofld) {
287 VarDesc vars[3] = {0};
288
289 vars[0].type.src = c_data;
290 vars[0].type.dst = c_data;
291 vars[0].direction.bits = c_parameter_in;
292 vars[0].size = sizeof(dstp);
293 vars[0].count = 1;
294 vars[0].ptr = &dstp;
295
296 vars[1].type.src = c_data;
297 vars[1].type.dst = c_data;
298 vars[1].direction.bits = c_parameter_in;
299 vars[1].size = sizeof(srcp);
300 vars[1].count = 1;
301 vars[1].ptr = &srcp;
302
303 vars[2].type.src = c_data;
304 vars[2].type.dst = c_data;
305 vars[2].direction.bits = c_parameter_in;
306 vars[2].size = sizeof(length);
307 vars[2].count = 1;
308 vars[2].ptr = &length;
309
310 OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_target",
311 0, 3, vars, NULL, 0, 0, 0);
312 return 0;
313 } else {
314 return 1;
315 }
316 } else {
317 // MICx -> MICy
318 // Allocate CPU buffer
319 char *cpu_mem = (char *)malloc(length);
320 if (cpu_mem == 0) {
321 LIBOFFLOAD_ERROR(c_malloc);
322 return 1;
323 }
324 int retval = 1;
325 if (omp_target_memcpy(
326 cpu_mem, srcp, length, 0, 0, -1, src_device) == 0) {
327 retval = omp_target_memcpy(
328 dstp, cpu_mem, length, 0, 0, dst_device, -1);
329 }
330 free(cpu_mem);
331 return retval;
332 }
333 }
334 }
335 }
336
// Returns the size in bytes of one index step along the first of the
// `num_dims` dimensions described by `dimensions`: element_size multiplied
// by dimensions[1] .. dimensions[num_dims-1].  dimensions[0] is not used.
// With num_dims <= 1 the result is element_size.
static size_t bytesize_at_this_dimension(
    size_t element_size,
    int num_dims,
    const size_t* dimensions
)
{
    size_t bytes = element_size;
    for (int d = 1; d < num_dims; d++) {
        bytes *= dimensions[d];
    }
    return bytes;
}
351
352 static void memcpy_rect(
353 char *dst,
354 char *src,
355 size_t element_size,
356 int num_dims,
357 const size_t *volume,
358 const size_t *dst_offsets,
359 const size_t *src_offsets,
360 const size_t *dst_dimensions,
361 const size_t *src_dimensions
362 )
363 {
364 if (num_dims > 1) {
365 int count = volume[0];
366 int dst_index = dst_offsets[0];
367 int src_index = src_offsets[0];
368 size_t dst_element_size =
369 bytesize_at_this_dimension(element_size, num_dims, dst_dimensions);
370 size_t src_element_size =
371 bytesize_at_this_dimension(element_size, num_dims, src_dimensions);
372 for (; count>0; dst_index++, src_index++, count--) {
373 memcpy_rect(dst+dst_element_size*dst_index,
374 src+src_element_size*src_index,
375 element_size, num_dims-1, volume+1,
376 dst_offsets+1, src_offsets+1,
377 dst_dimensions+1, src_dimensions+1);
378 }
379 } else {
380 memcpy(dst+dst_offsets[0]*element_size,
381 src+src_offsets[0]*element_size,
382 element_size * volume[0]);
383 }
384 }
385
386 int omp_target_memcpy_rect(
387 void *dst_,
388 void *src_,
389 size_t element_size,
390 int num_dims,
391 const size_t *volume,
392 const size_t *dst_offsets,
393 const size_t *src_offsets,
394 const size_t *dst_dimensions,
395 const size_t *src_dimensions,
396 int dst_device,
397 int src_device
398 ) __GOMP_NOTHROW
399 {
400 char *dst = (char *)dst_;
401 char *src = (char *)src_;
402
403 __offload_init_library();
404
405 OFFLOAD_TRACE(2, "omp_target_memcpy_rect(%p, %p, %lld, %d, "
406 "%p, %p, %p, %p, %p, %d, %d)\n",
407 dst, src, element_size, num_dims,
408 volume, dst_offsets, src_offsets,
409 dst_dimensions, src_dimensions, dst_device, src_device);
410
411 // MAX_ARRAY_RANK dimensions are supported
412 if (dst == 0 && src == 0) {
413 return MAX_ARRAY_RANK;
414 }
415
416 if (num_dims < 1 || num_dims > MAX_ARRAY_RANK ||
417 element_size < 1 ||
418 volume == 0 || dst_offsets == 0 || src_offsets == 0 ||
419 dst_dimensions == 0 || src_dimensions == 0) {
420 return 1;
421 }
422
423 if (dst_device < -1 || src_device < -1) {
424 LIBOFFLOAD_ERROR(c_invalid_device_number);
425 exit(1);
426 }
427
428 if (src_device == -1) {
429 // Source is CPU
430 if (dst_device == -1) {
431 // CPU -> CPU
432 memcpy_rect((char*)dst, (char*)src, element_size, num_dims, volume,
433 dst_offsets, src_offsets,
434 dst_dimensions, src_dimensions);
435 return 0;
436 } else {
437 // CPU -> MIC
438 // COIBufferWriteMultiD
439 struct arr_desc dst_desc;
440 struct arr_desc src_desc;
441
442 dst_desc.base = (int64_t)dst;
443 dst_desc.rank = num_dims;
444
445 src_desc.base = (int64_t)src;
446 src_desc.rank = num_dims;
447
448 for (int i=0; i<num_dims; i++)
449 {
450 dst_desc.dim[i].size = bytesize_at_this_dimension(
451 element_size,
452 num_dims - i,
453 dst_dimensions + i);
454 dst_desc.dim[i].lindex = 0;
455 dst_desc.dim[i].lower = dst_offsets[i];
456 dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
457 dst_desc.dim[i].stride = 1;
458
459 src_desc.dim[i].size = bytesize_at_this_dimension(
460 element_size,
461 num_dims - i,
462 src_dimensions + i);
463 src_desc.dim[i].lindex = 0;
464 src_desc.dim[i].lower = src_offsets[i];
465 src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
466 src_desc.dim[i].stride = 1;
467 }
468 __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
469 __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
470
471 // If OpenMP allows wrap-around for device numbers, enable next line
472 //dst_device %= mic_engines_total;
473
474 // Compute MIC buffer size
475 size_t dst_length = dst_dimensions[0] * bytesize_at_this_dimension(
476 element_size,
477 num_dims,
478 dst_dimensions);
479
480 OFFLOAD_TRACE(3,
481 "Creating buffer from sink memory %llx of size %lld\n",
482 dst, dst_length);
483 COIBUFFER mic_buf;
484 COIRESULT res = COI::BufferCreateFromMemory(dst_length,
485 COI_BUFFER_NORMAL, COI_SINK_MEMORY, dst,
486 1, &mic_engines[dst_device].get_process(),
487 &mic_buf);
488 if (res != COI_SUCCESS) {
489 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
490 return 1;
491 }
492 res = COI::BufferWriteMultiD(mic_buf,
493 mic_engines[dst_device].get_process(),
494 0, &dst_desc, &src_desc,
495 COI_COPY_UNSPECIFIED, 0, 0, 0);
496 if (res != COI_SUCCESS) {
497 LIBOFFLOAD_ERROR(c_buf_write, res);
498 return 1;
499 }
500 res = COI::BufferDestroy(mic_buf);
501 if (res != COI_SUCCESS) {
502 LIBOFFLOAD_ERROR(c_buf_destroy, res);
503 return 1;
504 }
505 return 0;
506 }
507 } else {
508 // Source is device
509 if (dst_device == -1) {
510 // COIBufferReadMultiD
511 struct arr_desc dst_desc;
512 struct arr_desc src_desc;
513
514 dst_desc.base = (int64_t)dst;
515 dst_desc.rank = num_dims;
516
517 src_desc.base = (int64_t)src;
518 src_desc.rank = num_dims;
519
520 for (int i=0; i<num_dims; i++)
521 {
522 dst_desc.dim[i].size = bytesize_at_this_dimension(
523 element_size,
524 num_dims - i,
525 dst_dimensions + i);
526 dst_desc.dim[i].lindex = 0;
527 dst_desc.dim[i].lower = dst_offsets[i];
528 dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
529 dst_desc.dim[i].stride = 1;
530
531 src_desc.dim[i].size = bytesize_at_this_dimension(
532 element_size,
533 num_dims - i,
534 src_dimensions + i);
535 src_desc.dim[i].lindex = 0;
536 src_desc.dim[i].lower = src_offsets[i];
537 src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
538 src_desc.dim[i].stride = 1;
539 }
540 __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
541 __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
542
543 // If OpenMP allows wrap-around for device numbers, enable next line
544 //src_device %= mic_engines_total;
545
546 // Compute MIC buffer size
547 size_t src_length = src_dimensions[0] * bytesize_at_this_dimension(
548 element_size,
549 num_dims,
550 src_dimensions);
551
552 OFFLOAD_TRACE(3,
553 "Creating buffer from sink memory %llx of size %lld\n",
554 src, src_length);
555 COIBUFFER mic_buf;
556 COIRESULT res = COI::BufferCreateFromMemory(src_length,
557 COI_BUFFER_NORMAL, COI_SINK_MEMORY, src,
558 1, &mic_engines[src_device].get_process(),
559 &mic_buf);
560 if (res != COI_SUCCESS) {
561 LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
562 return 1;
563 }
564 res = COI::BufferReadMultiD(mic_buf, 0,
565 &dst_desc, &src_desc,
566 COI_COPY_UNSPECIFIED, 0, 0, 0);
567 if (res != COI_SUCCESS) {
568 LIBOFFLOAD_ERROR(c_buf_write, res);
569 return 1;
570 }
571 res = COI::BufferDestroy(mic_buf);
572 if (res != COI_SUCCESS) {
573 LIBOFFLOAD_ERROR(c_buf_destroy, res);
574 return 1;
575 }
576 return 0;
577 } else {
578 // some MIC -> some MIC
579 if (src_device == dst_device) {
580 // MIC local copy will be done as remote memcpy_rect
581 struct parameters {
582 void *dst;
583 void *src;
584 size_t element_size;
585 int num_dims;
586 size_t array_info[MAX_ARRAY_RANK*5];
587 } parameters = {dst, src, element_size, num_dims};
588 int result;
589
590 for (int i=0; i<num_dims; i++)
591 {
592 parameters.array_info[i] = volume[i];
593 parameters.array_info[i+num_dims] = dst_offsets[i];
594 parameters.array_info[i+num_dims*2] = src_offsets[i];
595 parameters.array_info[i+num_dims*3] = dst_dimensions[i];
596 parameters.array_info[i+num_dims*4] = src_dimensions[i];
597 }
598
599 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
600 0, NULL, __func__, 0);
601 if (ofld) {
602 VarDesc vars[1] = {0};
603
604 vars[0].type.src = c_data;
605 vars[0].type.dst = c_data;
606 vars[0].direction.bits = c_parameter_in;
607 vars[0].size = sizeof(parameters) -
608 (MAX_ARRAY_RANK - num_dims) *
609 5 * sizeof(size_t);
610 vars[0].count = 1;
611 vars[0].ptr = &parameters;
612
613 OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_rect_target",
614 0, 1, vars, NULL, 0, 0, 0);
615 return 0;
616 } else {
617 return 1;
618 }
619 } else {
620 // MICx -> MICy
621
622 // Compute transfer byte-count
623 size_t dst_length = element_size;
624 for (int i=0; i<num_dims; i++) {
625 dst_length *= volume[i];
626 }
627
628 // Allocate CPU buffer
629 char *cpu_mem = (char *)malloc(dst_length);
630 if (cpu_mem == 0) {
631 LIBOFFLOAD_ERROR(c_malloc);
632 return 1;
633 }
634
635 // Create CPU offset and dimension arrays
636 // The CPU array collects the data in a contiguous block
637 size_t cpu_offsets[MAX_ARRAY_RANK];
638 size_t cpu_dimensions[MAX_ARRAY_RANK];
639 for (int i=0; i<num_dims; i++) {
640 cpu_offsets[i] = 0;
641 cpu_dimensions[i] = volume[i];
642 }
643
644 int retval = 1;
645 if (omp_target_memcpy_rect(
646 cpu_mem, src, element_size, num_dims, volume,
647 cpu_offsets, src_offsets,
648 cpu_dimensions, src_dimensions,
649 -1, src_device) == 0) {
650 retval = omp_target_memcpy_rect(
651 dst, cpu_mem, element_size, num_dims, volume,
652 dst_offsets, cpu_offsets,
653 dst_dimensions, cpu_dimensions,
654 dst_device, -1);
655 }
656 free(cpu_mem);
657 return retval;
658 }
659 }
660 }
661 }
662
663 // host_ptr is key in table that yields association on device
664 // A COIBUFFER of specified size is created from the memory at
665 // device_ptr+device_offset on device_num
666 int omp_target_associate_ptr(
667 void *host_ptr,
668 void *device_ptr,
669 size_t size,
670 size_t device_offset,
671 int device_num
672 ) __GOMP_NOTHROW
673 {
674 COIRESULT res;
675
676 __offload_init_library();
677
678 OFFLOAD_TRACE(2, "omp_target_associate_ptr(%p, %p, %lld, %lld, %d)\n",
679 host_ptr, device_ptr, size, device_offset, device_num);
680
681 if (device_num < -1) {
682 LIBOFFLOAD_ERROR(c_invalid_device_number);
683 exit(1);
684 }
685
686 // Associating to CPU is treated as failure
687 if (device_num == -1) {
688 return 1;
689 }
690
691 // An incorrect size is treated as failure
692 if (size < 0) {
693 return 1;
694 }
695
696 // If OpenMP allows wrap-around for device numbers, enable next line
697 //Engine& device = mic_engines[device_num % mic_engines_total];
698 Engine& device = mic_engines[device_num];
699
700 // Does host pointer have association already?
701 // lookup existing association in pointer table
702 PtrData* ptr_data = device.find_ptr_data(host_ptr);
703 if (ptr_data != 0) {
704 OFFLOAD_TRACE(3, "Address %p is already mapped on device %d\n",
705 host_ptr, device_num);
706 // Is current device pointer and offset same as existing?
707 if ((void*)ptr_data->mic_addr == device_ptr &&
708 (size_t)ptr_data->alloc_disp == device_offset) {
709 return 0;
710 } else {
711 return 1;
712 }
713 }
714
715 // Create association
716 OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
717 host_ptr, size);
718
719 bool is_new;
720 ptr_data = device.insert_ptr_data(host_ptr, size, is_new);
721 ptr_data->is_omp_associate = true;
722
723 // create CPU buffer
724 OFFLOAD_TRACE(3,
725 "Creating buffer from source memory %p, length %lld\n",
726 host_ptr, size);
727
728 // result is not checked because we can continue without cpu
729 // buffer. In this case we will use COIBufferRead/Write
730 // instead of COIBufferCopy.
731
732 COI::BufferCreateFromMemory(size,
733 COI_BUFFER_OPENCL,
734 0,
735 host_ptr,
736 1,
737 &device.get_process(),
738 &ptr_data->cpu_buf);
739
740 // create MIC buffer
741 OFFLOAD_TRACE(3,
742 "Creating buffer from sink memory: addr %p, size %lld\n",
743 (char *)device_ptr + device_offset, size);
744 res = COI::BufferCreateFromMemory(size,
745 COI_BUFFER_NORMAL,
746 COI_SINK_MEMORY,
747 device_ptr,
748 1,
749 &device.get_process(),
750 &ptr_data->mic_buf);
751 if (res != COI_SUCCESS) {
752 ptr_data->alloc_ptr_data_lock.unlock();
753 return 1;
754 }
755
756 // make buffer valid on the device.
757 res = COI::BufferSetState(ptr_data->mic_buf,
758 device.get_process(),
759 COI_BUFFER_VALID,
760 COI_BUFFER_NO_MOVE,
761 0, 0, 0);
762 if (res != COI_SUCCESS) {
763 ptr_data->alloc_ptr_data_lock.unlock();
764 return 1;
765 }
766
767 res = COI::BufferSetState(ptr_data->mic_buf,
768 COI_PROCESS_SOURCE,
769 COI_BUFFER_INVALID,
770 COI_BUFFER_NO_MOVE,
771 0, 0, 0);
772 if (res != COI_SUCCESS) {
773 ptr_data->alloc_ptr_data_lock.unlock();
774 return 1;
775 }
776 ptr_data->alloc_disp = device_offset;
777 ptr_data->alloc_ptr_data_lock.unlock();
778
779 return 0;
780 }
781
782 int omp_target_disassociate_ptr(
783 void *host_ptr,
784 int device_num
785 ) __GOMP_NOTHROW
786 {
787 COIRESULT res;
788
789 __offload_init_library();
790
791 OFFLOAD_TRACE(2, "omp_target_disassociate_ptr(%p, %d)\n",
792 host_ptr, device_num);
793
794 if (device_num < -1) {
795 LIBOFFLOAD_ERROR(c_invalid_device_number);
796 exit(1);
797 }
798
799 // Dissociating from CPU is treated as failure
800 if (device_num == -1) {
801 return 1;
802 }
803
804 // If OpenMP allows wrap-around for device numbers, enable next line
805 //Engine& device = mic_engines[device_num % mic_engines_total];
806 Engine& device = mic_engines[device_num];
807
808 // Lookup existing association in pointer table
809 PtrData* ptr_data = device.find_ptr_data(host_ptr);
810
811 // Attempt to disassociate unassociated pointer is a failure
812 if (ptr_data == 0) {
813 return 1;
814 }
815
816 // Destroy buffers
817 if (ptr_data->cpu_buf != 0) {
818 OFFLOAD_TRACE(3, "Destroying CPU buffer %p\n", ptr_data->cpu_buf);
819 COI::BufferDestroy(ptr_data->cpu_buf);
820 }
821 if (ptr_data->mic_buf != 0) {
822 OFFLOAD_TRACE(3, "Destroying MIC buffer %p\n", ptr_data->mic_buf);
823 COI::BufferDestroy(ptr_data->mic_buf);
824 }
825
826 // Remove association from map
827 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
828 ptr_data->cpu_addr.start());
829 device.remove_ptr_data(ptr_data->cpu_addr.start());
830
831 return 0;
832 }
833
834 // End of OpenMP 4.5 APIs
835
836
837 // OpenMP API wrappers
838
839 static void omp_set_int_target(
840 TARGET_TYPE target_type,
841 int target_number,
842 int setting,
843 const char* f_name
844 )
845 {
846 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
847 f_name, 0);
848 if (ofld) {
849 VarDesc vars[1] = {0};
850
851 vars[0].type.src = c_data;
852 vars[0].type.dst = c_data;
853 vars[0].direction.bits = c_parameter_in;
854 vars[0].size = sizeof(int);
855 vars[0].count = 1;
856 vars[0].ptr = &setting;
857
858 OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
859 }
860 }
861
862 static int omp_get_int_target(
863 TARGET_TYPE target_type,
864 int target_number,
865 const char * f_name
866 )
867 {
868 int setting = 0;
869
870 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
871 f_name, 0);
872 if (ofld) {
873 VarDesc vars[1] = {0};
874
875 vars[0].type.src = c_data;
876 vars[0].type.dst = c_data;
877 vars[0].direction.bits = c_parameter_out;
878 vars[0].size = sizeof(int);
879 vars[0].count = 1;
880 vars[0].ptr = &setting;
881
882 OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
883 }
884 return setting;
885 }
886
887 void omp_set_num_threads_target(
888 TARGET_TYPE target_type,
889 int target_number,
890 int num_threads
891 )
892 {
893 omp_set_int_target(target_type, target_number, num_threads,
894 "omp_set_num_threads_target");
895 }
896
897 int omp_get_max_threads_target(
898 TARGET_TYPE target_type,
899 int target_number
900 )
901 {
902 return omp_get_int_target(target_type, target_number,
903 "omp_get_max_threads_target");
904 }
905
906 int omp_get_num_procs_target(
907 TARGET_TYPE target_type,
908 int target_number
909 )
910 {
911 return omp_get_int_target(target_type, target_number,
912 "omp_get_num_procs_target");
913 }
914
915 void omp_set_dynamic_target(
916 TARGET_TYPE target_type,
917 int target_number,
918 int num_threads
919 )
920 {
921 omp_set_int_target(target_type, target_number, num_threads,
922 "omp_set_dynamic_target");
923 }
924
925 int omp_get_dynamic_target(
926 TARGET_TYPE target_type,
927 int target_number
928 )
929 {
930 return omp_get_int_target(target_type, target_number,
931 "omp_get_dynamic_target");
932 }
933
934 void omp_set_nested_target(
935 TARGET_TYPE target_type,
936 int target_number,
937 int nested
938 )
939 {
940 omp_set_int_target(target_type, target_number, nested,
941 "omp_set_nested_target");
942 }
943
944 int omp_get_nested_target(
945 TARGET_TYPE target_type,
946 int target_number
947 )
948 {
949 return omp_get_int_target(target_type, target_number,
950 "omp_get_nested_target");
951 }
952
953 void omp_set_schedule_target(
954 TARGET_TYPE target_type,
955 int target_number,
956 omp_sched_t kind,
957 int modifier
958 )
959 {
960 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
961 __func__, 0);
962 if (ofld != 0) {
963 VarDesc vars[2] = {0};
964
965 vars[0].type.src = c_data;
966 vars[0].type.dst = c_data;
967 vars[0].direction.bits = c_parameter_in;
968 vars[0].size = sizeof(omp_sched_t);
969 vars[0].count = 1;
970 vars[0].ptr = &kind;
971
972 vars[1].type.src = c_data;
973 vars[1].type.dst = c_data;
974 vars[1].direction.bits = c_parameter_in;
975 vars[1].size = sizeof(int);
976 vars[1].count = 1;
977 vars[1].ptr = &modifier;
978
979 OFFLOAD_OFFLOAD(ofld, "omp_set_schedule_target",
980 0, 2, vars, NULL, 0, 0, 0);
981 }
982 }
983
984 void omp_get_schedule_target(
985 TARGET_TYPE target_type,
986 int target_number,
987 omp_sched_t *kind,
988 int *modifier
989 )
990 {
991 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
992 __func__, 0);
993 if (ofld != 0) {
994 VarDesc vars[2] = {0};
995
996 vars[0].type.src = c_data;
997 vars[0].type.dst = c_data;
998 vars[0].direction.bits = c_parameter_out;
999 vars[0].size = sizeof(omp_sched_t);
1000 vars[0].count = 1;
1001 vars[0].ptr = kind;
1002
1003 vars[1].type.src = c_data;
1004 vars[1].type.dst = c_data;
1005 vars[1].direction.bits = c_parameter_out;
1006 vars[1].size = sizeof(int);
1007 vars[1].count = 1;
1008 vars[1].ptr = modifier;
1009
1010 OFFLOAD_OFFLOAD(ofld, "omp_get_schedule_target",
1011 0, 2, vars, NULL, 0, 0, 0);
1012 }
1013 }
1014
1015 // lock API functions
1016
1017 void omp_init_lock_target(
1018 TARGET_TYPE target_type,
1019 int target_number,
1020 omp_lock_target_t *lock
1021 )
1022 {
1023 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1024 __func__, 0);
1025 if (ofld != 0) {
1026 VarDesc vars[1] = {0};
1027
1028 vars[0].type.src = c_data;
1029 vars[0].type.dst = c_data;
1030 vars[0].direction.bits = c_parameter_out;
1031 vars[0].size = sizeof(omp_lock_target_t);
1032 vars[0].count = 1;
1033 vars[0].ptr = lock;
1034
1035 OFFLOAD_OFFLOAD(ofld, "omp_init_lock_target",
1036 0, 1, vars, NULL, 0, 0, 0);
1037 }
1038 }
1039
1040 void omp_destroy_lock_target(
1041 TARGET_TYPE target_type,
1042 int target_number,
1043 omp_lock_target_t *lock
1044 )
1045 {
1046 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1047 __func__, 0);
1048 if (ofld != 0) {
1049 VarDesc vars[1] = {0};
1050
1051 vars[0].type.src = c_data;
1052 vars[0].type.dst = c_data;
1053 vars[0].direction.bits = c_parameter_in;
1054 vars[0].size = sizeof(omp_lock_target_t);
1055 vars[0].count = 1;
1056 vars[0].ptr = lock;
1057
1058 OFFLOAD_OFFLOAD(ofld, "omp_destroy_lock_target",
1059 0, 1, vars, NULL, 0, 0, 0);
1060 }
1061 }
1062
1063 void omp_set_lock_target(
1064 TARGET_TYPE target_type,
1065 int target_number,
1066 omp_lock_target_t *lock
1067 )
1068 {
1069 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1070 __func__, 0);
1071 if (ofld != 0) {
1072 VarDesc vars[1] = {0};
1073
1074 vars[0].type.src = c_data;
1075 vars[0].type.dst = c_data;
1076 vars[0].direction.bits = c_parameter_inout;
1077 vars[0].size = sizeof(omp_lock_target_t);
1078 vars[0].count = 1;
1079 vars[0].ptr = lock;
1080
1081 OFFLOAD_OFFLOAD(ofld, "omp_set_lock_target",
1082 0, 1, vars, NULL, 0, 0, 0);
1083 }
1084 }
1085
1086 void omp_unset_lock_target(
1087 TARGET_TYPE target_type,
1088 int target_number,
1089 omp_lock_target_t *lock
1090 )
1091 {
1092 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1093 __func__, 0);
1094 if (ofld != 0) {
1095 VarDesc vars[1] = {0};
1096
1097 vars[0].type.src = c_data;
1098 vars[0].type.dst = c_data;
1099 vars[0].direction.bits = c_parameter_inout;
1100 vars[0].size = sizeof(omp_lock_target_t);
1101 vars[0].count = 1;
1102 vars[0].ptr = lock;
1103
1104 OFFLOAD_OFFLOAD(ofld, "omp_unset_lock_target",
1105 0, 1, vars, NULL, 0, 0, 0);
1106 }
1107 }
1108
1109 int omp_test_lock_target(
1110 TARGET_TYPE target_type,
1111 int target_number,
1112 omp_lock_target_t *lock
1113 )
1114 {
1115 int result = 0;
1116
1117 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1118 __func__, 0);
1119 if (ofld != 0) {
1120 VarDesc vars[2] = {0};
1121
1122 vars[0].type.src = c_data;
1123 vars[0].type.dst = c_data;
1124 vars[0].direction.bits = c_parameter_inout;
1125 vars[0].size = sizeof(omp_lock_target_t);
1126 vars[0].count = 1;
1127 vars[0].ptr = lock;
1128
1129 vars[1].type.src = c_data;
1130 vars[1].type.dst = c_data;
1131 vars[1].direction.bits = c_parameter_out;
1132 vars[1].size = sizeof(int);
1133 vars[1].count = 1;
1134 vars[1].ptr = &result;
1135
1136 OFFLOAD_OFFLOAD(ofld, "omp_test_lock_target",
1137 0, 2, vars, NULL, 0, 0, 0);
1138 }
1139 return result;
1140 }
1141
1142 // nested lock API functions
1143
1144 void omp_init_nest_lock_target(
1145 TARGET_TYPE target_type,
1146 int target_number,
1147 omp_nest_lock_target_t *lock
1148 )
1149 {
1150 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1151 __func__, 0);
1152 if (ofld != 0) {
1153 VarDesc vars[1] = {0};
1154
1155 vars[0].type.src = c_data;
1156 vars[0].type.dst = c_data;
1157 vars[0].direction.bits = c_parameter_out;
1158 vars[0].size = sizeof(omp_nest_lock_target_t);
1159 vars[0].count = 1;
1160 vars[0].ptr = lock;
1161
1162 OFFLOAD_OFFLOAD(ofld, "omp_init_nest_lock_target",
1163 0, 1, vars, NULL, 0, 0, 0);
1164 }
1165 }
1166
1167 void omp_destroy_nest_lock_target(
1168 TARGET_TYPE target_type,
1169 int target_number,
1170 omp_nest_lock_target_t *lock
1171 )
1172 {
1173 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1174 __func__, 0);
1175 if (ofld != 0) {
1176 VarDesc vars[1] = {0};
1177
1178 vars[0].type.src = c_data;
1179 vars[0].type.dst = c_data;
1180 vars[0].direction.bits = c_parameter_in;
1181 vars[0].size = sizeof(omp_nest_lock_target_t);
1182 vars[0].count = 1;
1183 vars[0].ptr = lock;
1184
1185 OFFLOAD_OFFLOAD(ofld, "omp_destroy_nest_lock_target",
1186 0, 1, vars, NULL, 0, 0, 0);
1187 }
1188 }
1189
1190 void omp_set_nest_lock_target(
1191 TARGET_TYPE target_type,
1192 int target_number,
1193 omp_nest_lock_target_t *lock
1194 )
1195 {
1196 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1197 __func__, 0);
1198 if (ofld != 0) {
1199 VarDesc vars[1] = {0};
1200
1201 vars[0].type.src = c_data;
1202 vars[0].type.dst = c_data;
1203 vars[0].direction.bits = c_parameter_inout;
1204 vars[0].size = sizeof(omp_nest_lock_target_t);
1205 vars[0].count = 1;
1206 vars[0].ptr = lock;
1207
1208 OFFLOAD_OFFLOAD(ofld, "omp_set_nest_lock_target",
1209 0, 1, vars, NULL, 0, 0, 0);
1210 }
1211 }
1212
1213 void omp_unset_nest_lock_target(
1214 TARGET_TYPE target_type,
1215 int target_number,
1216 omp_nest_lock_target_t *lock
1217 )
1218 {
1219 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1220 __func__, 0);
1221 if (ofld != 0) {
1222 VarDesc vars[1] = {0};
1223
1224 vars[0].type.src = c_data;
1225 vars[0].type.dst = c_data;
1226 vars[0].direction.bits = c_parameter_inout;
1227 vars[0].size = sizeof(omp_nest_lock_target_t);
1228 vars[0].count = 1;
1229 vars[0].ptr = lock;
1230
1231 OFFLOAD_OFFLOAD(ofld, "omp_unset_nest_lock_target",
1232 0, 1, vars, NULL, 0, 0, 0);
1233 }
1234 }
1235
1236 int omp_test_nest_lock_target(
1237 TARGET_TYPE target_type,
1238 int target_number,
1239 omp_nest_lock_target_t *lock
1240 )
1241 {
1242 int result = 0;
1243
1244 OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
1245 __func__, 0);
1246 if (ofld != 0) {
1247 VarDesc vars[2] = {0};
1248
1249 vars[0].type.src = c_data;
1250 vars[0].type.dst = c_data;
1251 vars[0].direction.bits = c_parameter_inout;
1252 vars[0].size = sizeof(omp_nest_lock_target_t);
1253 vars[0].count = 1;
1254 vars[0].ptr = lock;
1255
1256 vars[1].type.src = c_data;
1257 vars[1].type.dst = c_data;
1258 vars[1].direction.bits = c_parameter_out;
1259 vars[1].size = sizeof(int);
1260 vars[1].count = 1;
1261 vars[1].ptr = &result;
1262
1263 OFFLOAD_OFFLOAD(ofld, "omp_test_nest_lock_target",
1264 0, 2, vars, NULL, 0, 0, 0);
1265 }
1266 return result;
1267 }