// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
25 #include "util/bitscan.h"
27 #include "api/dispatch.hpp"
28 #include "api/util.hpp"
29 #include "core/event.hpp"
30 #include "core/memory.hpp"
32 using namespace clover
;
// Shorthand alias: vector_t names resource::vector.
35 typedef resource::vector vector_t
;
// Helper taking a pointer to size_t values. Call sites in this file pass the
// origin/region pointers received from the API (e.g. vector(p_region)).
// NOTE(review): only the signature is visible in this extract; the return
// type and the body are missing, so null handling and the number of elements
// read cannot be stated from here — confirm against the upstream file.
38 vector(const size_t *p
) {
// Fills in unspecified (zero) pitch components.
//
// Iterates over zip(tail(pitch), map(multiplies(), region, pitch)) and, for
// each pair whose first element equals zero, assigns the second element
// (the region * pitch product) to the first. `pitch` is a by-value parameter.
//
// NOTE(review): `®ion` looks like a mis-encoded `&region`; the return type
// and everything after the loop body are missing from this extract —
// presumably the adjusted `pitch` is returned; confirm upstream.
43 pitch(const vector_t
®ion
, vector_t pitch
) {
44 for (auto x
: zip(tail(pitch
),
45 map(multiplies(), region
, pitch
))) {
46 // The spec defines a value of zero as the natural pitch,
47 // i.e. the unaligned size of the previous dimension.
48 if (std::get
<0>(x
) == 0)
49 std::get
<0>(x
) = std::get
<1>(x
);
56 /// Size of a region in bytes.
///
/// Visible logic: tests whether any component of \a region is zero, and
/// returns dot(pitch, region - vector_t{ 0, 1, 1 }).
///
/// NOTE(review): the statement guarded by the zero test and the return type
/// are missing from this extract (presumably the zero case yields 0);
/// `®ion` looks like a mis-encoded `&region` — confirm upstream.
59 size(const vector_t
&pitch
, const vector_t
®ion
) {
60 if (any_of(is_zero(), region
))
63 return dot(pitch
, region
- vector_t
{ 0, 1, 1 });
67 /// Common argument checking shared by memory transfer commands.
///
/// Throws error(CL_INVALID_CONTEXT) when the any_of() test succeeds; the
/// predicate compares an event's context() with q.context() and is true
/// when they differ.
///
/// NOTE(review): the line that closes the lambda and names the range passed
/// to any_of() is missing from this extract — presumably \a deps.
70 validate_common(command_queue
&q
,
71 const ref_vector
<event
> &deps
) {
72 if (any_of([&](const event
&ev
) {
73 return ev
.context() != q
.context();
75 throw error(CL_INVALID_CONTEXT
);
79 /// Common error checking for a buffer object argument.
///
/// Throws, in this order:
///  - error(CL_INVALID_CONTEXT) if mem.context() differs from q.context();
///  - error(CL_INVALID_VALUE) if any component of pitch * region is greater
///    than the corresponding component of tail(pitch);
///  - error(CL_INVALID_VALUE) if dot(pitch, origin) + size(pitch, region)
///    exceeds mem.size();
///  - error(CL_INVALID_VALUE) if any component of \a region is zero.
///
/// NOTE(review): `®ion` looks like a mis-encoded `&region`; the return type
/// and closing brace are missing from this extract.
82 validate_object(command_queue
&q
, buffer
&mem
, const vector_t
&origin
,
83 const vector_t
&pitch
, const vector_t
®ion
) {
84 if (mem
.context() != q
.context())
85 throw error(CL_INVALID_CONTEXT
);
87 // The region must fit within the specified pitch,
88 if (any_of(greater(), map(multiplies(), pitch
, region
), tail(pitch
)))
89 throw error(CL_INVALID_VALUE
);
91 // ...and within the specified object.
92 if (dot(pitch
, origin
) + size(pitch
, region
) > mem
.size())
93 throw error(CL_INVALID_VALUE
);
// Zero-sized regions are rejected.
95 if (any_of(is_zero(), region
))
96 throw error(CL_INVALID_VALUE
);
100 /// Common error checking for an image argument.
///
/// Throws, in this order:
///  - error(CL_INVALID_OPERATION) if q.device().image_support() is false;
///  - error(CL_INVALID_CONTEXT) if img.context() differs from q.context();
///  - error(CL_INVALID_VALUE) if any component of orig + region is greater
///    than the image extent { width, height, depth };
///  - error(CL_INVALID_VALUE) if any component of \a region is zero.
///
/// NOTE(review): `®ion` looks like a mis-encoded `&region`; the return type
/// and closing brace are missing from this extract.
103 validate_object(command_queue
&q
, image
&img
,
104 const vector_t
&orig
, const vector_t
®ion
) {
// Local `size` is the image extent; it shadows any outer name `size` here.
105 vector_t size
= { img
.width(), img
.height(), img
.depth() };
107 if (!q
.device().image_support())
108 throw error(CL_INVALID_OPERATION
);
110 if (img
.context() != q
.context())
111 throw error(CL_INVALID_CONTEXT
);
113 if (any_of(greater(), orig
+ region
, size
))
114 throw error(CL_INVALID_VALUE
);
116 if (any_of(is_zero(), region
))
117 throw error(CL_INVALID_VALUE
);
121 /// Common error checking for a host pointer argument.
///
/// Throws error(CL_INVALID_VALUE) if any component of pitch * region is
/// greater than the corresponding component of tail(pitch).
///
/// NOTE(review): the condition guarding the first throw is missing from this
/// extract (presumably a null check on \a ptr); as shown the throw would be
/// unconditional. `®ion` looks like a mis-encoded `&region`. Confirm both
/// against the upstream file.
124 validate_object(command_queue
&q
, const void *ptr
, const vector_t
&orig
,
125 const vector_t
&pitch
, const vector_t
®ion
) {
127 throw error(CL_INVALID_VALUE
);
129 // The region must fit within the specified pitch.
130 if (any_of(greater(), map(multiplies(), pitch
, region
), tail(pitch
)))
131 throw error(CL_INVALID_VALUE
);
135 /// Common argument checking for a copy between two buffer objects.
///
/// When dst_mem == src_mem, computes the linear start offsets with
/// dot(pitch, orig) and throws error(CL_MEM_COPY_OVERLAP) if the intervals
/// [offset, offset + size(pitch, region)) of destination and source overlap
/// according to interval_overlaps().
///
/// NOTE(review): `src_mem` is used below but its parameter line is missing
/// from this extract; `®ion` looks like a mis-encoded `&region`.
138 validate_copy(command_queue
&q
, buffer
&dst_mem
,
139 const vector_t
&dst_orig
, const vector_t
&dst_pitch
,
141 const vector_t
&src_orig
, const vector_t
&src_pitch
,
142 const vector_t
®ion
) {
143 if (dst_mem
== src_mem
) {
144 auto dst_offset
= dot(dst_pitch
, dst_orig
);
145 auto src_offset
= dot(src_pitch
, src_orig
);
147 if (interval_overlaps()(
148 dst_offset
, dst_offset
+ size(dst_pitch
, region
),
149 src_offset
, src_offset
+ size(src_pitch
, region
)))
150 throw error(CL_MEM_COPY_OVERLAP
);
155 /// Common argument checking for a copy between two image objects.
///
/// Throws error(CL_IMAGE_FORMAT_MISMATCH) if the two images' format()
/// values differ. When dst_img == src_img, throws
/// error(CL_MEM_COPY_OVERLAP) if interval_overlaps() holds for every
/// component of [dst_orig, dst_orig + region) against
/// [src_orig, src_orig + region) (all_of, i.e. the boxes intersect only if
/// they overlap along each axis).
///
/// NOTE(review): `®ion` looks like a mis-encoded `&region`; the return type
/// and closing braces are missing from this extract.
158 validate_copy(command_queue
&q
,
159 image
&dst_img
, const vector_t
&dst_orig
,
160 image
&src_img
, const vector_t
&src_orig
,
161 const vector_t
®ion
) {
162 if (dst_img
.format() != src_img
.format())
163 throw error(CL_IMAGE_FORMAT_MISMATCH
);
165 if (dst_img
== src_img
) {
166 if (all_of(interval_overlaps(),
167 dst_orig
, dst_orig
+ region
,
168 src_orig
, src_orig
+ region
))
169 throw error(CL_MEM_COPY_OVERLAP
);
174 /// Checks that the host access flags of the memory object are
175 /// within the allowed set \a flags.
///
/// Masks mem.flags() with the complement of \a flags and with the three
/// host-access bits (CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY,
/// CL_MEM_HOST_NO_ACCESS); throws error(CL_INVALID_OPERATION) if any bit
/// remains, i.e. the object carries a host-access flag not listed in
/// \a flags.
178 validate_object_access(const memory_obj
&mem
, const cl_mem_flags flags
) {
179 if (mem
.flags() & ~flags
&
180 (CL_MEM_HOST_READ_ONLY
| CL_MEM_HOST_WRITE_ONLY
|
181 CL_MEM_HOST_NO_ACCESS
))
182 throw error(CL_INVALID_OPERATION
);
186 /// Checks that the mapping flags are correct.
///
/// Throws error(CL_INVALID_VALUE) if \a flags combines CL_MAP_READ or
/// CL_MAP_WRITE with CL_MAP_WRITE_INVALIDATE_REGION. Then calls
/// validate_object_access() with CL_MEM_HOST_READ_ONLY when CL_MAP_READ is
/// set, and with CL_MEM_HOST_WRITE_ONLY when CL_MAP_WRITE or
/// CL_MAP_WRITE_INVALIDATE_REGION is set.
189 validate_map_flags(const memory_obj
&mem
, const cl_map_flags flags
) {
190 if ((flags
& (CL_MAP_WRITE
| CL_MAP_READ
)) &&
191 (flags
& CL_MAP_WRITE_INVALIDATE_REGION
))
192 throw error(CL_INVALID_VALUE
);
194 if (flags
& CL_MAP_READ
)
195 validate_object_access(mem
, CL_MEM_HOST_READ_ONLY
);
197 if (flags
& (CL_MAP_WRITE
| CL_MAP_WRITE_INVALIDATE_REGION
))
198 validate_object_access(mem
, CL_MEM_HOST_WRITE_ONLY
);
202 /// Class that encapsulates the task of mapping an object of type
203 /// \a T. The return value of get() should be implicitly
204 /// convertible to \a void *.
///
/// get() returns a brace-initialised value built from: the queue,
/// obj->resource(q), the map flags, the literal `true`, the origin
/// {{ offset }} and the region {{ size, 1, 1 }}.
///
/// NOTE(review): the template/struct header and get()'s return type are
/// missing from this extract, so the constructed type and the meaning of
/// the `true` argument cannot be confirmed from here.
209 get(command_queue
&q
, T obj
, cl_map_flags flags
,
210 size_t offset
, size_t size
) {
211 return { q
, obj
->resource(q
), flags
, true,
212 {{ offset
}}, {{ size
, 1, 1 }} };
// Specialisation of _map for plain host pointers: no mapping is performed;
// get() just returns the pointer advanced by `offset` bytes (char *
// arithmetic). `q`, `flags` and `size` are not used in the visible body.
// NOTE(review): the `template<>` line and get()'s return type are missing
// from this extract.
217 struct _map
<void *> {
219 get(command_queue
&q
, void *obj
, cl_map_flags flags
,
220 size_t offset
, size_t size
) {
221 return (char *)obj
+ offset
;
// Specialisation of _map for read-only host pointers: get() returns the
// pointer advanced by `offset` bytes (const char * arithmetic). `q`, `flags`
// and `size` are not used in the visible body.
// NOTE(review): the `template<>` line and get()'s return type are missing
// from this extract.
226 struct _map
<const void *> {
228 get(command_queue
&q
, const void *obj
, cl_map_flags flags
,
229 size_t offset
, size_t size
) {
230 return (const char *)obj
+ offset
;
235 /// Software copy from \a src_obj to \a dst_obj. They can be
236 /// either pointers or memory objects.
///
/// Returns a std::function<void (event &)> that, when invoked:
///  1. obtains a destination mapping via _map<T>::get() with CL_MAP_WRITE
///     and a source mapping via _map<S>::get() with CL_MAP_READ, each at
///     offset dot(pitch, orig) and with length size(pitch, region);
///  2. iterates v[2] over region[2] and v[1] over region[1], passing
///     dst + dot(dst_pitch, v), src + dot(src_pitch, v) and the row length
///     src_pitch[0] * region[0] to a call whose name is not visible.
///
/// The lambda captures everything by copy except \a q, which is captured
/// by reference, so the returned functor must not be run after the queue
/// is gone.
///
/// NOTE(review): the declaration of `v`, the name of the per-row call
/// (presumably a byte copy such as memcpy) and the closing braces are
/// missing from this extract; `®ion` looks like a mis-encoded `&region`.
238 template<typename T
, typename S
>
239 std::function
<void (event
&)>
240 soft_copy_op(command_queue
&q
,
241 T dst_obj
, const vector_t
&dst_orig
, const vector_t
&dst_pitch
,
242 S src_obj
, const vector_t
&src_orig
, const vector_t
&src_pitch
,
243 const vector_t
®ion
) {
244 return [=, &q
](event
&) {
245 auto dst
= _map
<T
>::get(q
, dst_obj
, CL_MAP_WRITE
,
246 dot(dst_pitch
, dst_orig
),
247 size(dst_pitch
, region
));
248 auto src
= _map
<S
>::get(q
, src_obj
, CL_MAP_READ
,
249 dot(src_pitch
, src_orig
),
250 size(src_pitch
, region
));
// One row (v[0] stays untouched by these loops) is handled per iteration.
253 for (v
[2] = 0; v
[2] < region
[2]; ++v
[2]) {
254 for (v
[1] = 0; v
[1] < region
[1]; ++v
[1]) {
256 static_cast<char *>(dst
) + dot(dst_pitch
, v
),
257 static_cast<const char *>(src
) + dot(src_pitch
, v
),
258 src_pitch
[0] * region
[0]);
265 /// Hardware copy from \a src_obj to \a dst_obj.
///
/// Returns a std::function<void (event &)> that calls
/// dst_obj->resource(q).copy(q, dst_orig, region,
///                           src_obj->resource(q), src_orig).
/// Arguments are captured by copy except \a q, which is captured by
/// reference and therefore must still be alive when the functor runs.
///
/// NOTE(review): `®ion` looks like a mis-encoded `&region`; the closing
/// lines of the lambda and function are missing from this extract.
267 template<typename T
, typename S
>
268 std::function
<void (event
&)>
269 hard_copy_op(command_queue
&q
, T dst_obj
, const vector_t
&dst_orig
,
270 S src_obj
, const vector_t
&src_orig
, const vector_t
®ion
) {
271 return [=, &q
](event
&) {
272 dst_obj
->resource(q
).copy(q
, dst_orig
, region
,
273 src_obj
->resource(q
), src_orig
);
// OpenCL entry point: enqueue a read from buffer `d_mem` into host memory.
//
// Visible steps: resolve the buffer and wait list; describe the transfer as
// region { size, 1, 1 } at origin { offset } with pitch(region, {{ 1 }});
// validate the wait list, the host pointer, the buffer range and that the
// buffer allows host reads; create a hard_event for CL_COMMAND_READ_BUFFER
// whose action is soft_copy_op() with `ptr` as destination and the buffer as
// source; wait on the event; hand the event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the tail of the soft_copy_op() call, the guard in front
// of wait_signalled() (presumably `blocking`), the return statement and the
// error handler. Confirm against the upstream file.
279 clEnqueueReadBuffer(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
280 size_t offset
, size_t size
, void *ptr
,
281 cl_uint num_deps
, const cl_event
*d_deps
,
282 cl_event
*rd_ev
) try {
284 auto &mem
= obj
<buffer
>(d_mem
);
285 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
286 vector_t region
= { size
, 1, 1 };
287 vector_t obj_origin
= { offset
};
288 auto obj_pitch
= pitch(region
, {{ 1 }});
290 validate_common(q
, deps
);
291 validate_object(q
, ptr
, {}, obj_pitch
, region
);
292 validate_object(q
, mem
, obj_origin
, obj_pitch
, region
);
293 validate_object_access(mem
, CL_MEM_HOST_READ_ONLY
);
295 auto hev
= create
<hard_event
>(
296 q
, CL_COMMAND_READ_BUFFER
, deps
,
297 soft_copy_op(q
, ptr
, {}, obj_pitch
,
298 &mem
, obj_origin
, obj_pitch
,
302 hev().wait_signalled();
304 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a write from host memory into buffer `d_mem`.
//
// Visible steps: resolve the buffer and wait list; describe the transfer as
// region { size, 1, 1 } at origin { offset } with pitch(region, {{ 1 }});
// validate the wait list, the buffer range, the host pointer and that the
// buffer allows host writes; create a hard_event for CL_COMMAND_WRITE_BUFFER
// whose action is soft_copy_op() with the buffer as destination; wait on the
// event; hand the event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the source arguments of soft_copy_op() (presumably
// `ptr`), the guard in front of wait_signalled() (presumably `blocking`),
// the return statement and the error handler. Confirm upstream.
312 clEnqueueWriteBuffer(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
313 size_t offset
, size_t size
, const void *ptr
,
314 cl_uint num_deps
, const cl_event
*d_deps
,
315 cl_event
*rd_ev
) try {
317 auto &mem
= obj
<buffer
>(d_mem
);
318 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
319 vector_t region
= { size
, 1, 1 };
320 vector_t obj_origin
= { offset
};
321 auto obj_pitch
= pitch(region
, {{ 1 }});
323 validate_common(q
, deps
);
324 validate_object(q
, mem
, obj_origin
, obj_pitch
, region
);
325 validate_object(q
, ptr
, {}, obj_pitch
, region
);
326 validate_object_access(mem
, CL_MEM_HOST_WRITE_ONLY
);
328 auto hev
= create
<hard_event
>(
329 q
, CL_COMMAND_WRITE_BUFFER
, deps
,
330 soft_copy_op(q
, &mem
, obj_origin
, obj_pitch
,
335 hev().wait_signalled();
337 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a rectangular read from buffer `d_mem` into
// host memory.
//
// Visible steps: resolve the buffer and wait list; convert the region and
// both origins with vector(); build object and host pitches with pitch()
// from { 1, row_pitch, slice_pitch }; validate the wait list, the host
// pointer, the buffer range and that the buffer allows host reads; create a
// hard_event for CL_COMMAND_READ_BUFFER_RECT whose action is soft_copy_op()
// with `ptr` as destination and the buffer as source; wait on the event;
// hand the event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the `ptr`
// parameter, the definition of `q`, the tail of the soft_copy_op() call, the
// guard in front of wait_signalled() (presumably `blocking`), the return
// statement and the error handler. Confirm upstream.
345 clEnqueueReadBufferRect(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
346 const size_t *p_obj_origin
,
347 const size_t *p_host_origin
,
348 const size_t *p_region
,
349 size_t obj_row_pitch
, size_t obj_slice_pitch
,
350 size_t host_row_pitch
, size_t host_slice_pitch
,
352 cl_uint num_deps
, const cl_event
*d_deps
,
353 cl_event
*rd_ev
) try {
355 auto &mem
= obj
<buffer
>(d_mem
);
356 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
357 auto region
= vector(p_region
);
358 auto obj_origin
= vector(p_obj_origin
);
359 auto obj_pitch
= pitch(region
, {{ 1, obj_row_pitch
, obj_slice_pitch
}});
360 auto host_origin
= vector(p_host_origin
);
361 auto host_pitch
= pitch(region
, {{ 1, host_row_pitch
, host_slice_pitch
}});
363 validate_common(q
, deps
);
364 validate_object(q
, ptr
, host_origin
, host_pitch
, region
);
365 validate_object(q
, mem
, obj_origin
, obj_pitch
, region
);
366 validate_object_access(mem
, CL_MEM_HOST_READ_ONLY
);
368 auto hev
= create
<hard_event
>(
369 q
, CL_COMMAND_READ_BUFFER_RECT
, deps
,
370 soft_copy_op(q
, ptr
, host_origin
, host_pitch
,
371 &mem
, obj_origin
, obj_pitch
,
375 hev().wait_signalled();
377 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a rectangular write from host memory into
// buffer `d_mem`.
//
// Visible steps: resolve the buffer and wait list; convert the region and
// both origins with vector(); build object and host pitches with pitch()
// from { 1, row_pitch, slice_pitch }; validate the wait list, the buffer
// range, the host pointer and that the buffer allows host writes; create a
// hard_event for CL_COMMAND_WRITE_BUFFER_RECT whose action is soft_copy_op()
// with the buffer as destination and `ptr` as source; wait on the event;
// hand the event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the `ptr`
// parameter, the definition of `q`, the tail of the soft_copy_op() call, the
// guard in front of wait_signalled() (presumably `blocking`), the return
// statement and the error handler. Confirm upstream.
385 clEnqueueWriteBufferRect(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
386 const size_t *p_obj_origin
,
387 const size_t *p_host_origin
,
388 const size_t *p_region
,
389 size_t obj_row_pitch
, size_t obj_slice_pitch
,
390 size_t host_row_pitch
, size_t host_slice_pitch
,
392 cl_uint num_deps
, const cl_event
*d_deps
,
393 cl_event
*rd_ev
) try {
395 auto &mem
= obj
<buffer
>(d_mem
);
396 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
397 auto region
= vector(p_region
);
398 auto obj_origin
= vector(p_obj_origin
);
399 auto obj_pitch
= pitch(region
, {{ 1, obj_row_pitch
, obj_slice_pitch
}});
400 auto host_origin
= vector(p_host_origin
);
401 auto host_pitch
= pitch(region
, {{ 1, host_row_pitch
, host_slice_pitch
}});
403 validate_common(q
, deps
);
404 validate_object(q
, mem
, obj_origin
, obj_pitch
, region
);
405 validate_object(q
, ptr
, host_origin
, host_pitch
, region
);
406 validate_object_access(mem
, CL_MEM_HOST_WRITE_ONLY
);
408 auto hev
= create
<hard_event
>(
409 q
, CL_COMMAND_WRITE_BUFFER_RECT
, deps
,
410 soft_copy_op(q
, &mem
, obj_origin
, obj_pitch
,
411 ptr
, host_origin
, host_pitch
,
415 hev().wait_signalled();
417 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a copy of `size` bytes between two buffers.
//
// Visible steps: resolve both buffers and the wait list; describe the
// transfer as region { size, 1, 1 } with origins { dst_offset } and
// { src_offset } and pitch(region, {{ 1 }}) on both sides; validate the wait
// list, both buffer ranges and (via validate_copy) overlap between them;
// create a hard_event for CL_COMMAND_COPY_BUFFER whose action is
// hard_copy_op(); hand the event back through `rd_ev`. No wait on the event
// is visible in this function.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the return statement and the error handler.
425 clEnqueueCopyBuffer(cl_command_queue d_q
, cl_mem d_src_mem
, cl_mem d_dst_mem
,
426 size_t src_offset
, size_t dst_offset
, size_t size
,
427 cl_uint num_deps
, const cl_event
*d_deps
,
428 cl_event
*rd_ev
) try {
430 auto &src_mem
= obj
<buffer
>(d_src_mem
);
431 auto &dst_mem
= obj
<buffer
>(d_dst_mem
);
432 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
433 vector_t region
= { size
, 1, 1 };
434 vector_t dst_origin
= { dst_offset
};
435 auto dst_pitch
= pitch(region
, {{ 1 }});
436 vector_t src_origin
= { src_offset
};
437 auto src_pitch
= pitch(region
, {{ 1 }});
439 validate_common(q
, deps
);
440 validate_object(q
, dst_mem
, dst_origin
, dst_pitch
, region
);
441 validate_object(q
, src_mem
, src_origin
, src_pitch
, region
);
442 validate_copy(q
, dst_mem
, dst_origin
, dst_pitch
,
443 src_mem
, src_origin
, src_pitch
, region
);
445 auto hev
= create
<hard_event
>(
446 q
, CL_COMMAND_COPY_BUFFER
, deps
,
447 hard_copy_op(q
, &dst_mem
, dst_origin
,
448 &src_mem
, src_origin
, region
));
450 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a rectangular copy between two buffers.
//
// Visible steps: resolve both buffers and the wait list; convert the region
// and both origins with vector(); build both pitches with pitch() from
// { 1, row_pitch, slice_pitch }; validate the wait list, both buffer ranges
// and (via validate_copy) overlap between them; create a hard_event for
// CL_COMMAND_COPY_BUFFER_RECT whose action is soft_copy_op() (a software
// copy, unlike clEnqueueCopyBuffer which uses hard_copy_op()); hand the
// event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// `d_dst_mem` parameter, the definition of `q`, the tail of the
// soft_copy_op() call, the return statement and the error handler.
458 clEnqueueCopyBufferRect(cl_command_queue d_q
, cl_mem d_src_mem
,
460 const size_t *p_src_origin
, const size_t *p_dst_origin
,
461 const size_t *p_region
,
462 size_t src_row_pitch
, size_t src_slice_pitch
,
463 size_t dst_row_pitch
, size_t dst_slice_pitch
,
464 cl_uint num_deps
, const cl_event
*d_deps
,
465 cl_event
*rd_ev
) try {
467 auto &src_mem
= obj
<buffer
>(d_src_mem
);
468 auto &dst_mem
= obj
<buffer
>(d_dst_mem
);
469 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
470 auto region
= vector(p_region
);
471 auto dst_origin
= vector(p_dst_origin
);
472 auto dst_pitch
= pitch(region
, {{ 1, dst_row_pitch
, dst_slice_pitch
}});
473 auto src_origin
= vector(p_src_origin
);
474 auto src_pitch
= pitch(region
, {{ 1, src_row_pitch
, src_slice_pitch
}});
476 validate_common(q
, deps
);
477 validate_object(q
, dst_mem
, dst_origin
, dst_pitch
, region
);
478 validate_object(q
, src_mem
, src_origin
, src_pitch
, region
);
479 validate_copy(q
, dst_mem
, dst_origin
, dst_pitch
,
480 src_mem
, src_origin
, src_pitch
, region
);
482 auto hev
= create
<hard_event
>(
483 q
, CL_COMMAND_COPY_BUFFER_RECT
, deps
,
484 soft_copy_op(q
, &dst_mem
, dst_origin
, dst_pitch
,
485 &src_mem
, src_origin
, src_pitch
,
488 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a read from image `d_mem` into host memory.
//
// Visible steps: resolve the image and wait list; convert region and origin
// with vector(); build the host (destination) pitch from
// { pixel_size, row_pitch, slice_pitch } and the image (source) pitch from
// the image's own pixel_size/row_pitch/slice_pitch; validate the wait list,
// the host pointer, the image region and that the image allows host reads;
// create a hard_event for CL_COMMAND_READ_IMAGE whose action is
// soft_copy_op() with `ptr` as destination and the image as source; wait on
// the event; hand the event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the tail of the soft_copy_op() call, the guard in front
// of wait_signalled() (presumably `blocking`), the return statement and the
// error handler. Confirm upstream.
496 clEnqueueReadImage(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
497 const size_t *p_origin
, const size_t *p_region
,
498 size_t row_pitch
, size_t slice_pitch
, void *ptr
,
499 cl_uint num_deps
, const cl_event
*d_deps
,
500 cl_event
*rd_ev
) try {
502 auto &img
= obj
<image
>(d_mem
);
503 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
504 auto region
= vector(p_region
);
505 auto dst_pitch
= pitch(region
, {{ img
.pixel_size(),
506 row_pitch
, slice_pitch
}});
507 auto src_origin
= vector(p_origin
);
508 auto src_pitch
= pitch(region
, {{ img
.pixel_size(),
509 img
.row_pitch(), img
.slice_pitch() }});
511 validate_common(q
, deps
);
512 validate_object(q
, ptr
, {}, dst_pitch
, region
);
513 validate_object(q
, img
, src_origin
, region
);
514 validate_object_access(img
, CL_MEM_HOST_READ_ONLY
);
516 auto hev
= create
<hard_event
>(
517 q
, CL_COMMAND_READ_IMAGE
, deps
,
518 soft_copy_op(q
, ptr
, {}, dst_pitch
,
519 &img
, src_origin
, src_pitch
,
523 hev().wait_signalled();
525 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a write from host memory into image `d_mem`.
//
// Visible steps: resolve the image and wait list; convert region and origin
// with vector(); build the image (destination) pitch from the image's own
// pixel_size/row_pitch/slice_pitch and the host (source) pitch from
// { pixel_size, row_pitch, slice_pitch }; validate the wait list, the image
// region, the host pointer and that the image allows host writes; create a
// hard_event for CL_COMMAND_WRITE_IMAGE whose action is soft_copy_op() with
// the image as destination; wait on the event; hand the event back through
// `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the source arguments of soft_copy_op() (presumably
// `ptr`), the guard in front of wait_signalled() (presumably `blocking`),
// the return statement and the error handler. Confirm upstream.
533 clEnqueueWriteImage(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
534 const size_t *p_origin
, const size_t *p_region
,
535 size_t row_pitch
, size_t slice_pitch
, const void *ptr
,
536 cl_uint num_deps
, const cl_event
*d_deps
,
537 cl_event
*rd_ev
) try {
539 auto &img
= obj
<image
>(d_mem
);
540 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
541 auto region
= vector(p_region
);
542 auto dst_origin
= vector(p_origin
);
543 auto dst_pitch
= pitch(region
, {{ img
.pixel_size(),
544 img
.row_pitch(), img
.slice_pitch() }});
545 auto src_pitch
= pitch(region
, {{ img
.pixel_size(),
546 row_pitch
, slice_pitch
}});
548 validate_common(q
, deps
);
549 validate_object(q
, img
, dst_origin
, region
);
550 validate_object(q
, ptr
, {}, src_pitch
, region
);
551 validate_object_access(img
, CL_MEM_HOST_WRITE_ONLY
);
553 auto hev
= create
<hard_event
>(
554 q
, CL_COMMAND_WRITE_IMAGE
, deps
,
555 soft_copy_op(q
, &img
, dst_origin
, dst_pitch
,
560 hev().wait_signalled();
562 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a copy between two images.
//
// Visible steps: resolve both images and the wait list; convert region and
// both origins with vector(); validate the wait list, both image regions
// and (via validate_copy) format match and overlap; create a hard_event for
// CL_COMMAND_COPY_IMAGE whose action is hard_copy_op(); hand the event back
// through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the last argument(s) of hard_copy_op() (presumably
// `region`), the return statement and the error handler.
570 clEnqueueCopyImage(cl_command_queue d_q
, cl_mem d_src_mem
, cl_mem d_dst_mem
,
571 const size_t *p_src_origin
, const size_t *p_dst_origin
,
572 const size_t *p_region
,
573 cl_uint num_deps
, const cl_event
*d_deps
,
574 cl_event
*rd_ev
) try {
576 auto &src_img
= obj
<image
>(d_src_mem
);
577 auto &dst_img
= obj
<image
>(d_dst_mem
);
578 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
579 auto region
= vector(p_region
);
580 auto dst_origin
= vector(p_dst_origin
);
581 auto src_origin
= vector(p_src_origin
);
583 validate_common(q
, deps
);
584 validate_object(q
, dst_img
, dst_origin
, region
);
585 validate_object(q
, src_img
, src_origin
, region
);
586 validate_copy(q
, dst_img
, dst_origin
, src_img
, src_origin
, region
);
588 auto hev
= create
<hard_event
>(
589 q
, CL_COMMAND_COPY_IMAGE
, deps
,
590 hard_copy_op(q
, &dst_img
, dst_origin
,
591 &src_img
, src_origin
,
594 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a copy from an image into a buffer.
//
// Visible steps: resolve the source image, destination buffer and wait
// list; convert the region and image origin with vector(); use
// { dst_offset } as buffer origin; build the buffer pitch from the image's
// pixel_size and the image pitch from the image's own layout; validate the
// wait list, the buffer range and the image region; create a hard_event for
// CL_COMMAND_COPY_IMAGE_TO_BUFFER whose action is soft_copy_op(); hand the
// event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// `dst_offset` parameter, the definition of `q`, the middle component of the
// image pitch initialiser (presumably the image row pitch), the tail of the
// soft_copy_op() call, the return statement and the error handler.
602 clEnqueueCopyImageToBuffer(cl_command_queue d_q
,
603 cl_mem d_src_mem
, cl_mem d_dst_mem
,
604 const size_t *p_src_origin
, const size_t *p_region
,
606 cl_uint num_deps
, const cl_event
*d_deps
,
607 cl_event
*rd_ev
) try {
609 auto &src_img
= obj
<image
>(d_src_mem
);
610 auto &dst_mem
= obj
<buffer
>(d_dst_mem
);
611 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
612 auto region
= vector(p_region
);
613 vector_t dst_origin
= { dst_offset
};
614 auto dst_pitch
= pitch(region
, {{ src_img
.pixel_size() }});
615 auto src_origin
= vector(p_src_origin
);
616 auto src_pitch
= pitch(region
, {{ src_img
.pixel_size(),
618 src_img
.slice_pitch() }});
620 validate_common(q
, deps
);
621 validate_object(q
, dst_mem
, dst_origin
, dst_pitch
, region
);
622 validate_object(q
, src_img
, src_origin
, region
);
624 auto hev
= create
<hard_event
>(
625 q
, CL_COMMAND_COPY_IMAGE_TO_BUFFER
, deps
,
626 soft_copy_op(q
, &dst_mem
, dst_origin
, dst_pitch
,
627 &src_img
, src_origin
, src_pitch
,
630 ret_object(rd_ev
, hev
);
// OpenCL entry point: enqueue a copy from a buffer into an image.
//
// Visible steps: resolve the source buffer, destination image and wait
// list; convert the region and image origin with vector(); use
// { src_offset } as buffer origin; build the image pitch from the image's
// own layout and the buffer pitch from the image's pixel_size; validate the
// wait list, the image region and the buffer range; create a hard_event for
// CL_COMMAND_COPY_BUFFER_TO_IMAGE whose action is soft_copy_op(); hand the
// event back through `rd_ev`.
//
// NOTE(review): this extract is missing lines — the return type, the
// `src_offset` parameter, the definition of `q`, the middle component of the
// image pitch initialiser (presumably the image row pitch), the tail of the
// soft_copy_op() call, the return statement and the error handler.
638 clEnqueueCopyBufferToImage(cl_command_queue d_q
,
639 cl_mem d_src_mem
, cl_mem d_dst_mem
,
641 const size_t *p_dst_origin
, const size_t *p_region
,
642 cl_uint num_deps
, const cl_event
*d_deps
,
643 cl_event
*rd_ev
) try {
645 auto &src_mem
= obj
<buffer
>(d_src_mem
);
646 auto &dst_img
= obj
<image
>(d_dst_mem
);
647 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
648 auto region
= vector(p_region
);
649 auto dst_origin
= vector(p_dst_origin
);
650 auto dst_pitch
= pitch(region
, {{ dst_img
.pixel_size(),
652 dst_img
.slice_pitch() }});
653 vector_t src_origin
= { src_offset
};
654 auto src_pitch
= pitch(region
, {{ dst_img
.pixel_size() }});
656 validate_common(q
, deps
);
657 validate_object(q
, dst_img
, dst_origin
, region
);
658 validate_object(q
, src_mem
, src_origin
, src_pitch
, region
);
660 auto hev
= create
<hard_event
>(
661 q
, CL_COMMAND_COPY_BUFFER_TO_IMAGE
, deps
,
662 soft_copy_op(q
, &dst_img
, dst_origin
, dst_pitch
,
663 &src_mem
, src_origin
, src_pitch
,
666 ret_object(rd_ev
, hev
);
// OpenCL entry point: map a range of buffer `d_mem` into host memory.
//
// Visible steps: resolve the buffer and wait list; describe the range as
// region { size, 1, 1 } at origin { offset }; validate the wait list, the
// buffer range and the map flags; call resource(q).add_map() immediately
// (not from the event action) to obtain the mapping; create a hard_event for
// CL_COMMAND_MAP_BUFFER with no action argument; wait on the event; return
// the event through `rd_ev` and CL_SUCCESS through `r_errcode`. A second
// ret_error(r_errcode, e) call reports a caught error `e`.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the guard in front of wait_signalled() (presumably
// `blocking`), the return of `map`, and the catch clause that introduces
// `e` and its return value. Confirm upstream.
674 clEnqueueMapBuffer(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
675 cl_map_flags flags
, size_t offset
, size_t size
,
676 cl_uint num_deps
, const cl_event
*d_deps
,
677 cl_event
*rd_ev
, cl_int
*r_errcode
) try {
679 auto &mem
= obj
<buffer
>(d_mem
);
680 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
681 vector_t region
= { size
, 1, 1 };
682 vector_t obj_origin
= { offset
};
683 auto obj_pitch
= pitch(region
, {{ 1 }});
685 validate_common(q
, deps
);
686 validate_object(q
, mem
, obj_origin
, obj_pitch
, region
);
687 validate_map_flags(mem
, flags
);
689 void *map
= mem
.resource(q
).add_map(q
, flags
, blocking
, obj_origin
, region
);
691 auto hev
= create
<hard_event
>(q
, CL_COMMAND_MAP_BUFFER
, deps
);
693 hev().wait_signalled();
695 ret_object(rd_ev
, hev
);
696 ret_error(r_errcode
, CL_SUCCESS
);
700 ret_error(r_errcode
, e
);
// OpenCL entry point: map a region of image `d_mem` into host memory.
//
// Visible steps: resolve the image and wait list; convert region and origin
// with vector(); validate the wait list, the image region and the map
// flags; call resource(q).add_map() immediately to obtain the mapping;
// create a hard_event for CL_COMMAND_MAP_IMAGE with no action argument; wait
// on the event; return the event through `rd_ev` and CL_SUCCESS through
// `r_errcode`. A second ret_error(r_errcode, e) call reports a caught
// error `e`.
//
// NOTE(review): the out-parameters `row_pitch` and `slice_pitch` are never
// written in the visible lines. This extract is also missing the return
// type, the `flags` parameter, the definition of `q`, the guard in front of
// wait_signalled() (presumably `blocking`), the return of `map` and the
// catch clause that introduces `e`. Confirm upstream.
705 clEnqueueMapImage(cl_command_queue d_q
, cl_mem d_mem
, cl_bool blocking
,
707 const size_t *p_origin
, const size_t *p_region
,
708 size_t *row_pitch
, size_t *slice_pitch
,
709 cl_uint num_deps
, const cl_event
*d_deps
,
710 cl_event
*rd_ev
, cl_int
*r_errcode
) try {
712 auto &img
= obj
<image
>(d_mem
);
713 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
714 auto region
= vector(p_region
);
715 auto origin
= vector(p_origin
);
717 validate_common(q
, deps
);
718 validate_object(q
, img
, origin
, region
);
719 validate_map_flags(img
, flags
);
721 void *map
= img
.resource(q
).add_map(q
, flags
, blocking
, origin
, region
);
723 auto hev
= create
<hard_event
>(q
, CL_COMMAND_MAP_IMAGE
, deps
);
725 hev().wait_signalled();
727 ret_object(rd_ev
, hev
);
728 ret_error(r_errcode
, CL_SUCCESS
);
732 ret_error(r_errcode
, e
);
// OpenCL entry point: enqueue the release of a mapping previously obtained
// for memory object `d_mem`.
//
// Visible steps: resolve the memory object and wait list; validate the wait
// list; create a hard_event for CL_COMMAND_UNMAP_MEM_OBJECT whose action
// calls mem.resource(q).del_map(ptr); hand the event back through `rd_ev`.
// The action captures `q` and `mem` by reference and `ptr` by copy, so both
// objects must still exist when the event action runs.
//
// NOTE(review): this extract is missing lines — the return type, the
// definition of `q`, the end of the lambda, the return statement and the
// error handler.
737 clEnqueueUnmapMemObject(cl_command_queue d_q
, cl_mem d_mem
, void *ptr
,
738 cl_uint num_deps
, const cl_event
*d_deps
,
739 cl_event
*rd_ev
) try {
741 auto &mem
= obj(d_mem
);
742 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
744 validate_common(q
, deps
);
746 auto hev
= create
<hard_event
>(
747 q
, CL_COMMAND_UNMAP_MEM_OBJECT
, deps
,
748 [=, &q
, &mem
](event
&) {
749 mem
.resource(q
).del_map(ptr
);
752 ret_object(rd_ev
, hev
);
// OpenCL entry point, not implemented: the visible body only invokes
// CLOVER_NOT_SUPPORTED_UNTIL("1.2") and returns CL_INVALID_VALUE; none of
// the arguments are used.
// NOTE(review): the return type, the final parameter line and the closing
// brace are missing from this extract.
760 clEnqueueMigrateMemObjects(cl_command_queue command_queue
,
761 cl_uint num_mem_objects
,
762 const cl_mem
*mem_objects
,
763 cl_mem_migration_flags flags
,
764 cl_uint num_events_in_wait_list
,
765 const cl_event
*event_wait_list
,
767 CLOVER_NOT_SUPPORTED_UNTIL("1.2");
768 return CL_INVALID_VALUE
;
// Shared implementation behind clEnqueueSVMFree: enqueues a command that
// invokes a free callback on a list of SVM pointers.
//
// Visible steps:
//  - return CL_INVALID_VALUE when exactly one of `num_svm_pointers` /
//    `svm_pointers` is zero/null (their bool values differ);
//  - query has_system_svm() into `can_emulate`; resolve and validate the
//    wait list;
//  - copy the pointer array into a std::vector so the deferred action does
//    not read the caller's array;
//  - if no callback was supplied, either report "not supported until 2.0"
//    and return CL_INVALID_VALUE, or install a default callback that walks
//    the pointer range;
//  - create a hard_event for `cmd` whose (mutable) action calls
//    pfn_free_func with d_q, the count and the copied pointers;
//  - hand the event back through `event`.
//
// NOTE(review): this extract is missing lines — the return type, the
// `user_data`, `event` and `cmd` parameters, the definition of `q`, the
// condition selecting the not-supported path (presumably `!can_emulate`),
// the body of the default callback's loop, the last pfn_free_func argument,
// the return statement and the error handler. Confirm upstream.
772 clover::EnqueueSVMFree(cl_command_queue d_q
,
773 cl_uint num_svm_pointers
,
774 void *svm_pointers
[],
775 void (CL_CALLBACK
*pfn_free_func
) (
776 cl_command_queue queue
, cl_uint num_svm_pointers
,
777 void *svm_pointers
[], void *user_data
),
779 cl_uint num_events_in_wait_list
,
780 const cl_event
*event_wait_list
,
784 if (bool(num_svm_pointers
) != bool(svm_pointers
))
785 return CL_INVALID_VALUE
;
788 bool can_emulate
= q
.device().has_system_svm();
789 auto deps
= objs
<wait_list_tag
>(event_wait_list
, num_events_in_wait_list
);
791 validate_common(q
, deps
);
793 std::vector
<void *> svm_pointers_cpy(svm_pointers
,
794 svm_pointers
+ num_svm_pointers
);
795 if (!pfn_free_func
) {
797 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
798 return CL_INVALID_VALUE
;
800 pfn_free_func
= [](cl_command_queue
, cl_uint num_svm_pointers
,
801 void *svm_pointers
[], void *) {
802 for (void *p
: range(svm_pointers
, num_svm_pointers
))
807 auto hev
= create
<hard_event
>(q
, cmd
, deps
,
808 [=](clover::event
&) mutable {
809 pfn_free_func(d_q
, num_svm_pointers
, svm_pointers_cpy
.data(),
813 ret_object(event
, hev
);
// OpenCL entry point: thin wrapper that forwards all arguments to
// EnqueueSVMFree() with CL_COMMAND_SVM_FREE as the command type.
// NOTE(review): the return type and the `user_data` / `event` parameter
// lines are missing from this extract.
821 clEnqueueSVMFree(cl_command_queue d_q
,
822 cl_uint num_svm_pointers
,
823 void *svm_pointers
[],
824 void (CL_CALLBACK
*pfn_free_func
) (
825 cl_command_queue queue
, cl_uint num_svm_pointers
,
826 void *svm_pointers
[], void *user_data
),
828 cl_uint num_events_in_wait_list
,
829 const cl_event
*event_wait_list
,
832 return EnqueueSVMFree(d_q
, num_svm_pointers
, svm_pointers
,
833 pfn_free_func
, user_data
, num_events_in_wait_list
,
834 event_wait_list
, event
, CL_COMMAND_SVM_FREE
);
// Shared implementation behind clEnqueueSVMMemcpy: enqueues a host-side
// memcpy between two SVM pointers.
//
// Visible steps:
//  - return CL_INVALID_VALUE if either pointer is null;
//  - return CL_MEM_COPY_OVERLAP if the absolute distance between the two
//    addresses is smaller than `size`;
//  - query has_system_svm() into `can_emulate`; resolve and validate the
//    wait list;
//  - create a hard_event for `cmd` whose action is
//    memcpy(dst_ptr, src_ptr, size) and hand it back through `event`;
//  - otherwise report "not supported until 2.0" and return CL_INVALID_VALUE.
//
// NOTE(review): this extract is missing lines — the return type, the
// `dst_ptr`, `src_ptr`, `size`, `event` and `cmd` parameters, the definition
// of `q`, the condition selecting the emulated path (presumably
// `can_emulate`), any use of `blocking_copy`, the success return and the
// error handler. Also confirm that the unqualified abs() resolves to an
// overload wide enough for ptrdiff_t.
838 clover::EnqueueSVMMemcpy(cl_command_queue d_q
,
839 cl_bool blocking_copy
,
843 cl_uint num_events_in_wait_list
,
844 const cl_event
*event_wait_list
,
848 if (dst_ptr
== nullptr || src_ptr
== nullptr)
849 return CL_INVALID_VALUE
;
851 if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr
) -
852 reinterpret_cast<ptrdiff_t>(src_ptr
))) < size
)
853 return CL_MEM_COPY_OVERLAP
;
856 bool can_emulate
= q
.device().has_system_svm();
857 auto deps
= objs
<wait_list_tag
>(event_wait_list
, num_events_in_wait_list
);
859 validate_common(q
, deps
);
862 auto hev
= create
<hard_event
>(q
, cmd
, deps
,
863 [=](clover::event
&) {
864 memcpy(dst_ptr
, src_ptr
, size
);
869 ret_object(event
, hev
);
873 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
874 return CL_INVALID_VALUE
;
// OpenCL entry point: thin wrapper that forwards all arguments to
// EnqueueSVMMemcpy() with CL_COMMAND_SVM_MEMCPY as the command type.
// NOTE(review): the return type and the `dst_ptr`, `src_ptr`, `size` and
// `event` parameter lines are missing from this extract.
881 clEnqueueSVMMemcpy(cl_command_queue d_q
,
882 cl_bool blocking_copy
,
886 cl_uint num_events_in_wait_list
,
887 const cl_event
*event_wait_list
,
890 return EnqueueSVMMemcpy(d_q
, blocking_copy
, dst_ptr
, src_ptr
,
891 size
, num_events_in_wait_list
, event_wait_list
,
892 event
, CL_COMMAND_SVM_MEMCPY
);
// Shared implementation behind clEnqueueSVMMemFill: enqueues a host-side
// fill of an SVM range with a repeated pattern.
//
// Visible steps:
//  - return CL_INVALID_VALUE if `svm_ptr` or `pattern` is null, if
//    `pattern_size` is not a non-zero power of two, if it exceeds 128, or
//    if `svm_ptr` is not aligned to `pattern_size` (one further condition
//    is not visible);
//  - query has_system_svm() into `can_emulate`; resolve and validate the
//    wait list;
//  - create a hard_event for `cmd` whose action copies `pattern` into
//    successive `pattern_size`-byte slots, counting `s` down from `size`;
//  - hand the event back through `event`; otherwise report "not supported
//    until 2.0" and return CL_INVALID_VALUE.
//
// NOTE(review): the fill loop only terminates when `size` is a multiple of
// `pattern_size` (s is an unsigned counter tested against zero) — presumably
// the missing validation line enforces that; confirm. The lambda also keeps
// the caller's `pattern` pointer rather than a copy of the pattern bytes.
// This extract is further missing the return type, the `svm_ptr`, `pattern`,
// `pattern_size`, `size`, `event` and `cmd` parameters, the definitions of
// `q` and `ptr`, the condition selecting the emulated path, the success
// return and the error handler.
896 clover::EnqueueSVMMemFill(cl_command_queue d_q
,
901 cl_uint num_events_in_wait_list
,
902 const cl_event
*event_wait_list
,
906 if (svm_ptr
== nullptr || pattern
== nullptr ||
907 !util_is_power_of_two_nonzero(pattern_size
) ||
908 pattern_size
> 128 ||
909 !ptr_is_aligned(svm_ptr
, pattern_size
) ||
911 return CL_INVALID_VALUE
;
914 bool can_emulate
= q
.device().has_system_svm();
915 auto deps
= objs
<wait_list_tag
>(event_wait_list
, num_events_in_wait_list
);
917 validate_common(q
, deps
);
920 auto hev
= create
<hard_event
>(q
, cmd
, deps
,
921 [=](clover::event
&) {
923 for (size_t s
= size
; s
; s
-= pattern_size
) {
924 memcpy(ptr
, pattern
, pattern_size
);
925 ptr
= static_cast<uint8_t*>(ptr
) + pattern_size
;
929 ret_object(event
, hev
);
933 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
934 return CL_INVALID_VALUE
;
// OpenCL entry point: thin wrapper that forwards all arguments to
// EnqueueSVMMemFill() with CL_COMMAND_SVM_MEMFILL as the command type.
// NOTE(review): the return type and the `svm_ptr`, `pattern`,
// `pattern_size`, `size` and `event` parameter lines are missing from this
// extract.
941 clEnqueueSVMMemFill(cl_command_queue d_q
,
946 cl_uint num_events_in_wait_list
,
947 const cl_event
*event_wait_list
,
950 return EnqueueSVMMemFill(d_q
, svm_ptr
, pattern
, pattern_size
,
951 size
, num_events_in_wait_list
, event_wait_list
,
952 event
, CL_COMMAND_SVM_MEMFILL
);
// Shared implementation behind clEnqueueSVMMap.
//
// Visible steps: return CL_INVALID_VALUE if `svm_ptr` is null or `size` is
// zero; query has_system_svm() into `can_emulate`; resolve and validate the
// wait list; create a hard_event for `cmd` whose action is an empty lambda
// (no work is done on the mapping itself) and hand it back through `event`;
// otherwise report "not supported until 2.0" and return CL_INVALID_VALUE.
// `blocking_map` and `map_flags` are not used in the visible lines.
//
// NOTE(review): this extract is missing lines — the return type, the
// `svm_ptr`, `size`, `event` and `cmd` parameters, the definition of `q`,
// the condition selecting the emulated path (presumably `can_emulate`), the
// success return and the error handler.
956 clover::EnqueueSVMMap(cl_command_queue d_q
,
957 cl_bool blocking_map
,
958 cl_map_flags map_flags
,
961 cl_uint num_events_in_wait_list
,
962 const cl_event
*event_wait_list
,
966 if (svm_ptr
== nullptr || size
== 0)
967 return CL_INVALID_VALUE
;
970 bool can_emulate
= q
.device().has_system_svm();
971 auto deps
= objs
<wait_list_tag
>(event_wait_list
, num_events_in_wait_list
);
973 validate_common(q
, deps
);
976 auto hev
= create
<hard_event
>(q
, cmd
, deps
,
977 [](clover::event
&) { });
979 ret_object(event
, hev
);
983 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
984 return CL_INVALID_VALUE
;
// OpenCL entry point: thin wrapper that forwards all arguments to
// EnqueueSVMMap() with CL_COMMAND_SVM_MAP as the command type.
// NOTE(review): the return type and the `svm_ptr`, `size` and `event`
// parameter lines are missing from this extract.
991 clEnqueueSVMMap(cl_command_queue d_q
,
992 cl_bool blocking_map
,
993 cl_map_flags map_flags
,
996 cl_uint num_events_in_wait_list
,
997 const cl_event
*event_wait_list
,
1000 return EnqueueSVMMap(d_q
, blocking_map
, map_flags
, svm_ptr
, size
,
1001 num_events_in_wait_list
, event_wait_list
, event
,
1002 CL_COMMAND_SVM_MAP
);
// Shared implementation behind clEnqueueSVMUnmap.
//
// Visible steps: return CL_INVALID_VALUE if `svm_ptr` is null; query
// has_system_svm() into `can_emulate`; resolve and validate the wait list;
// create a hard_event for `cmd` whose action is an empty lambda and hand it
// back through `event`; otherwise report "not supported until 2.0" and
// return CL_INVALID_VALUE. Errors of type `error` are caught at function
// level.
//
// NOTE(review): this extract is missing lines — the return type, the
// `svm_ptr`, `event` and `cmd` parameters, the definition of `q`, the
// condition selecting the emulated path (presumably `can_emulate`), the
// success return and the body of the catch handler.
1006 clover::EnqueueSVMUnmap(cl_command_queue d_q
,
1008 cl_uint num_events_in_wait_list
,
1009 const cl_event
*event_wait_list
,
1013 if (svm_ptr
== nullptr)
1014 return CL_INVALID_VALUE
;
1017 bool can_emulate
= q
.device().has_system_svm();
1018 auto deps
= objs
<wait_list_tag
>(event_wait_list
, num_events_in_wait_list
);
1020 validate_common(q
, deps
);
1023 auto hev
= create
<hard_event
>(q
, cmd
, deps
,
1024 [](clover::event
&) { });
1026 ret_object(event
, hev
);
1030 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
1031 return CL_INVALID_VALUE
;
1033 } catch (error
&e
) {
// OpenCL entry point: thin wrapper that forwards all arguments to
// EnqueueSVMUnmap() with CL_COMMAND_SVM_UNMAP as the command type.
// NOTE(review): the return type and the `svm_ptr` and `event` parameter
// lines are missing from this extract.
1038 clEnqueueSVMUnmap(cl_command_queue d_q
,
1040 cl_uint num_events_in_wait_list
,
1041 const cl_event
*event_wait_list
,
1044 return EnqueueSVMUnmap(d_q
, svm_ptr
, num_events_in_wait_list
,
1045 event_wait_list
, event
, CL_COMMAND_SVM_UNMAP
);
1049 clEnqueueSVMMigrateMem(cl_command_queue d_q
,
1050 cl_uint num_svm_pointers
,
1051 const void **svm_pointers
,
1052 const size_t *sizes
,
1053 const cl_mem_migration_flags flags
,
1054 cl_uint num_events_in_wait_list
,
1055 const cl_event
*event_wait_list
,
1057 CLOVER_NOT_SUPPORTED_UNTIL("2.1");
1058 return CL_INVALID_VALUE
;