gallium: rename 'state tracker' to 'frontend'
[mesa.git] src/gallium/frontends/clover/api/transfer.cpp
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

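   ///
   /// Builds a vector_t out of the first three elements of the
   /// array \a p.
   ///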
   vector_t
   vector(const size_t *p) {
      return range(p, 3);
   }

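   ///
   /// Returns \a pitch with any zero component replaced by the
   /// natural pitch of the previous dimension.
   ///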
   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };

      if (!q.device().image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  The return value of get() should be implicitly
   /// convertible to \a void *.
   ///
   template<typename T>
   struct _map {
      static mapping
      get(command_queue &q, T obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return { q, obj->resource(q), flags, true,
                  {{ offset }}, {{ size, 1, 1 }} };
      }
   };

   template<>
   struct _map<void *> {
      static void *
      get(command_queue &q, void *obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return (char *)obj + offset;
      }
   };

   template<>
   struct _map<const void *> {
      static const void *
      get(command_queue &q, const void *obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return (const char *)obj + offset;
      }
   };

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
                                 dot(dst_pitch, dst_orig),
                                 size(dst_pitch, region));
         auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
                                 dot(src_pitch, src_orig),
                                 size(src_pitch, region));
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst_pitch, v),
                  static_cast<const char *>(src) + dot(src_pitch, v),
                  src_pitch[0] * region[0]);
            }
         }
      };
   }

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource(q).copy(q, dst_orig, region,
                                   src_obj->resource(q), src_orig);
      };
   }
}

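///
/// Copies \a size bytes starting at \a offset from the buffer into
/// host memory at \a ptr.  Blocks until the transfer has completed
/// if \a blocking is set.
///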
CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

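///
/// Copies \a size bytes from host memory at \a ptr into the buffer
/// starting at \a offset.  Blocks until the transfer has completed
/// if \a blocking is set.
///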
CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

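///
/// Copies a three-dimensional region from the buffer into host
/// memory, with independently specified row and slice pitches on
/// either side.  Zero pitches are replaced with the natural pitch
/// of the region.
///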
CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

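///
/// Copies a three-dimensional region from host memory into the
/// buffer, the inverse of clEnqueueReadBufferRect().
///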
CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

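///
/// Copies \a size bytes between two buffer objects on the device.
/// Overlapping copies within the same buffer are rejected with
/// CL_MEM_COPY_OVERLAP.
///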
CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

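///
/// Copies a three-dimensional region between two buffer objects.
/// Unlike clEnqueueCopyBuffer() this is implemented as a mapped
/// software copy rather than a device-side resource copy.
///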
CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

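///
/// Copies a region of the image into host memory at \a ptr, using
/// the image's own pixel size and pitches on the source side.
///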
CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

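///
/// Copies host memory at \a ptr into a region of the image, the
/// inverse of clEnqueueReadImage().
///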
CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

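///
/// Copies a region between two image objects on the device.  Both
/// images must have the same format, and overlapping copies within
/// the same image are rejected with CL_MEM_COPY_OVERLAP.
///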
CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

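///
/// Copies a region of an image into a buffer object, laid out
/// tightly packed starting at \a dst_offset.
///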
CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

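///
/// Copies tightly packed pixel data from a buffer object into a
/// region of an image, the inverse of clEnqueueCopyImageToBuffer().
///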
CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

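///
/// Returns a host pointer to the specified range of the buffer.
/// The mapping is created immediately and, if \a blocking is set,
/// the call waits until the map event has signalled.
///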
CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

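///
/// Returns a host pointer to the specified region of the image.
///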
CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   void *map = img.resource(q).add_map(q, flags, blocking, origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

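///
/// Releases a mapping previously created by clEnqueueMapBuffer() or
/// clEnqueueMapImage() once the event's dependencies are satisfied.
///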
CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,
                           cl_uint num_mem_objects,
                           const cl_mem *mem_objects,
                           cl_mem_migration_flags flags,
                           cl_uint num_events_in_wait_list,
                           const cl_event *event_wait_list,
                           cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
   return CL_INVALID_VALUE;
}

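///
/// Shared implementation of clEnqueueSVMFree().  Queues a callback
/// that releases the given SVM pointers; if no callback is provided
/// and the device has system SVM, each pointer is passed to free().
///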
cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                          cl_command_queue queue, cl_uint num_svm_pointers,
                          void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         for (void *p : range(svm_pointers, num_svm_pointers))
            free(p);
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

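///
/// Shared implementation of clEnqueueSVMMemcpy().  Rejects
/// overlapping ranges with CL_MEM_COPY_OVERLAP and performs a plain
/// memcpy when the device supports system SVM.
///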
cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

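///
/// Shared implementation of clEnqueueSVMMemFill().  The pattern size
/// must be a power of two no larger than 128 bytes, \a svm_ptr must
/// be aligned to it and \a size must be a multiple of it.
///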
cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

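///
/// Shared implementation of clEnqueueSVMMap().  With system SVM the
/// mapping is a no-op, so only the associated event is created.
///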
cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

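///
/// Shared implementation of clEnqueueSVMUnmap().  As with
/// EnqueueSVMMap() this reduces to creating the associated event.
///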
cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("2.1");
   return CL_INVALID_VALUE;
}