// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <algorithm>
#include <functional>
#include <memory>
#include <vector>

#include "api/util.hpp"
#include "core/kernel.hpp"
#include "core/event.hpp"

using namespace clover;
30 clCreateKernel(cl_program prog
, const char *name
,
31 cl_int
*errcode_ret
) try {
33 throw error(CL_INVALID_PROGRAM
);
36 throw error(CL_INVALID_VALUE
);
38 if (prog
->binaries().empty())
39 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
41 auto sym
= prog
->binaries().begin()->second
.sym(name
);
43 ret_error(errcode_ret
, CL_SUCCESS
);
44 return new kernel(*prog
, name
, { sym
.args
.begin(), sym
.args
.end() });
46 } catch (module::noent_error
&e
) {
47 ret_error(errcode_ret
, CL_INVALID_KERNEL_NAME
);
51 ret_error(errcode_ret
, e
);
56 clCreateKernelsInProgram(cl_program prog
, cl_uint count
,
57 cl_kernel
*kerns
, cl_uint
*count_ret
) {
59 throw error(CL_INVALID_PROGRAM
);
61 if (prog
->binaries().empty())
62 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
64 auto &syms
= prog
->binaries().begin()->second
.syms
;
66 if (kerns
&& count
< syms
.size())
67 throw error(CL_INVALID_VALUE
);
70 std::transform(syms
.begin(), syms
.end(), kerns
,
71 [=](const module::symbol
&sym
) {
72 return new kernel(*prog
, compat::string(sym
.name
),
73 { sym
.args
.begin(), sym
.args
.end() });
77 *count_ret
= syms
.size();
83 clRetainKernel(cl_kernel kern
) {
85 return CL_INVALID_KERNEL
;
92 clReleaseKernel(cl_kernel kern
) {
94 return CL_INVALID_KERNEL
;
103 clSetKernelArg(cl_kernel kern
, cl_uint idx
, size_t size
,
104 const void *value
) try {
106 throw error(CL_INVALID_KERNEL
);
108 if (idx
>= kern
->args
.size())
109 throw error(CL_INVALID_ARG_INDEX
);
111 kern
->args
[idx
]->set(size
, value
);
120 clGetKernelInfo(cl_kernel kern
, cl_kernel_info param
,
121 size_t size
, void *r_buf
, size_t *r_size
) try {
122 property_buffer buf
{ r_buf
, size
, r_size
};
125 return CL_INVALID_KERNEL
;
128 case CL_KERNEL_FUNCTION_NAME
:
129 buf
.as_string() = kern
->name();
132 case CL_KERNEL_NUM_ARGS
:
133 buf
.as_scalar
<cl_uint
>() = kern
->args
.size();
136 case CL_KERNEL_REFERENCE_COUNT
:
137 buf
.as_scalar
<cl_uint
>() = kern
->ref_count();
140 case CL_KERNEL_CONTEXT
:
141 buf
.as_scalar
<cl_context
>() = &kern
->prog
.ctx
;
144 case CL_KERNEL_PROGRAM
:
145 buf
.as_scalar
<cl_program
>() = &kern
->prog
;
149 throw error(CL_INVALID_VALUE
);
159 clGetKernelWorkGroupInfo(cl_kernel kern
, cl_device_id dev
,
160 cl_kernel_work_group_info param
,
161 size_t size
, void *r_buf
, size_t *r_size
) try {
162 property_buffer buf
{ r_buf
, size
, r_size
};
165 return CL_INVALID_KERNEL
;
167 if ((!dev
&& kern
->prog
.binaries().size() != 1) ||
168 (dev
&& !kern
->prog
.binaries().count(pobj(dev
))))
169 return CL_INVALID_DEVICE
;
172 case CL_KERNEL_WORK_GROUP_SIZE
:
173 buf
.as_scalar
<size_t>() = kern
->max_block_size();
176 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE
:
177 buf
.as_vector
<size_t>() = kern
->block_size();
180 case CL_KERNEL_LOCAL_MEM_SIZE
:
181 buf
.as_scalar
<cl_ulong
>() = kern
->mem_local();
184 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
:
185 buf
.as_scalar
<size_t>() = 1;
188 case CL_KERNEL_PRIVATE_MEM_SIZE
:
189 buf
.as_scalar
<cl_ulong
>() = kern
->mem_private();
193 throw error(CL_INVALID_VALUE
);
204 /// Common argument checking shared by kernel invocation commands.
207 kernel_validate(cl_command_queue q
, cl_kernel kern
,
208 cl_uint dims
, const size_t *grid_offset
,
209 const size_t *grid_size
, const size_t *block_size
,
210 cl_uint num_deps
, const cl_event
*deps
,
213 throw error(CL_INVALID_COMMAND_QUEUE
);
216 throw error(CL_INVALID_KERNEL
);
218 if (&kern
->prog
.ctx
!= &q
->ctx
||
219 any_of([&](const cl_event ev
) {
220 return &obj(ev
).ctx
!= &q
->ctx
;
221 }, range(deps
, num_deps
)))
222 throw error(CL_INVALID_CONTEXT
);
224 if (bool(num_deps
) != bool(deps
) ||
225 any_of(is_zero(), range(deps
, num_deps
)))
226 throw error(CL_INVALID_EVENT_WAIT_LIST
);
228 if (any_of([](std::unique_ptr
<kernel::argument
> &arg
) {
231 throw error(CL_INVALID_KERNEL_ARGS
);
233 if (!kern
->prog
.binaries().count(&q
->dev
))
234 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
236 if (dims
< 1 || dims
> q
->dev
.max_block_size().size())
237 throw error(CL_INVALID_WORK_DIMENSION
);
239 if (!grid_size
|| any_of(is_zero(), range(grid_size
, dims
)))
240 throw error(CL_INVALID_GLOBAL_WORK_SIZE
);
243 if (any_of([](size_t b
, size_t max
) {
244 return b
== 0 || b
> max
;
245 }, range(block_size
, dims
),
246 q
->dev
.max_block_size()))
247 throw error(CL_INVALID_WORK_ITEM_SIZE
);
249 if (any_of(modulus(), range(grid_size
, dims
),
250 range(block_size
, dims
)))
251 throw error(CL_INVALID_WORK_GROUP_SIZE
);
253 if (fold(multiplies(), 1u, range(block_size
, dims
)) >
254 q
->dev
.max_threads_per_block())
255 throw error(CL_INVALID_WORK_GROUP_SIZE
);
260 /// Common event action shared by kernel invocation commands.
262 std::function
<void (event
&)>
263 kernel_op(cl_command_queue q
, cl_kernel kern
,
264 const std::vector
<size_t> &grid_offset
,
265 const std::vector
<size_t> &grid_size
,
266 const std::vector
<size_t> &block_size
) {
267 const std::vector
<size_t> reduced_grid_size
=
268 map(divides(), grid_size
, block_size
);
270 return [=](event
&) {
271 kern
->launch(*q
, grid_offset
, reduced_grid_size
, block_size
);
///
/// Return the first \a n elements of \a p as a vector, or a vector of
/// \a n copies of the default value \a x if \a p is NULL.  Used to
/// normalize the optional grid/block arrays the CL enqueue API hands
/// us.
///
/// NOTE(review): file-local helper (anonymous namespace upstream); the
/// body was dropped by the extraction and reconstructed from how the
/// three call sites use it (defaults 0 for offsets, 1 for sizes).
std::vector<size_t>
opt_vector(const size_t *p, unsigned n, size_t x) {
   if (p)
      return { p, p + n };
   else
      return std::vector<size_t>(n, x);
}
285 clEnqueueNDRangeKernel(cl_command_queue q
, cl_kernel kern
,
286 cl_uint dims
, const size_t *pgrid_offset
,
287 const size_t *pgrid_size
, const size_t *pblock_size
,
288 cl_uint num_deps
, const cl_event
*d_deps
,
290 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
291 auto grid_offset
= opt_vector(pgrid_offset
, dims
, 0);
292 auto grid_size
= opt_vector(pgrid_size
, dims
, 1);
293 auto block_size
= opt_vector(pblock_size
, dims
, 1);
295 kernel_validate(q
, kern
, dims
, pgrid_offset
, pgrid_size
, pblock_size
,
296 num_deps
, d_deps
, ev
);
298 hard_event
*hev
= new hard_event(
299 *q
, CL_COMMAND_NDRANGE_KERNEL
, deps
,
300 kernel_op(q
, kern
, grid_offset
, grid_size
, block_size
));
310 clEnqueueTask(cl_command_queue q
, cl_kernel kern
,
311 cl_uint num_deps
, const cl_event
*d_deps
,
313 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
314 const std::vector
<size_t> grid_offset
= { 0 };
315 const std::vector
<size_t> grid_size
= { 1 };
316 const std::vector
<size_t> block_size
= { 1 };
318 kernel_validate(q
, kern
, 1, grid_offset
.data(), grid_size
.data(),
319 block_size
.data(), num_deps
, d_deps
, ev
);
321 hard_event
*hev
= new hard_event(
322 *q
, CL_COMMAND_TASK
, deps
,
323 kernel_op(q
, kern
, grid_offset
, grid_size
, block_size
));
333 clEnqueueNativeKernel(cl_command_queue q
, void (*func
)(void *),
334 void *args
, size_t args_size
,
335 cl_uint obj_count
, const cl_mem
*obj_list
,
336 const void **obj_args
, cl_uint num_deps
,
337 const cl_event
*deps
, cl_event
*ev
) {
338 return CL_INVALID_OPERATION
;