2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
27 using namespace clover
;
30 clCreateKernel(cl_program prog
, const char *name
,
31 cl_int
*errcode_ret
) try {
33 throw error(CL_INVALID_PROGRAM
);
36 throw error(CL_INVALID_VALUE
);
38 if (prog
->binaries().empty())
39 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
41 auto sym
= prog
->binaries().begin()->second
.sym(name
);
43 ret_error(errcode_ret
, CL_SUCCESS
);
44 return new kernel(*prog
, name
, { sym
.args
.begin(), sym
.args
.end() });
46 } catch (module::noent_error
&e
) {
47 ret_error(errcode_ret
, CL_INVALID_KERNEL_NAME
);
51 ret_error(errcode_ret
, e
);
56 clCreateKernelsInProgram(cl_program prog
, cl_uint count
,
57 cl_kernel
*kerns
, cl_uint
*count_ret
) {
59 throw error(CL_INVALID_PROGRAM
);
61 if (prog
->binaries().empty())
62 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
64 auto &syms
= prog
->binaries().begin()->second
.syms
;
66 if (kerns
&& count
< syms
.size())
67 throw error(CL_INVALID_VALUE
);
70 std::transform(syms
.begin(), syms
.end(), kerns
,
71 [=](const module::symbol
&sym
) {
72 return new kernel(*prog
, compat::string(sym
.name
),
73 { sym
.args
.begin(), sym
.args
.end() });
77 *count_ret
= syms
.size();
83 clRetainKernel(cl_kernel kern
) {
85 return CL_INVALID_KERNEL
;
92 clReleaseKernel(cl_kernel kern
) {
94 return CL_INVALID_KERNEL
;
103 clSetKernelArg(cl_kernel kern
, cl_uint idx
, size_t size
,
104 const void *value
) try {
106 throw error(CL_INVALID_KERNEL
);
108 if (idx
>= kern
->args
.size())
109 throw error(CL_INVALID_ARG_INDEX
);
111 kern
->args
[idx
]->set(size
, value
);
120 clGetKernelInfo(cl_kernel kern
, cl_kernel_info param
,
121 size_t size
, void *buf
, size_t *size_ret
) {
123 return CL_INVALID_KERNEL
;
126 case CL_KERNEL_FUNCTION_NAME
:
127 return string_property(buf
, size
, size_ret
, kern
->name());
129 case CL_KERNEL_NUM_ARGS
:
130 return scalar_property
<cl_uint
>(buf
, size
, size_ret
,
133 case CL_KERNEL_REFERENCE_COUNT
:
134 return scalar_property
<cl_uint
>(buf
, size
, size_ret
,
137 case CL_KERNEL_CONTEXT
:
138 return scalar_property
<cl_context
>(buf
, size
, size_ret
,
141 case CL_KERNEL_PROGRAM
:
142 return scalar_property
<cl_program
>(buf
, size
, size_ret
,
146 return CL_INVALID_VALUE
;
151 clGetKernelWorkGroupInfo(cl_kernel kern
, cl_device_id dev
,
152 cl_kernel_work_group_info param
,
153 size_t size
, void *buf
, size_t *size_ret
) {
155 return CL_INVALID_KERNEL
;
157 if ((!dev
&& kern
->prog
.binaries().size() != 1) ||
158 (dev
&& !kern
->prog
.binaries().count(dev
)))
159 return CL_INVALID_DEVICE
;
162 case CL_KERNEL_WORK_GROUP_SIZE
:
163 return scalar_property
<size_t>(buf
, size
, size_ret
,
164 kern
->max_block_size());
166 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE
:
167 return vector_property
<size_t>(buf
, size
, size_ret
,
170 case CL_KERNEL_LOCAL_MEM_SIZE
:
171 return scalar_property
<cl_ulong
>(buf
, size
, size_ret
,
174 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
:
175 return scalar_property
<size_t>(buf
, size
, size_ret
, 1);
177 case CL_KERNEL_PRIVATE_MEM_SIZE
:
178 return scalar_property
<cl_ulong
>(buf
, size
, size_ret
,
179 kern
->mem_private());
182 return CL_INVALID_VALUE
;
188 /// Common argument checking shared by kernel invocation commands.
191 kernel_validate(cl_command_queue q
, cl_kernel kern
,
192 cl_uint dims
, const size_t *grid_offset
,
193 const size_t *grid_size
, const size_t *block_size
,
194 cl_uint num_deps
, const cl_event
*deps
,
197 throw error(CL_INVALID_COMMAND_QUEUE
);
200 throw error(CL_INVALID_KERNEL
);
202 if (&kern
->prog
.ctx
!= &q
->ctx
||
203 any_of([&](const cl_event ev
) {
204 return &ev
->ctx
!= &q
->ctx
;
205 }, deps
, deps
+ num_deps
))
206 throw error(CL_INVALID_CONTEXT
);
208 if (bool(num_deps
) != bool(deps
) ||
209 any_of(is_zero
<cl_event
>, deps
, deps
+ num_deps
))
210 throw error(CL_INVALID_EVENT_WAIT_LIST
);
212 if (any_of([](std::unique_ptr
<kernel::argument
> &arg
) {
214 }, kern
->args
.begin(), kern
->args
.end()))
215 throw error(CL_INVALID_KERNEL_ARGS
);
217 if (!kern
->prog
.binaries().count(&q
->dev
))
218 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
220 if (dims
< 1 || dims
> q
->dev
.max_block_size().size())
221 throw error(CL_INVALID_WORK_DIMENSION
);
223 if (!grid_size
|| any_of(is_zero
<size_t>, grid_size
, grid_size
+ dims
))
224 throw error(CL_INVALID_GLOBAL_WORK_SIZE
);
227 if (any_of([](size_t b
, size_t max
) {
228 return b
== 0 || b
> max
;
229 }, block_size
, block_size
+ dims
,
230 q
->dev
.max_block_size().begin()))
231 throw error(CL_INVALID_WORK_ITEM_SIZE
);
233 if (any_of([](size_t b
, size_t g
) {
235 }, block_size
, block_size
+ dims
, grid_size
))
236 throw error(CL_INVALID_WORK_GROUP_SIZE
);
238 if (fold(std::multiplies
<size_t>(), 1u,
239 block_size
, block_size
+ dims
) >
240 q
->dev
.max_threads_per_block())
241 throw error(CL_INVALID_WORK_GROUP_SIZE
);
246 /// Common event action shared by kernel invocation commands.
248 std::function
<void (event
&)>
249 kernel_op(cl_command_queue q
, cl_kernel kern
,
250 const std::vector
<size_t> &grid_offset
,
251 const std::vector
<size_t> &grid_size
,
252 const std::vector
<size_t> &block_size
) {
253 const std::vector
<size_t> reduced_grid_size
= map(
254 std::divides
<size_t>(), grid_size
.begin(), grid_size
.end(),
257 return [=](event
&) {
258 kern
->launch(*q
, grid_offset
, reduced_grid_size
, block_size
);
263 opt_vector(const size_t *p
, unsigned n
, size_t x
) {
272 clEnqueueNDRangeKernel(cl_command_queue q
, cl_kernel kern
,
273 cl_uint dims
, const size_t *pgrid_offset
,
274 const size_t *pgrid_size
, const size_t *pblock_size
,
275 cl_uint num_deps
, const cl_event
*deps
,
277 auto grid_offset
= opt_vector(pgrid_offset
, dims
, 0);
278 auto grid_size
= opt_vector(pgrid_size
, dims
, 1);
279 auto block_size
= opt_vector(pblock_size
, dims
, 1);
281 kernel_validate(q
, kern
, dims
, pgrid_offset
, pgrid_size
, pblock_size
,
284 hard_event
*hev
= new hard_event(
285 *q
, CL_COMMAND_NDRANGE_KERNEL
, { deps
, deps
+ num_deps
},
286 kernel_op(q
, kern
, grid_offset
, grid_size
, block_size
));
296 clEnqueueTask(cl_command_queue q
, cl_kernel kern
,
297 cl_uint num_deps
, const cl_event
*deps
,
299 const std::vector
<size_t> grid_offset
= { 0 };
300 const std::vector
<size_t> grid_size
= { 1 };
301 const std::vector
<size_t> block_size
= { 1 };
303 kernel_validate(q
, kern
, 1, grid_offset
.data(), grid_size
.data(),
304 block_size
.data(), num_deps
, deps
, ev
);
306 hard_event
*hev
= new hard_event(
307 *q
, CL_COMMAND_TASK
, { deps
, deps
+ num_deps
},
308 kernel_op(q
, kern
, grid_offset
, grid_size
, block_size
));
318 clEnqueueNativeKernel(cl_command_queue q
, void (*func
)(void *),
319 void *args
, size_t args_size
,
320 cl_uint obj_count
, const cl_mem
*obj_list
,
321 const void **obj_args
, cl_uint num_deps
,
322 const cl_event
*deps
, cl_event
*ev
) {
323 return CL_INVALID_OPERATION
;