2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
27 using namespace clover
;
30 clCreateKernel(cl_program d_prog
, const char *name
, cl_int
*r_errcode
) try {
31 auto &prog
= obj(d_prog
);
34 throw error(CL_INVALID_VALUE
);
36 auto &sym
= find(name_equals(name
), prog
.symbols());
38 ret_error(r_errcode
, CL_SUCCESS
);
39 return new kernel(prog
, name
, range(sym
.args
));
41 } catch (std::out_of_range
&e
) {
42 ret_error(r_errcode
, CL_INVALID_KERNEL_NAME
);
46 ret_error(r_errcode
, e
);
51 clCreateKernelsInProgram(cl_program d_prog
, cl_uint count
,
52 cl_kernel
*rd_kerns
, cl_uint
*r_count
) try {
53 auto &prog
= obj(d_prog
);
54 auto &syms
= prog
.symbols();
56 if (rd_kerns
&& count
< syms
.size())
57 throw error(CL_INVALID_VALUE
);
60 copy(map([&](const module::symbol
&sym
) {
61 return desc(new kernel(prog
,
62 std::string(sym
.name
.begin(),
69 *r_count
= syms
.size();
78 clRetainKernel(cl_kernel d_kern
) try {
87 clReleaseKernel(cl_kernel d_kern
) try {
88 if (obj(d_kern
).release())
98 clSetKernelArg(cl_kernel d_kern
, cl_uint idx
, size_t size
,
99 const void *value
) try {
100 obj(d_kern
).args().at(idx
).set(size
, value
);
103 } catch (std::out_of_range
&e
) {
104 return CL_INVALID_ARG_INDEX
;
111 clGetKernelInfo(cl_kernel d_kern
, cl_kernel_info param
,
112 size_t size
, void *r_buf
, size_t *r_size
) try {
113 property_buffer buf
{ r_buf
, size
, r_size
};
114 auto &kern
= obj(d_kern
);
117 case CL_KERNEL_FUNCTION_NAME
:
118 buf
.as_string() = kern
.name();
121 case CL_KERNEL_NUM_ARGS
:
122 buf
.as_scalar
<cl_uint
>() = kern
.args().size();
125 case CL_KERNEL_REFERENCE_COUNT
:
126 buf
.as_scalar
<cl_uint
>() = kern
.ref_count();
129 case CL_KERNEL_CONTEXT
:
130 buf
.as_scalar
<cl_context
>() = desc(kern
.program().context());
133 case CL_KERNEL_PROGRAM
:
134 buf
.as_scalar
<cl_program
>() = desc(kern
.program());
138 throw error(CL_INVALID_VALUE
);
148 clGetKernelWorkGroupInfo(cl_kernel d_kern
, cl_device_id d_dev
,
149 cl_kernel_work_group_info param
,
150 size_t size
, void *r_buf
, size_t *r_size
) try {
151 property_buffer buf
{ r_buf
, size
, r_size
};
152 auto &kern
= obj(d_kern
);
153 auto &dev
= (d_dev
? *pobj(d_dev
) : unique(kern
.program().devices()));
155 if (!count(dev
, kern
.program().devices()))
156 throw error(CL_INVALID_DEVICE
);
159 case CL_KERNEL_WORK_GROUP_SIZE
:
160 buf
.as_scalar
<size_t>() = dev
.max_threads_per_block();
163 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE
:
164 buf
.as_vector
<size_t>() = kern
.required_block_size();
167 case CL_KERNEL_LOCAL_MEM_SIZE
:
168 buf
.as_scalar
<cl_ulong
>() = kern
.mem_local();
171 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
:
172 buf
.as_scalar
<size_t>() = dev
.subgroup_size();
175 case CL_KERNEL_PRIVATE_MEM_SIZE
:
176 buf
.as_scalar
<cl_ulong
>() = kern
.mem_private();
180 throw error(CL_INVALID_VALUE
);
188 } catch (std::out_of_range
&e
) {
189 return CL_INVALID_DEVICE
;
193 clGetKernelArgInfo(cl_kernel d_kern
,
194 cl_uint idx
, cl_kernel_arg_info param
,
195 size_t size
, void *r_buf
, size_t *r_size
) {
196 CLOVER_NOT_SUPPORTED_UNTIL("1.2");
197 return CL_KERNEL_ARG_INFO_NOT_AVAILABLE
;
202 /// Common argument checking shared by kernel invocation commands.
205 validate_common(const command_queue
&q
, kernel
&kern
,
206 const ref_vector
<event
> &deps
) {
207 if (kern
.program().context() != q
.context() ||
208 any_of([&](const event
&ev
) {
209 return ev
.context() != q
.context();
211 throw error(CL_INVALID_CONTEXT
);
213 if (any_of([](kernel::argument
&arg
) {
216 throw error(CL_INVALID_KERNEL_ARGS
);
218 if (!count(q
.device(), kern
.program().devices()))
219 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
223 validate_grid_size(const command_queue
&q
, cl_uint dims
,
224 const size_t *d_grid_size
) {
225 auto grid_size
= range(d_grid_size
, dims
);
227 if (dims
< 1 || dims
> q
.device().max_block_size().size())
228 throw error(CL_INVALID_WORK_DIMENSION
);
230 if (!d_grid_size
|| any_of(is_zero(), grid_size
))
231 throw error(CL_INVALID_GLOBAL_WORK_SIZE
);
237 validate_grid_offset(const command_queue
&q
, cl_uint dims
,
238 const size_t *d_grid_offset
) {
240 return range(d_grid_offset
, dims
);
242 return std::vector
<size_t>(dims
, 0);
246 validate_block_size(const command_queue
&q
, const kernel
&kern
,
247 cl_uint dims
, const size_t *d_grid_size
,
248 const size_t *d_block_size
) {
249 auto grid_size
= range(d_grid_size
, dims
);
252 auto block_size
= range(d_block_size
, dims
);
254 if (any_of(is_zero(), block_size
) ||
255 any_of(greater(), block_size
, q
.device().max_block_size()))
256 throw error(CL_INVALID_WORK_ITEM_SIZE
);
258 if (any_of(modulus(), grid_size
, block_size
))
259 throw error(CL_INVALID_WORK_GROUP_SIZE
);
261 if (fold(multiplies(), 1u, block_size
) >
262 q
.device().max_threads_per_block())
263 throw error(CL_INVALID_WORK_GROUP_SIZE
);
268 return kern
.optimal_block_size(q
, grid_size
);
274 clEnqueueNDRangeKernel(cl_command_queue d_q
, cl_kernel d_kern
,
275 cl_uint dims
, const size_t *d_grid_offset
,
276 const size_t *d_grid_size
, const size_t *d_block_size
,
277 cl_uint num_deps
, const cl_event
*d_deps
,
278 cl_event
*rd_ev
) try {
280 auto &kern
= obj(d_kern
);
281 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
282 auto grid_size
= validate_grid_size(q
, dims
, d_grid_size
);
283 auto grid_offset
= validate_grid_offset(q
, dims
, d_grid_offset
);
284 auto block_size
= validate_block_size(q
, kern
, dims
,
285 d_grid_size
, d_block_size
);
287 validate_common(q
, kern
, deps
);
289 auto hev
= create
<hard_event
>(
290 q
, CL_COMMAND_NDRANGE_KERNEL
, deps
,
291 [=, &kern
, &q
](event
&) {
292 kern
.launch(q
, grid_offset
, grid_size
, block_size
);
295 ret_object(rd_ev
, hev
);
303 clEnqueueTask(cl_command_queue d_q
, cl_kernel d_kern
,
304 cl_uint num_deps
, const cl_event
*d_deps
,
305 cl_event
*rd_ev
) try {
307 auto &kern
= obj(d_kern
);
308 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
310 validate_common(q
, kern
, deps
);
312 auto hev
= create
<hard_event
>(
313 q
, CL_COMMAND_TASK
, deps
,
314 [=, &kern
, &q
](event
&) {
315 kern
.launch(q
, { 0 }, { 1 }, { 1 });
318 ret_object(rd_ev
, hev
);
326 clEnqueueNativeKernel(cl_command_queue d_q
, void (*func
)(void *),
327 void *args
, size_t args_size
,
328 cl_uint num_mems
, const cl_mem
*d_mems
,
329 const void **mem_handles
, cl_uint num_deps
,
330 const cl_event
*d_deps
, cl_event
*rd_ev
) {
331 return CL_INVALID_OPERATION
;