// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "api/util.hpp"
#include "core/kernel.hpp"
#include "core/event.hpp"

#include <cstddef>
#include <stdexcept>
#include <vector>

using namespace clover;
30 clCreateKernel(cl_program d_prog
, const char *name
, cl_int
*r_errcode
) try {
31 auto &prog
= obj(d_prog
);
34 throw error(CL_INVALID_VALUE
);
36 auto &sym
= find(name_equals(name
), prog
.symbols());
38 ret_error(r_errcode
, CL_SUCCESS
);
39 return new kernel(prog
, name
, range(sym
.args
));
41 } catch (std::out_of_range
&e
) {
42 ret_error(r_errcode
, CL_INVALID_KERNEL_NAME
);
46 ret_error(r_errcode
, e
);
51 clCreateKernelsInProgram(cl_program d_prog
, cl_uint count
,
52 cl_kernel
*rd_kerns
, cl_uint
*r_count
) try {
53 auto &prog
= obj(d_prog
);
54 auto &syms
= prog
.symbols();
56 if (rd_kerns
&& count
< syms
.size())
57 throw error(CL_INVALID_VALUE
);
60 copy(map([&](const module::symbol
&sym
) {
61 return desc(new kernel(prog
, compat::string(sym
.name
),
67 *r_count
= syms
.size();
76 clRetainKernel(cl_kernel d_kern
) try {
85 clReleaseKernel(cl_kernel d_kern
) try {
86 if (obj(d_kern
).release())
96 clSetKernelArg(cl_kernel d_kern
, cl_uint idx
, size_t size
,
97 const void *value
) try {
98 obj(d_kern
).args().at(idx
).set(size
, value
);
101 } catch (std::out_of_range
&e
) {
102 return CL_INVALID_ARG_INDEX
;
109 clGetKernelInfo(cl_kernel d_kern
, cl_kernel_info param
,
110 size_t size
, void *r_buf
, size_t *r_size
) try {
111 property_buffer buf
{ r_buf
, size
, r_size
};
112 auto &kern
= obj(d_kern
);
115 case CL_KERNEL_FUNCTION_NAME
:
116 buf
.as_string() = kern
.name();
119 case CL_KERNEL_NUM_ARGS
:
120 buf
.as_scalar
<cl_uint
>() = kern
.args().size();
123 case CL_KERNEL_REFERENCE_COUNT
:
124 buf
.as_scalar
<cl_uint
>() = kern
.ref_count();
127 case CL_KERNEL_CONTEXT
:
128 buf
.as_scalar
<cl_context
>() = desc(kern
.prog
.ctx
);
131 case CL_KERNEL_PROGRAM
:
132 buf
.as_scalar
<cl_program
>() = desc(kern
.prog
);
136 throw error(CL_INVALID_VALUE
);
146 clGetKernelWorkGroupInfo(cl_kernel d_kern
, cl_device_id d_dev
,
147 cl_kernel_work_group_info param
,
148 size_t size
, void *r_buf
, size_t *r_size
) try {
149 property_buffer buf
{ r_buf
, size
, r_size
};
150 auto &kern
= obj(d_kern
);
151 auto &dev
= (d_dev
? *pobj(d_dev
) : unique(kern
.prog
.devices()));
153 if (!count(dev
, kern
.prog
.devices()))
154 throw error(CL_INVALID_DEVICE
);
157 case CL_KERNEL_WORK_GROUP_SIZE
:
158 buf
.as_scalar
<size_t>() = dev
.max_threads_per_block();
161 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE
:
162 buf
.as_vector
<size_t>() = kern
.block_size();
165 case CL_KERNEL_LOCAL_MEM_SIZE
:
166 buf
.as_scalar
<cl_ulong
>() = kern
.mem_local();
169 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
:
170 buf
.as_scalar
<size_t>() = 1;
173 case CL_KERNEL_PRIVATE_MEM_SIZE
:
174 buf
.as_scalar
<cl_ulong
>() = kern
.mem_private();
178 throw error(CL_INVALID_VALUE
);
186 } catch (std::out_of_range
&e
) {
187 return CL_INVALID_DEVICE
;
192 /// Common argument checking shared by kernel invocation commands.
195 validate_common(const command_queue
&q
, kernel
&kern
,
196 const ref_vector
<event
> &deps
) {
197 if (kern
.prog
.ctx
!= q
.ctx
||
198 any_of([&](const event
&ev
) {
199 return ev
.ctx
!= q
.ctx
;
201 throw error(CL_INVALID_CONTEXT
);
203 if (any_of([](kernel::argument
&arg
) {
206 throw error(CL_INVALID_KERNEL_ARGS
);
208 if (!count(q
.dev
, kern
.prog
.devices()))
209 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
213 validate_grid_size(const command_queue
&q
, cl_uint dims
,
214 const size_t *d_grid_size
) {
215 auto grid_size
= range(d_grid_size
, dims
);
217 if (dims
< 1 || dims
> q
.dev
.max_block_size().size())
218 throw error(CL_INVALID_WORK_DIMENSION
);
220 if (!d_grid_size
|| any_of(is_zero(), grid_size
))
221 throw error(CL_INVALID_GLOBAL_WORK_SIZE
);
// NOTE(review): the lines below appear to be the interior of a work-group
// (block) size validator whose signature was lost in this extraction: they
// reference `d_block_size`, `grid_size` and `q`, none of which are declared
// in the preceding visible function.  Kept byte-identical; confirm the
// enclosing signature against the original file.
// Reject a zero or over-limit dimension in the requested block size.
224 auto block_size
= range(d_block_size
, dims
);
226 if (any_of(is_zero(), block_size
) ||
227 any_of(greater(), block_size
, q
.dev
.max_block_size()))
228 throw error(CL_INVALID_WORK_ITEM_SIZE
);
// The global grid must divide evenly by the block in every dimension.
230 if (any_of(modulus(), grid_size
, block_size
))
231 throw error(CL_INVALID_WORK_GROUP_SIZE
);
// The total number of work-items per block must not exceed the
// device limit.
233 if (fold(multiplies(), 1u, block_size
) >
234 q
.dev
.max_threads_per_block())
235 throw error(CL_INVALID_WORK_GROUP_SIZE
);
///
/// Return an \a n element vector with the contents of \a p, or \a n
/// copies of \a x when \a p is NULL.
///
/// NOTE(review): the body of this helper was dropped by the extraction;
/// restored from its call sites (NULL grid offset -> zeros, NULL block
/// size -> unit block) — confirm against upstream.
///
std::vector<size_t>
pad_vector(const size_t *p, unsigned n, size_t x) {
   if (p)
      return { p, p + n };
   else
      return std::vector<size_t>(n, x);
}
249 clEnqueueNDRangeKernel(cl_command_queue d_q
, cl_kernel d_kern
,
250 cl_uint dims
, const size_t *d_grid_offset
,
251 const size_t *d_grid_size
, const size_t *d_block_size
,
252 cl_uint num_deps
, const cl_event
*d_deps
,
253 cl_event
*rd_ev
) try {
255 auto &kern
= obj(d_kern
);
256 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
258 validate_common(q
, kern
, deps
);
259 validate_grid(q
, dims
, d_grid_size
, d_block_size
);
261 auto grid_offset
= pad_vector(d_grid_offset
, dims
, 0);
262 auto grid_size
= pad_vector(d_grid_size
, dims
, 1);
263 auto block_size
= pad_vector(d_block_size
, dims
, 1);
264 hard_event
*hev
= new hard_event(
265 q
, CL_COMMAND_NDRANGE_KERNEL
, deps
,
266 [=, &kern
, &q
](event
&) {
267 kern
.launch(q
, grid_offset
, grid_size
, block_size
);
270 ret_object(rd_ev
, hev
);
278 clEnqueueTask(cl_command_queue d_q
, cl_kernel d_kern
,
279 cl_uint num_deps
, const cl_event
*d_deps
,
280 cl_event
*rd_ev
) try {
282 auto &kern
= obj(d_kern
);
283 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
285 validate_common(q
, kern
, deps
);
287 hard_event
*hev
= new hard_event(
288 q
, CL_COMMAND_TASK
, deps
,
289 [=, &kern
, &q
](event
&) {
290 kern
.launch(q
, { 0 }, { 1 }, { 1 });
293 ret_object(rd_ev
, hev
);
301 clEnqueueNativeKernel(cl_command_queue d_q
, void (*func
)(void *),
302 void *args
, size_t args_size
,
303 cl_uint num_mems
, const cl_mem
*d_mems
,
304 const void **mem_handles
, cl_uint num_deps
,
305 const cl_event
*d_deps
, cl_event
*rd_ev
) {
306 return CL_INVALID_OPERATION
;