2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
27 using namespace clover
;
30 clCreateKernel(cl_program d_prog
, const char *name
, cl_int
*r_errcode
) try {
31 auto &prog
= obj(d_prog
);
34 throw error(CL_INVALID_VALUE
);
36 auto &sym
= find(name_equals(name
), prog
.symbols());
38 ret_error(r_errcode
, CL_SUCCESS
);
39 return new kernel(prog
, name
, range(sym
.args
));
41 } catch (std::out_of_range
&e
) {
42 ret_error(r_errcode
, CL_INVALID_KERNEL_NAME
);
46 ret_error(r_errcode
, e
);
51 clCreateKernelsInProgram(cl_program d_prog
, cl_uint count
,
52 cl_kernel
*rd_kerns
, cl_uint
*r_count
) try {
53 auto &prog
= obj(d_prog
);
54 auto &syms
= prog
.symbols();
56 if (rd_kerns
&& count
< syms
.size())
57 throw error(CL_INVALID_VALUE
);
60 copy(map([&](const module::symbol
&sym
) {
61 return desc(new kernel(prog
,
62 std::string(sym
.name
.begin(),
69 *r_count
= syms
.size();
78 clRetainKernel(cl_kernel d_kern
) try {
87 clReleaseKernel(cl_kernel d_kern
) try {
88 if (obj(d_kern
).release())
98 clSetKernelArg(cl_kernel d_kern
, cl_uint idx
, size_t size
,
99 const void *value
) try {
100 obj(d_kern
).args().at(idx
).set(size
, value
);
103 } catch (std::out_of_range
&e
) {
104 return CL_INVALID_ARG_INDEX
;
111 clGetKernelInfo(cl_kernel d_kern
, cl_kernel_info param
,
112 size_t size
, void *r_buf
, size_t *r_size
) try {
113 property_buffer buf
{ r_buf
, size
, r_size
};
114 auto &kern
= obj(d_kern
);
117 case CL_KERNEL_FUNCTION_NAME
:
118 buf
.as_string() = kern
.name();
121 case CL_KERNEL_NUM_ARGS
:
122 buf
.as_scalar
<cl_uint
>() = kern
.args().size();
125 case CL_KERNEL_REFERENCE_COUNT
:
126 buf
.as_scalar
<cl_uint
>() = kern
.ref_count();
129 case CL_KERNEL_CONTEXT
:
130 buf
.as_scalar
<cl_context
>() = desc(kern
.program().context());
133 case CL_KERNEL_PROGRAM
:
134 buf
.as_scalar
<cl_program
>() = desc(kern
.program());
138 throw error(CL_INVALID_VALUE
);
148 clGetKernelWorkGroupInfo(cl_kernel d_kern
, cl_device_id d_dev
,
149 cl_kernel_work_group_info param
,
150 size_t size
, void *r_buf
, size_t *r_size
) try {
151 property_buffer buf
{ r_buf
, size
, r_size
};
152 auto &kern
= obj(d_kern
);
153 auto &dev
= (d_dev
? *pobj(d_dev
) : unique(kern
.program().devices()));
155 if (!count(dev
, kern
.program().devices()))
156 throw error(CL_INVALID_DEVICE
);
159 case CL_KERNEL_WORK_GROUP_SIZE
:
160 buf
.as_scalar
<size_t>() = dev
.max_threads_per_block();
163 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE
:
164 buf
.as_vector
<size_t>() = kern
.required_block_size();
167 case CL_KERNEL_LOCAL_MEM_SIZE
:
168 buf
.as_scalar
<cl_ulong
>() = kern
.mem_local();
171 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
:
172 buf
.as_scalar
<size_t>() = dev
.subgroup_size();
175 case CL_KERNEL_PRIVATE_MEM_SIZE
:
176 buf
.as_scalar
<cl_ulong
>() = kern
.mem_private();
180 throw error(CL_INVALID_VALUE
);
188 } catch (std::out_of_range
&e
) {
189 return CL_INVALID_DEVICE
;
193 clGetKernelArgInfo(cl_kernel d_kern
,
194 cl_uint idx
, cl_kernel_arg_info param
,
195 size_t size
, void *r_buf
, size_t *r_size
) {
196 CLOVER_NOT_SUPPORTED_UNTIL("1.2");
197 return CL_KERNEL_ARG_INFO_NOT_AVAILABLE
;
202 /// Common argument checking shared by kernel invocation commands.
205 validate_common(const command_queue
&q
, kernel
&kern
,
206 const ref_vector
<event
> &deps
) {
207 if (kern
.program().context() != q
.context() ||
208 any_of([&](const event
&ev
) {
209 return ev
.context() != q
.context();
211 throw error(CL_INVALID_CONTEXT
);
213 if (any_of([](kernel::argument
&arg
) {
216 throw error(CL_INVALID_KERNEL_ARGS
);
218 // If the command queue's device is not associated to the program, we get
219 // a module, with no sections, which will also fail the following test.
220 auto &m
= kern
.program().build(q
.device()).binary
;
221 if (!any_of(type_equals(module::section::text_executable
), m
.secs
))
222 throw error(CL_INVALID_PROGRAM_EXECUTABLE
);
226 validate_grid_size(const command_queue
&q
, cl_uint dims
,
227 const size_t *d_grid_size
) {
228 auto grid_size
= range(d_grid_size
, dims
);
230 if (dims
< 1 || dims
> q
.device().max_block_size().size())
231 throw error(CL_INVALID_WORK_DIMENSION
);
233 if (!d_grid_size
|| any_of(is_zero(), grid_size
))
234 throw error(CL_INVALID_GLOBAL_WORK_SIZE
);
240 validate_grid_offset(const command_queue
&q
, cl_uint dims
,
241 const size_t *d_grid_offset
) {
243 return range(d_grid_offset
, dims
);
245 return std::vector
<size_t>(dims
, 0);
249 validate_block_size(const command_queue
&q
, const kernel
&kern
,
250 cl_uint dims
, const size_t *d_grid_size
,
251 const size_t *d_block_size
) {
252 auto grid_size
= range(d_grid_size
, dims
);
255 auto block_size
= range(d_block_size
, dims
);
257 if (any_of(is_zero(), block_size
) ||
258 any_of(greater(), block_size
, q
.device().max_block_size()))
259 throw error(CL_INVALID_WORK_ITEM_SIZE
);
261 if (any_of(modulus(), grid_size
, block_size
))
262 throw error(CL_INVALID_WORK_GROUP_SIZE
);
264 if (fold(multiplies(), 1u, block_size
) >
265 q
.device().max_threads_per_block())
266 throw error(CL_INVALID_WORK_GROUP_SIZE
);
271 return kern
.optimal_block_size(q
, grid_size
);
277 clEnqueueNDRangeKernel(cl_command_queue d_q
, cl_kernel d_kern
,
278 cl_uint dims
, const size_t *d_grid_offset
,
279 const size_t *d_grid_size
, const size_t *d_block_size
,
280 cl_uint num_deps
, const cl_event
*d_deps
,
281 cl_event
*rd_ev
) try {
283 auto &kern
= obj(d_kern
);
284 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
285 auto grid_size
= validate_grid_size(q
, dims
, d_grid_size
);
286 auto grid_offset
= validate_grid_offset(q
, dims
, d_grid_offset
);
287 auto block_size
= validate_block_size(q
, kern
, dims
,
288 d_grid_size
, d_block_size
);
290 validate_common(q
, kern
, deps
);
292 auto hev
= create
<hard_event
>(
293 q
, CL_COMMAND_NDRANGE_KERNEL
, deps
,
294 [=, &kern
, &q
](event
&) {
295 kern
.launch(q
, grid_offset
, grid_size
, block_size
);
298 ret_object(rd_ev
, hev
);
306 clEnqueueTask(cl_command_queue d_q
, cl_kernel d_kern
,
307 cl_uint num_deps
, const cl_event
*d_deps
,
308 cl_event
*rd_ev
) try {
310 auto &kern
= obj(d_kern
);
311 auto deps
= objs
<wait_list_tag
>(d_deps
, num_deps
);
313 validate_common(q
, kern
, deps
);
315 auto hev
= create
<hard_event
>(
316 q
, CL_COMMAND_TASK
, deps
,
317 [=, &kern
, &q
](event
&) {
318 kern
.launch(q
, { 0 }, { 1 }, { 1 });
321 ret_object(rd_ev
, hev
);
329 clEnqueueNativeKernel(cl_command_queue d_q
, void (*func
)(void *),
330 void *args
, size_t args_size
,
331 cl_uint num_mems
, const cl_mem
*d_mems
,
332 const void **mem_handles
, cl_uint num_deps
,
333 const cl_event
*d_deps
, cl_event
*rd_ev
) {
334 return CL_INVALID_OPERATION
;
338 clSetKernelArgSVMPointer(cl_kernel d_kern
,
340 const void *arg_value
) try {
341 obj(d_kern
).args().at(arg_index
).set_svm(arg_value
);
344 } catch (std::out_of_range
&e
) {
345 return CL_INVALID_ARG_INDEX
;
352 clSetKernelExecInfo(cl_kernel d_kern
,
353 cl_kernel_exec_info param_name
,
354 size_t param_value_size
,
355 const void *param_value
) try {
356 auto &kern
= obj(d_kern
);
357 const bool has_system_svm
= all_of(std::mem_fn(&device::has_system_svm
),
358 kern
.program().context().devices());
361 return CL_INVALID_VALUE
;
363 switch (param_name
) {
364 case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM
:
365 case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM
: {
366 if (param_value_size
!= sizeof(cl_bool
))
367 return CL_INVALID_VALUE
;
369 cl_bool val
= *static_cast<const cl_bool
*>(param_value
);
370 if (val
== CL_TRUE
&& !has_system_svm
)
371 return CL_INVALID_OPERATION
;
376 case CL_KERNEL_EXEC_INFO_SVM_PTRS
:
377 case CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM
:
381 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
382 return CL_INVALID_VALUE
;
385 return CL_INVALID_VALUE
;