f2f19690de7b3428d499a96787379716b8be9210
[mesa.git] / src / gallium / state_trackers / clover / api / kernel.cpp
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
26
27 using namespace clover;
28
29 CLOVER_API cl_kernel
30 clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
31 auto &prog = obj(d_prog);
32
33 if (!name)
34 throw error(CL_INVALID_VALUE);
35
36 auto &sym = find(name_equals(name), prog.symbols());
37
38 ret_error(r_errcode, CL_SUCCESS);
39 return new kernel(prog, name, range(sym.args));
40
41 } catch (std::out_of_range &e) {
42 ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
43 return NULL;
44
45 } catch (error &e) {
46 ret_error(r_errcode, e);
47 return NULL;
48 }
49
50 CLOVER_API cl_int
51 clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
52 cl_kernel *rd_kerns, cl_uint *r_count) try {
53 auto &prog = obj(d_prog);
54 auto &syms = prog.symbols();
55
56 if (rd_kerns && count < syms.size())
57 throw error(CL_INVALID_VALUE);
58
59 if (rd_kerns)
60 copy(map([&](const module::symbol &sym) {
61 return desc(new kernel(prog, compat::string(sym.name),
62 range(sym.args)));
63 }, syms),
64 rd_kerns);
65
66 if (r_count)
67 *r_count = syms.size();
68
69 return CL_SUCCESS;
70
71 } catch (error &e) {
72 return e.get();
73 }
74
75 CLOVER_API cl_int
76 clRetainKernel(cl_kernel d_kern) try {
77 obj(d_kern).retain();
78 return CL_SUCCESS;
79
80 } catch (error &e) {
81 return e.get();
82 }
83
84 CLOVER_API cl_int
85 clReleaseKernel(cl_kernel d_kern) try {
86 if (obj(d_kern).release())
87 delete pobj(d_kern);
88
89 return CL_SUCCESS;
90
91 } catch (error &e) {
92 return e.get();
93 }
94
95 CLOVER_API cl_int
96 clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
97 const void *value) try {
98 obj(d_kern).args().at(idx).set(size, value);
99 return CL_SUCCESS;
100
101 } catch (std::out_of_range &e) {
102 return CL_INVALID_ARG_INDEX;
103
104 } catch (error &e) {
105 return e.get();
106 }
107
108 CLOVER_API cl_int
109 clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
110 size_t size, void *r_buf, size_t *r_size) try {
111 property_buffer buf { r_buf, size, r_size };
112 auto &kern = obj(d_kern);
113
114 switch (param) {
115 case CL_KERNEL_FUNCTION_NAME:
116 buf.as_string() = kern.name();
117 break;
118
119 case CL_KERNEL_NUM_ARGS:
120 buf.as_scalar<cl_uint>() = kern.args().size();
121 break;
122
123 case CL_KERNEL_REFERENCE_COUNT:
124 buf.as_scalar<cl_uint>() = kern.ref_count();
125 break;
126
127 case CL_KERNEL_CONTEXT:
128 buf.as_scalar<cl_context>() = desc(kern.prog.ctx);
129 break;
130
131 case CL_KERNEL_PROGRAM:
132 buf.as_scalar<cl_program>() = desc(kern.prog);
133 break;
134
135 default:
136 throw error(CL_INVALID_VALUE);
137 }
138
139 return CL_SUCCESS;
140
141 } catch (error &e) {
142 return e.get();
143 }
144
145 CLOVER_API cl_int
146 clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
147 cl_kernel_work_group_info param,
148 size_t size, void *r_buf, size_t *r_size) try {
149 property_buffer buf { r_buf, size, r_size };
150 auto &kern = obj(d_kern);
151 auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.prog.devices()));
152
153 if (!count(dev, kern.prog.devices()))
154 throw error(CL_INVALID_DEVICE);
155
156 switch (param) {
157 case CL_KERNEL_WORK_GROUP_SIZE:
158 buf.as_scalar<size_t>() = dev.max_threads_per_block();
159 break;
160
161 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
162 buf.as_vector<size_t>() = kern.block_size();
163 break;
164
165 case CL_KERNEL_LOCAL_MEM_SIZE:
166 buf.as_scalar<cl_ulong>() = kern.mem_local();
167 break;
168
169 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
170 buf.as_scalar<size_t>() = 1;
171 break;
172
173 case CL_KERNEL_PRIVATE_MEM_SIZE:
174 buf.as_scalar<cl_ulong>() = kern.mem_private();
175 break;
176
177 default:
178 throw error(CL_INVALID_VALUE);
179 }
180
181 return CL_SUCCESS;
182
183 } catch (error &e) {
184 return e.get();
185
186 } catch (std::out_of_range &e) {
187 return CL_INVALID_DEVICE;
188 }
189
190 namespace {
191 ///
192 /// Common argument checking shared by kernel invocation commands.
193 ///
194 void
195 validate_common(const command_queue &q, kernel &kern,
196 const ref_vector<event> &deps) {
197 if (kern.prog.ctx != q.ctx ||
198 any_of([&](const event &ev) {
199 return ev.ctx != q.ctx;
200 }, deps))
201 throw error(CL_INVALID_CONTEXT);
202
203 if (any_of([](kernel::argument &arg) {
204 return !arg.set();
205 }, kern.args()))
206 throw error(CL_INVALID_KERNEL_ARGS);
207
208 if (!count(q.dev, kern.prog.devices()))
209 throw error(CL_INVALID_PROGRAM_EXECUTABLE);
210 }
211
212 std::vector<size_t>
213 validate_grid_size(const command_queue &q, cl_uint dims,
214 const size_t *d_grid_size) {
215 auto grid_size = range(d_grid_size, dims);
216
217 if (dims < 1 || dims > q.dev.max_block_size().size())
218 throw error(CL_INVALID_WORK_DIMENSION);
219
220 if (!d_grid_size || any_of(is_zero(), grid_size))
221 throw error(CL_INVALID_GLOBAL_WORK_SIZE);
222
223 if (d_block_size) {
224 auto block_size = range(d_block_size, dims);
225
226 if (any_of(is_zero(), block_size) ||
227 any_of(greater(), block_size, q.dev.max_block_size()))
228 throw error(CL_INVALID_WORK_ITEM_SIZE);
229
230 if (any_of(modulus(), grid_size, block_size))
231 throw error(CL_INVALID_WORK_GROUP_SIZE);
232
233 if (fold(multiplies(), 1u, block_size) >
234 q.dev.max_threads_per_block())
235 throw error(CL_INVALID_WORK_GROUP_SIZE);
236 }
237 }
238
239 std::vector<size_t>
240 pad_vector(const size_t *p, unsigned n, size_t x) {
241 if (p)
242 return { p, p + n };
243 else
244 return { n, x };
245 }
246 }
247
248 CLOVER_API cl_int
249 clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
250 cl_uint dims, const size_t *d_grid_offset,
251 const size_t *d_grid_size, const size_t *d_block_size,
252 cl_uint num_deps, const cl_event *d_deps,
253 cl_event *rd_ev) try {
254 auto &q = obj(d_q);
255 auto &kern = obj(d_kern);
256 auto deps = objs<wait_list_tag>(d_deps, num_deps);
257
258 validate_common(q, kern, deps);
259 validate_grid(q, dims, d_grid_size, d_block_size);
260
261 auto grid_offset = pad_vector(d_grid_offset, dims, 0);
262 auto grid_size = pad_vector(d_grid_size, dims, 1);
263 auto block_size = pad_vector(d_block_size, dims, 1);
264 hard_event *hev = new hard_event(
265 q, CL_COMMAND_NDRANGE_KERNEL, deps,
266 [=, &kern, &q](event &) {
267 kern.launch(q, grid_offset, grid_size, block_size);
268 });
269
270 ret_object(rd_ev, hev);
271 return CL_SUCCESS;
272
273 } catch (error &e) {
274 return e.get();
275 }
276
277 CLOVER_API cl_int
278 clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
279 cl_uint num_deps, const cl_event *d_deps,
280 cl_event *rd_ev) try {
281 auto &q = obj(d_q);
282 auto &kern = obj(d_kern);
283 auto deps = objs<wait_list_tag>(d_deps, num_deps);
284
285 validate_common(q, kern, deps);
286
287 hard_event *hev = new hard_event(
288 q, CL_COMMAND_TASK, deps,
289 [=, &kern, &q](event &) {
290 kern.launch(q, { 0 }, { 1 }, { 1 });
291 });
292
293 ret_object(rd_ev, hev);
294 return CL_SUCCESS;
295
296 } catch (error &e) {
297 return e.get();
298 }
299
300 CLOVER_API cl_int
301 clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
302 void *args, size_t args_size,
303 cl_uint num_mems, const cl_mem *d_mems,
304 const void **mem_handles, cl_uint num_deps,
305 const cl_event *d_deps, cl_event *rd_ev) {
306 return CL_INVALID_OPERATION;
307 }