3335ee6a7134cee66d28c41f5c05cc9f70bb3e71
[mesa.git] / src / gallium / state_trackers / clover / api / kernel.cpp
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
26
27 using namespace clover;
28
29 PUBLIC cl_kernel
30 clCreateKernel(cl_program prog, const char *name,
31 cl_int *errcode_ret) try {
32 if (!prog)
33 throw error(CL_INVALID_PROGRAM);
34
35 if (!name)
36 throw error(CL_INVALID_VALUE);
37
38 if (prog->binaries().empty())
39 throw error(CL_INVALID_PROGRAM_EXECUTABLE);
40
41 auto sym = prog->binaries().begin()->second.sym(name);
42
43 ret_error(errcode_ret, CL_SUCCESS);
44 return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
45
46 } catch (module::noent_error &e) {
47 ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
48 return NULL;
49
50 } catch(error &e) {
51 ret_error(errcode_ret, e);
52 return NULL;
53 }
54
55 PUBLIC cl_int
56 clCreateKernelsInProgram(cl_program prog, cl_uint count,
57 cl_kernel *kerns, cl_uint *count_ret) {
58 if (!prog)
59 throw error(CL_INVALID_PROGRAM);
60
61 if (prog->binaries().empty())
62 throw error(CL_INVALID_PROGRAM_EXECUTABLE);
63
64 auto &syms = prog->binaries().begin()->second.syms;
65
66 if (kerns && count < syms.size())
67 throw error(CL_INVALID_VALUE);
68
69 if (kerns)
70 std::transform(syms.begin(), syms.end(), kerns,
71 [=](const module::symbol &sym) {
72 return new kernel(*prog, compat::string(sym.name),
73 { sym.args.begin(), sym.args.end() });
74 });
75
76 if (count_ret)
77 *count_ret = syms.size();
78
79 return CL_SUCCESS;
80 }
81
82 PUBLIC cl_int
83 clRetainKernel(cl_kernel kern) {
84 if (!kern)
85 return CL_INVALID_KERNEL;
86
87 kern->retain();
88 return CL_SUCCESS;
89 }
90
91 PUBLIC cl_int
92 clReleaseKernel(cl_kernel kern) {
93 if (!kern)
94 return CL_INVALID_KERNEL;
95
96 if (kern->release())
97 delete kern;
98
99 return CL_SUCCESS;
100 }
101
102 PUBLIC cl_int
103 clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
104 const void *value) try {
105 if (!kern)
106 throw error(CL_INVALID_KERNEL);
107
108 if (idx >= kern->args.size())
109 throw error(CL_INVALID_ARG_INDEX);
110
111 kern->args[idx]->set(size, value);
112
113 return CL_SUCCESS;
114
115 } catch(error &e) {
116 return e.get();
117 }
118
119 PUBLIC cl_int
120 clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
121 size_t size, void *r_buf, size_t *r_size) try {
122 property_buffer buf { r_buf, size, r_size };
123
124 if (!kern)
125 return CL_INVALID_KERNEL;
126
127 switch (param) {
128 case CL_KERNEL_FUNCTION_NAME:
129 buf.as_string() = kern->name();
130 break;
131
132 case CL_KERNEL_NUM_ARGS:
133 buf.as_scalar<cl_uint>() = kern->args.size();
134 break;
135
136 case CL_KERNEL_REFERENCE_COUNT:
137 buf.as_scalar<cl_uint>() = kern->ref_count();
138 break;
139
140 case CL_KERNEL_CONTEXT:
141 buf.as_scalar<cl_context>() = &kern->prog.ctx;
142 break;
143
144 case CL_KERNEL_PROGRAM:
145 buf.as_scalar<cl_program>() = &kern->prog;
146 break;
147
148 default:
149 throw error(CL_INVALID_VALUE);
150 }
151
152 return CL_SUCCESS;
153
154 } catch (error &e) {
155 return e.get();
156 }
157
158 PUBLIC cl_int
159 clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
160 cl_kernel_work_group_info param,
161 size_t size, void *r_buf, size_t *r_size) try {
162 property_buffer buf { r_buf, size, r_size };
163
164 if (!kern)
165 return CL_INVALID_KERNEL;
166
167 if ((!dev && kern->prog.binaries().size() != 1) ||
168 (dev && !kern->prog.binaries().count(pobj(dev))))
169 return CL_INVALID_DEVICE;
170
171 switch (param) {
172 case CL_KERNEL_WORK_GROUP_SIZE:
173 buf.as_scalar<size_t>() = kern->max_block_size();
174 break;
175
176 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
177 buf.as_vector<size_t>() = kern->block_size();
178 break;
179
180 case CL_KERNEL_LOCAL_MEM_SIZE:
181 buf.as_scalar<cl_ulong>() = kern->mem_local();
182 break;
183
184 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
185 buf.as_scalar<size_t>() = 1;
186 break;
187
188 case CL_KERNEL_PRIVATE_MEM_SIZE:
189 buf.as_scalar<cl_ulong>() = kern->mem_private();
190 break;
191
192 default:
193 throw error(CL_INVALID_VALUE);
194 }
195
196 return CL_SUCCESS;
197
198 } catch (error &e) {
199 return e.get();
200 }
201
namespace {
   ///
   /// Common argument checking shared by kernel invocation commands.
   ///
   /// Throws the CL error for the first violated constraint; callers
   /// translate the exception into a return code in their catch
   /// blocks.  NOTE(review): grid_offset and ev are accepted but not
   /// validated here — presumably offsets are checked (or ignored)
   /// elsewhere; confirm against the callers.
   ///
   void
   kernel_validate(cl_command_queue q, cl_kernel kern,
                   cl_uint dims, const size_t *grid_offset,
                   const size_t *grid_size, const size_t *block_size,
                   cl_uint num_deps, const cl_event *deps,
                   cl_event *ev) {
      if (!q)
         throw error(CL_INVALID_COMMAND_QUEUE);

      if (!kern)
         throw error(CL_INVALID_KERNEL);

      // The kernel and every event in the wait list must belong to
      // the same context as the queue.
      if (&kern->prog.ctx != &q->ctx ||
          any_of([&](const cl_event ev) {
                return &obj(ev).ctx != &q->ctx;
             }, range(deps, num_deps)))
         throw error(CL_INVALID_CONTEXT);

      // The wait list pointer and count must agree (both set or both
      // zero/null), and no entry may be a null handle.
      if (bool(num_deps) != bool(deps) ||
          any_of(is_zero(), range(deps, num_deps)))
         throw error(CL_INVALID_EVENT_WAIT_LIST);

      // Every kernel argument must have been set via clSetKernelArg.
      if (any_of([](std::unique_ptr<kernel::argument> &arg) {
               return !arg->set();
            }, kern->args))
         throw error(CL_INVALID_KERNEL_ARGS);

      // The program must have been built for the queue's device.
      if (!kern->prog.binaries().count(&q->dev))
         throw error(CL_INVALID_PROGRAM_EXECUTABLE);

      if (dims < 1 || dims > q->dev.max_block_size().size())
         throw error(CL_INVALID_WORK_DIMENSION);

      // The global size is mandatory and must be nonzero in every
      // dimension.
      if (!grid_size || any_of(is_zero(), range(grid_size, dims)))
         throw error(CL_INVALID_GLOBAL_WORK_SIZE);

      // The local size is optional; when given, each dimension must
      // be nonzero, within the device limit, and divide the global
      // size evenly, and the total work-group size must not exceed
      // the device maximum.
      if (block_size) {
         if (any_of([](size_t b, size_t max) {
                  return b == 0 || b > max;
               }, range(block_size, dims),
               q->dev.max_block_size()))
            throw error(CL_INVALID_WORK_ITEM_SIZE);

         if (any_of(modulus(), range(grid_size, dims),
                    range(block_size, dims)))
            throw error(CL_INVALID_WORK_GROUP_SIZE);

         if (fold(multiplies(), 1u, range(block_size, dims)) >
             q->dev.max_threads_per_block())
            throw error(CL_INVALID_WORK_GROUP_SIZE);
      }
   }

   ///
   /// Common event action shared by kernel invocation commands.
   ///
   /// Returns a closure that launches the kernel when the event
   /// triggers.  The launch takes the grid size in blocks, so the
   /// global size is divided by the block size up front.
   ///
   std::function<void (event &)>
   kernel_op(cl_command_queue q, cl_kernel kern,
             const std::vector<size_t> &grid_offset,
             const std::vector<size_t> &grid_size,
             const std::vector<size_t> &block_size) {
      const std::vector<size_t> reduced_grid_size =
         map(divides(), grid_size, block_size);

      // Capture by value: the closure may run after the caller's
      // locals have gone out of scope.
      return [=](event &) {
         kern->launch(*q, grid_offset, reduced_grid_size, block_size);
      };
   }

   ///
   /// Copy the n-element array at p into a vector, or build an
   /// n-element vector filled with the default x if p is null.
   ///
   std::vector<size_t>
   opt_vector(const size_t *p, unsigned n, size_t x) {
      if (p)
         return { p, p + n };
      else
         return { n, x };
   }
}
283
284 PUBLIC cl_int
285 clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
286 cl_uint dims, const size_t *pgrid_offset,
287 const size_t *pgrid_size, const size_t *pblock_size,
288 cl_uint num_deps, const cl_event *d_deps,
289 cl_event *ev) try {
290 auto deps = objs<wait_list_tag>(d_deps, num_deps);
291 auto grid_offset = opt_vector(pgrid_offset, dims, 0);
292 auto grid_size = opt_vector(pgrid_size, dims, 1);
293 auto block_size = opt_vector(pblock_size, dims, 1);
294
295 kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
296 num_deps, d_deps, ev);
297
298 hard_event *hev = new hard_event(
299 *q, CL_COMMAND_NDRANGE_KERNEL, deps,
300 kernel_op(q, kern, grid_offset, grid_size, block_size));
301
302 ret_object(ev, hev);
303 return CL_SUCCESS;
304
305 } catch(error &e) {
306 return e.get();
307 }
308
309 PUBLIC cl_int
310 clEnqueueTask(cl_command_queue q, cl_kernel kern,
311 cl_uint num_deps, const cl_event *d_deps,
312 cl_event *ev) try {
313 auto deps = objs<wait_list_tag>(d_deps, num_deps);
314 const std::vector<size_t> grid_offset = { 0 };
315 const std::vector<size_t> grid_size = { 1 };
316 const std::vector<size_t> block_size = { 1 };
317
318 kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
319 block_size.data(), num_deps, d_deps, ev);
320
321 hard_event *hev = new hard_event(
322 *q, CL_COMMAND_TASK, deps,
323 kernel_op(q, kern, grid_offset, grid_size, block_size));
324
325 ret_object(ev, hev);
326 return CL_SUCCESS;
327
328 } catch(error &e) {
329 return e.get();
330 }
331
PUBLIC cl_int
clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
                      void *args, size_t args_size,
                      cl_uint obj_count, const cl_mem *obj_list,
                      const void **obj_args, cl_uint num_deps,
                      const cl_event *deps, cl_event *ev) {
   // Native (host-function) kernels are not implemented; always fail
   // with CL_INVALID_OPERATION, which the spec permits when the
   // device does not advertise CL_EXEC_NATIVE_KERNEL.
   return CL_INVALID_OPERATION;
}