clover: handle setKernelArg errors
[mesa.git] src/gallium/state_trackers/clover/core/kernel.cpp
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include "core/kernel.hpp"
#include "core/resource.hpp"
#include "util/factor.hpp"
#include "util/u_math.h"
#include "pipe/p_context.h"

using namespace clover;

kernel::kernel(clover::program &prog, const std::string &name,
               const std::vector<module::argument> &margs) :
   program(prog), _name(name), exec(*this),
   program_ref(prog._kernel_ref_counter) {
   for (auto &marg : margs) {
      if (marg.semantic == module::argument::general)
         _args.emplace_back(argument::create(marg));
   }
}

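///
/// Pad vector \a v with copies of \a x up to the number of grid
/// dimensions supported by the device associated with queue \a q.
///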
template<typename V>
static inline std::vector<uint>
pad_vector(command_queue &q, const V &v, uint x) {
   std::vector<uint> w { v.begin(), v.end() };
   w.resize(q.device().max_block_size().size(), x);
   return w;
}

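//
// Bind the compute state and any resources the kernel arguments
// refer to, dispatch the grid, and then unbind everything again so
// the pipe context is left in a clean state.
//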
void
kernel::launch(command_queue &q,
               const std::vector<size_t> &grid_offset,
               const std::vector<size_t> &grid_size,
               const std::vector<size_t> &block_size) {
   const auto m = program().binary(q.device());
   const auto reduced_grid_size =
      map(divides(), grid_size, block_size);
   void *st = exec.bind(&q, grid_offset);

   // The handles are created during exec_context::bind(), so we need
   // to make sure to call exec_context::bind() before retrieving them.
   std::vector<uint32_t *> g_handles = map([&](size_t h) {
         return (uint32_t *)&exec.input[h];
      }, exec.g_handles);

   q.pipe->bind_compute_state(q.pipe, st);
   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
                               0, exec.samplers.size(),
                               exec.samplers.data());

   q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
                             exec.sviews.size(), exec.sviews.data());
   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
                                 exec.resources.data());
   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
                              exec.g_buffers.data(), g_handles.data());

   q.pipe->launch_grid(q.pipe,
                       pad_vector(q, block_size, 1).data(),
                       pad_vector(q, reduced_grid_size, 1).data(),
                       find(name_equals(_name), m.syms).offset,
                       exec.input.data());

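   // Unbind the sampler, resource and global buffer state again.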
   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
   q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
                             exec.sviews.size(), NULL);
   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
                               exec.samplers.size(), NULL);
   exec.unbind();
}

size_t
kernel::mem_local() const {
   size_t sz = 0;

   for (auto &arg : args()) {
      if (dynamic_cast<local_argument *>(&arg))
         sz += arg.storage();
   }

   return sz;
}

size_t
kernel::mem_private() const {
   return 0;
}

const std::string &
kernel::name() const {
   return _name;
}

std::vector<size_t>
kernel::optimal_block_size(const command_queue &q,
                           const std::vector<size_t> &grid_size) const {
   return factor::find_grid_optimal_factor<size_t>(
      q.device().max_threads_per_block(), q.device().max_block_size(),
      grid_size);
}

std::vector<size_t>
kernel::required_block_size() const {
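   // A size of zero in every dimension means the kernel doesn't
   // declare a required work-group size.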
   return { 0, 0, 0 };
}

kernel::argument_range
kernel::args() {
   return map(derefs(), _args);
}

kernel::const_argument_range
kernel::args() const {
   return map(derefs(), _args);
}

const module &
kernel::module(const command_queue &q) const {
   return program().binary(q.device());
}

kernel::exec_context::exec_context(kernel &kern) :
   kern(kern), q(NULL), mem_local(0), st(NULL), cs() {
}

kernel::exec_context::~exec_context() {
   if (st)
      q->pipe->delete_compute_state(q->pipe, st);
}

void *
kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
                           const std::vector<size_t> &grid_offset) {
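   // Swap the queues so the previous one stays reachable through _q
   // for the state comparison and deletion below.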
   std::swap(q, _q);

   // Bind kernel arguments.
   auto &m = kern.program().binary(q->device());
   auto margs = find(name_equals(kern.name()), m.syms).args;
   auto msec = find(type_equals(module::section::text), m.secs);
   auto explicit_arg = kern._args.begin();

   for (auto &marg : margs) {
      switch (marg.semantic) {
      case module::argument::general:
         (*(explicit_arg++))->bind(*this, marg);
         break;

      case module::argument::grid_dimension: {
         const cl_uint dimension = grid_offset.size();
         auto arg = argument::create(marg);

         arg->set(sizeof(dimension), &dimension);
         arg->bind(*this, marg);
         break;
      }
      case module::argument::grid_offset: {
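         // Pad the offset with zeros for any grid dimensions the
         // caller didn't specify.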
         for (cl_uint x : pad_vector(*q, grid_offset, 0)) {
            auto arg = argument::create(marg);

            arg->set(sizeof(x), &x);
            arg->bind(*this, marg);
         }
         break;
      }
      case module::argument::image_size: {
         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
         std::vector<cl_uint> image_size{
            static_cast<cl_uint>(img->width()),
            static_cast<cl_uint>(img->height()),
            static_cast<cl_uint>(img->depth())};
         for (auto x : image_size) {
            auto arg = argument::create(marg);

            arg->set(sizeof(x), &x);
            arg->bind(*this, marg);
         }
         break;
      }
      case module::argument::image_format: {
         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
         cl_image_format fmt = img->format();
         std::vector<cl_uint> image_format{
            static_cast<cl_uint>(fmt.image_channel_data_type),
            static_cast<cl_uint>(fmt.image_channel_order)};
         for (auto x : image_format) {
            auto arg = argument::create(marg);

            arg->set(sizeof(x), &x);
            arg->bind(*this, marg);
         }
         break;
      }
      }
   }

   // Create a new compute state if anything changed.
   if (!st || q != _q ||
       cs.req_local_mem != mem_local ||
       cs.req_input_mem != input.size()) {
      if (st)
         _q->pipe->delete_compute_state(_q->pipe, st);

      cs.prog = &(msec.data[0]);
      cs.req_local_mem = mem_local;
      cs.req_input_mem = input.size();
      st = q->pipe->create_compute_state(q->pipe, &cs);
   }

   return st;
}

void
kernel::exec_context::unbind() {
   for (auto &arg : kern.args())
      arg.unbind(*this);

   input.clear();
   samplers.clear();
   sviews.clear();
   resources.clear();
   g_buffers.clear();
   g_handles.clear();
   mem_local = 0;
}

namespace {
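   ///
   /// Return the raw bytes of object \a x as a byte vector.
   ///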
   template<typename T>
   std::vector<uint8_t>
   bytes(const T& x) {
      return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) };
   }

   ///
   /// Transform buffer \a v from the native byte order into the byte
   /// order specified by \a e.
   ///
   template<typename T>
   void
   byteswap(T &v, pipe_endian e) {
      if (PIPE_ENDIAN_NATIVE != e)
         std::reverse(v.begin(), v.end());
   }

   ///
   /// Pad buffer \a v to the next multiple of \a n.
   ///
   template<typename T>
   void
   align(T &v, size_t n) {
      v.resize(util_align_npot(v.size(), n));
   }

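   ///
   /// Return true if the most significant bit of buffer \a s is set.
   ///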
   bool
   msb(const std::vector<uint8_t> &s) {
      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         return s.back() & 0x80;
      else
         return s.front() & 0x80;
   }

   ///
   /// Resize buffer \a v to size \a n using sign or zero extension
   /// according to \a ext.
   ///
   template<typename T>
   void
   extend(T &v, enum module::argument::ext_type ext, size_t n) {
      const size_t m = std::min(v.size(), n);
      const bool sign_ext = (ext == module::argument::sign_ext);
      const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
      T w(n, fill);

      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         std::copy_n(v.begin(), m, w.begin());
      else
         std::copy_n(v.end() - m, m, w.end() - m);

      std::swap(v, w);
   }

   ///
   /// Append buffer \a w to \a v.
   ///
   template<typename T>
   void
   insert(T &v, const T &w) {
      v.insert(v.end(), w.begin(), w.end());
   }

   ///
   /// Append \a n elements to the end of buffer \a v.
   ///
   template<typename T>
   size_t
   allocate(T &v, size_t n) {
      size_t pos = v.size();
      v.resize(pos + n);
      return pos;
   }
}

std::unique_ptr<kernel::argument>
kernel::argument::create(const module::argument &marg) {
   switch (marg.type) {
   case module::argument::scalar:
      return std::unique_ptr<kernel::argument>(new scalar_argument(marg.size));

   case module::argument::global:
      return std::unique_ptr<kernel::argument>(new global_argument);

   case module::argument::local:
      return std::unique_ptr<kernel::argument>(new local_argument);

   case module::argument::constant:
      return std::unique_ptr<kernel::argument>(new constant_argument);

   case module::argument::image2d_rd:
   case module::argument::image3d_rd:
      return std::unique_ptr<kernel::argument>(new image_rd_argument);

   case module::argument::image2d_wr:
   case module::argument::image3d_wr:
      return std::unique_ptr<kernel::argument>(new image_wr_argument);

   case module::argument::sampler:
      return std::unique_ptr<kernel::argument>(new sampler_argument);

   }
   throw error(CL_INVALID_KERNEL_DEFINITION);
}

kernel::argument::argument() : _set(false) {
}

bool
kernel::argument::set() const {
   return _set;
}

size_t
kernel::argument::storage() const {
   return 0;
}

kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
}

void
kernel::scalar_argument::set(size_t size, const void *value) {
   if (!value)
      throw error(CL_INVALID_ARG_VALUE);

   if (size != this->size)
      throw error(CL_INVALID_ARG_SIZE);

   v = { (uint8_t *)value, (uint8_t *)value + size };
   _set = true;
}

void
kernel::scalar_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
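   // Extend the value to the target size, byte-swap it for the
   // device and append it to the input buffer at the required
   // alignment.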
   auto w = v;

   extend(w, marg.ext_type, marg.target_size);
   byteswap(w, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, w);
}

void
kernel::scalar_argument::unbind(exec_context &ctx) {
}

void
kernel::global_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
   _set = true;
}

void
kernel::global_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   align(ctx.input, marg.target_align);

   if (buf) {
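      // Remember the position of this argument in the input buffer:
      // launch() hands these offsets to set_global_binding() so the
      // driver can patch in the buffer's final address.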
      const resource &r = buf->resource(*ctx.q);
      ctx.g_handles.push_back(ctx.input.size());
      ctx.g_buffers.push_back(r.pipe);

      // How to handle multi-dimensional offsets?
      // We don't need to.  Buffer offsets are always
      // one-dimensional.
      auto v = bytes(r.offset[0]);
      extend(v, marg.ext_type, marg.target_size);
      byteswap(v, ctx.q->device().endianness());
      insert(ctx.input, v);
   } else {
      // Null pointer.
      allocate(ctx.input, marg.target_size);
   }
}

void
kernel::global_argument::unbind(exec_context &ctx) {
}

size_t
kernel::local_argument::storage() const {
   return _storage;
}

void
kernel::local_argument::set(size_t size, const void *value) {
   if (value)
      throw error(CL_INVALID_ARG_VALUE);

   if (!size)
      throw error(CL_INVALID_ARG_SIZE);

   _storage = size;
   _set = true;
}

void
kernel::local_argument::bind(exec_context &ctx,
                             const module::argument &marg) {
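   // Pass the current offset into the kernel's local memory area as
   // the argument value and reserve the amount of storage requested
   // through clSetKernelArg().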
   auto v = bytes(ctx.mem_local);

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   ctx.mem_local += _storage;
}

void
kernel::local_argument::unbind(exec_context &ctx) {
}

void
kernel::constant_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
   _set = true;
}

void
kernel::constant_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   align(ctx.input, marg.target_align);

   if (buf) {
      resource &r = buf->resource(*ctx.q);
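      // The argument value packs the index of the constant buffer in
      // the resource list into the top byte and the byte offset
      // within it into the lower 24 bits.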
      auto v = bytes(ctx.resources.size() << 24 | r.offset[0]);

      extend(v, module::argument::zero_ext, marg.target_size);
      byteswap(v, ctx.q->device().endianness());
      insert(ctx.input, v);

      st = r.bind_surface(*ctx.q, false);
      ctx.resources.push_back(st);
   } else {
      // Null pointer.
      allocate(ctx.input, marg.target_size);
   }
}

void
kernel::constant_argument::unbind(exec_context &ctx) {
   if (buf)
      buf->resource(*ctx.q).unbind_surface(*ctx.q, st);
}

void
kernel::image_rd_argument::set(size_t size, const void *value) {
   if (!value)
      throw error(CL_INVALID_ARG_VALUE);

   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   img = &obj<image>(*(cl_mem *)value);
   _set = true;
}

void
kernel::image_rd_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
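   // The image is referenced by its index in the sampler view list.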
   auto v = bytes(ctx.sviews.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource(*ctx.q).bind_sampler_view(*ctx.q);
   ctx.sviews.push_back(st);
}

void
kernel::image_rd_argument::unbind(exec_context &ctx) {
   img->resource(*ctx.q).unbind_sampler_view(*ctx.q, st);
}

void
kernel::image_wr_argument::set(size_t size, const void *value) {
   if (!value)
      throw error(CL_INVALID_ARG_VALUE);

   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   img = &obj<image>(*(cl_mem *)value);
   _set = true;
}

void
kernel::image_wr_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   auto v = bytes(ctx.resources.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource(*ctx.q).bind_surface(*ctx.q, true);
   ctx.resources.push_back(st);
}

void
kernel::image_wr_argument::unbind(exec_context &ctx) {
   img->resource(*ctx.q).unbind_surface(*ctx.q, st);
}

void
kernel::sampler_argument::set(size_t size, const void *value) {
   if (!value)
      throw error(CL_INVALID_SAMPLER);

   if (size != sizeof(cl_sampler))
      throw error(CL_INVALID_ARG_SIZE);

   s = &obj(*(cl_sampler *)value);
   _set = true;
}

void
kernel::sampler_argument::bind(exec_context &ctx,
                               const module::argument &marg) {
   st = s->bind(*ctx.q);
   ctx.samplers.push_back(st);
}

void
kernel::sampler_argument::unbind(exec_context &ctx) {
   s->unbind(*ctx.q, st);
}