clover: Respect kernel argument alignment restrictions.
[mesa.git] / src / gallium / state_trackers / clover / core / kernel.cpp
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "core/kernel.hpp"
24 #include "core/resource.hpp"
25 #include "util/u_math.h"
26 #include "pipe/p_context.h"
27
28 using namespace clover;
29
30 _cl_kernel::_cl_kernel(clover::program &prog,
31 const std::string &name,
32 const std::vector<clover::module::argument> &margs) :
33 prog(prog), __name(name), exec(*this) {
34 for (auto marg : margs) {
35 if (marg.type == module::argument::scalar)
36 args.emplace_back(new scalar_argument(marg.size));
37 else if (marg.type == module::argument::global)
38 args.emplace_back(new global_argument);
39 else if (marg.type == module::argument::local)
40 args.emplace_back(new local_argument);
41 else if (marg.type == module::argument::constant)
42 args.emplace_back(new constant_argument);
43 else if (marg.type == module::argument::image2d_rd ||
44 marg.type == module::argument::image3d_rd)
45 args.emplace_back(new image_rd_argument);
46 else if (marg.type == module::argument::image2d_wr ||
47 marg.type == module::argument::image3d_wr)
48 args.emplace_back(new image_wr_argument);
49 else if (marg.type == module::argument::sampler)
50 args.emplace_back(new sampler_argument);
51 else
52 throw error(CL_INVALID_KERNEL_DEFINITION);
53 }
54 }
55
56 template<typename T, typename V>
57 static inline std::vector<T>
58 pad_vector(clover::command_queue &q, const V &v, T x) {
59 std::vector<T> w { v.begin(), v.end() };
60 w.resize(q.dev.max_block_size().size(), x);
61 return w;
62 }
63
64 void
65 _cl_kernel::launch(clover::command_queue &q,
66 const std::vector<size_t> &grid_offset,
67 const std::vector<size_t> &grid_size,
68 const std::vector<size_t> &block_size) {
69 void *st = exec.bind(&q);
70 auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; },
71 exec.g_handles.begin(), exec.g_handles.end());
72
73 q.pipe->bind_compute_state(q.pipe, st);
74 q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(),
75 exec.samplers.data());
76 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
77 exec.sviews.data());
78 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
79 exec.resources.data());
80 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
81 exec.g_buffers.data(), g_handles.data());
82
83 q.pipe->launch_grid(q.pipe,
84 pad_vector<uint>(q, block_size, 1).data(),
85 pad_vector<uint>(q, grid_size, 1).data(),
86 module(q).sym(__name).offset,
87 exec.input.data());
88
89 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
90 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
91 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
92 q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL);
93 exec.unbind();
94 }
95
96 size_t
97 _cl_kernel::mem_local() const {
98 size_t sz = 0;
99
100 for (auto &arg : args) {
101 if (dynamic_cast<local_argument *>(arg.get()))
102 sz += arg->storage();
103 }
104
105 return sz;
106 }
107
108 size_t
109 _cl_kernel::mem_private() const {
110 return 0;
111 }
112
113 size_t
114 _cl_kernel::max_block_size() const {
115 return SIZE_MAX;
116 }
117
118 const std::string &
119 _cl_kernel::name() const {
120 return __name;
121 }
122
123 std::vector<size_t>
124 _cl_kernel::block_size() const {
125 return { 0, 0, 0 };
126 }
127
128 const clover::module &
129 _cl_kernel::module(const clover::command_queue &q) const {
130 return prog.binaries().find(&q.dev)->second;
131 }
132
133 _cl_kernel::exec_context::exec_context(clover::kernel &kern) :
134 kern(kern), q(NULL), mem_local(0), st(NULL) {
135 }
136
137 _cl_kernel::exec_context::~exec_context() {
138 if (st)
139 q->pipe->delete_compute_state(q->pipe, st);
140 }
141
142 void *
143 _cl_kernel::exec_context::bind(clover::command_queue *__q) {
144 std::swap(q, __q);
145
146 // Bind kernel arguments.
147 auto margs = kern.module(*q).sym(kern.name()).args;
148 for_each([=](std::unique_ptr<kernel::argument> &karg,
149 const module::argument &marg) {
150 karg->bind(*this, marg);
151 }, kern.args.begin(), kern.args.end(), margs.begin());
152
153 // Create a new compute state if anything changed.
154 if (!st || q != __q ||
155 cs.req_local_mem != mem_local ||
156 cs.req_input_mem != input.size()) {
157 if (st)
158 __q->pipe->delete_compute_state(__q->pipe, st);
159
160 cs.prog = kern.module(*q).sec(module::section::text).data.begin();
161 cs.req_local_mem = mem_local;
162 cs.req_input_mem = input.size();
163 st = q->pipe->create_compute_state(q->pipe, &cs);
164 }
165
166 return st;
167 }
168
169 void
170 _cl_kernel::exec_context::unbind() {
171 for (auto &arg : kern.args)
172 arg->unbind(*this);
173
174 input.clear();
175 samplers.clear();
176 sviews.clear();
177 resources.clear();
178 g_buffers.clear();
179 g_handles.clear();
180 mem_local = 0;
181 }
182
183 namespace {
///
/// Return a copy of the object representation of \a x as a buffer
/// of sizeof(x) bytes.
///
template<typename T>
std::vector<uint8_t>
bytes(const T& x) {
   const uint8_t *const first = reinterpret_cast<const uint8_t *>(&x);

   return std::vector<uint8_t>(first, first + sizeof(x));
}
189
190 ///
191 /// Transform buffer \a v from the native byte order into the byte
192 /// order specified by \a e.
193 ///
194 template<typename T>
195 void
196 byteswap(T &v, pipe_endian e) {
197 if (PIPE_ENDIAN_NATIVE != e)
198 std::reverse(v.begin(), v.end());
199 }
200
///
/// Grow buffer \a v until its size is a multiple of \a n, as
/// calculated by util_align_npot().
///
template<typename T>
void
align(T &v, size_t n) {
   const auto padded_size = util_align_npot(v.size(), n);

   v.resize(padded_size);
}
209
210 bool
211 msb(const std::vector<uint8_t> &s) {
212 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
213 return s.back() & 0x80;
214 else
215 return s.front() & 0x80;
216 }
217
218 ///
219 /// Resize buffer \a v to size \a n using sign or zero extension
220 /// according to \a ext.
221 ///
222 template<typename T>
223 void
224 extend(T &v, enum clover::module::argument::ext_type ext, size_t n) {
225 const size_t m = std::min(v.size(), n);
226 const bool sign_ext = (ext == module::argument::sign_ext);
227 const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
228 T w(n, fill);
229
230 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
231 std::copy_n(v.begin(), m, w.begin());
232 else
233 std::copy_n(v.end() - m, m, w.end() - m);
234
235 std::swap(v, w);
236 }
237
///
/// Append a copy of the contents of buffer \a w to the end of
/// buffer \a v.
///
template<typename T>
void
insert(T &v, const T &w) {
   const auto tail = std::end(v);

   v.insert(tail, std::begin(w), std::end(w));
}
246
///
/// Grow buffer \a v by \a n elements and return the index of the
/// first element that was added, i.e. the previous size of \a v.
///
template<typename T>
size_t
allocate(T &v, size_t n) {
   const size_t offset = v.size();
   const size_t grown = offset + n;

   v.resize(grown);
   return offset;
}
257 }
258
259 _cl_kernel::argument::argument() : __set(false) {
260 }
261
262 bool
263 _cl_kernel::argument::set() const {
264 return __set;
265 }
266
267 size_t
268 _cl_kernel::argument::storage() const {
269 return 0;
270 }
271
272 _cl_kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
273 }
274
275 void
276 _cl_kernel::scalar_argument::set(size_t size, const void *value) {
277 if (size != this->size)
278 throw error(CL_INVALID_ARG_SIZE);
279
280 v = { (uint8_t *)value, (uint8_t *)value + size };
281 __set = true;
282 }
283
284 void
285 _cl_kernel::scalar_argument::bind(exec_context &ctx,
286 const clover::module::argument &marg) {
287 auto w = v;
288
289 extend(w, marg.ext_type, marg.target_size);
290 byteswap(w, ctx.q->dev.endianness());
291 align(ctx.input, marg.target_align);
292 insert(ctx.input, w);
293 }
294
295 void
296 _cl_kernel::scalar_argument::unbind(exec_context &ctx) {
297 }
298
299 void
300 _cl_kernel::global_argument::set(size_t size, const void *value) {
301 if (size != sizeof(cl_mem))
302 throw error(CL_INVALID_ARG_SIZE);
303
304 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
305 if (!obj)
306 throw error(CL_INVALID_MEM_OBJECT);
307
308 __set = true;
309 }
310
311 void
312 _cl_kernel::global_argument::bind(exec_context &ctx,
313 const clover::module::argument &marg) {
314 align(ctx.input, marg.target_align);
315 ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
316 ctx.g_buffers.push_back(obj->resource(ctx.q).pipe);
317 }
318
319 void
320 _cl_kernel::global_argument::unbind(exec_context &ctx) {
321 }
322
323 size_t
324 _cl_kernel::local_argument::storage() const {
325 return __storage;
326 }
327
328 void
329 _cl_kernel::local_argument::set(size_t size, const void *value) {
330 if (value)
331 throw error(CL_INVALID_ARG_VALUE);
332
333 __storage = size;
334 __set = true;
335 }
336
337 void
338 _cl_kernel::local_argument::bind(exec_context &ctx,
339 const clover::module::argument &marg) {
340 auto v = bytes(ctx.mem_local);
341
342 extend(v, module::argument::zero_ext, marg.target_size);
343 byteswap(v, ctx.q->dev.endianness());
344 align(ctx.input, marg.target_align);
345 insert(ctx.input, v);
346
347 ctx.mem_local += __storage;
348 }
349
350 void
351 _cl_kernel::local_argument::unbind(exec_context &ctx) {
352 }
353
354 void
355 _cl_kernel::constant_argument::set(size_t size, const void *value) {
356 if (size != sizeof(cl_mem))
357 throw error(CL_INVALID_ARG_SIZE);
358
359 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
360 if (!obj)
361 throw error(CL_INVALID_MEM_OBJECT);
362
363 __set = true;
364 }
365
366 void
367 _cl_kernel::constant_argument::bind(exec_context &ctx,
368 const clover::module::argument &marg) {
369 auto v = bytes(ctx.resources.size() << 24);
370
371 extend(v, module::argument::zero_ext, marg.target_size);
372 byteswap(v, ctx.q->dev.endianness());
373 align(ctx.input, marg.target_align);
374 insert(ctx.input, v);
375
376 st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
377 ctx.resources.push_back(st);
378 }
379
380 void
381 _cl_kernel::constant_argument::unbind(exec_context &ctx) {
382 obj->resource(ctx.q).unbind_surface(*ctx.q, st);
383 }
384
385 void
386 _cl_kernel::image_rd_argument::set(size_t size, const void *value) {
387 if (size != sizeof(cl_mem))
388 throw error(CL_INVALID_ARG_SIZE);
389
390 obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
391 if (!obj)
392 throw error(CL_INVALID_MEM_OBJECT);
393
394 __set = true;
395 }
396
397 void
398 _cl_kernel::image_rd_argument::bind(exec_context &ctx,
399 const clover::module::argument &marg) {
400 auto v = bytes(ctx.sviews.size());
401
402 extend(v, module::argument::zero_ext, marg.target_size);
403 byteswap(v, ctx.q->dev.endianness());
404 align(ctx.input, marg.target_align);
405 insert(ctx.input, v);
406
407 st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
408 ctx.sviews.push_back(st);
409 }
410
411 void
412 _cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
413 obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
414 }
415
416 void
417 _cl_kernel::image_wr_argument::set(size_t size, const void *value) {
418 if (size != sizeof(cl_mem))
419 throw error(CL_INVALID_ARG_SIZE);
420
421 obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
422 if (!obj)
423 throw error(CL_INVALID_MEM_OBJECT);
424
425 __set = true;
426 }
427
428 void
429 _cl_kernel::image_wr_argument::bind(exec_context &ctx,
430 const clover::module::argument &marg) {
431 auto v = bytes(ctx.resources.size());
432
433 extend(v, module::argument::zero_ext, marg.target_size);
434 byteswap(v, ctx.q->dev.endianness());
435 align(ctx.input, marg.target_align);
436 insert(ctx.input, v);
437
438 st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
439 ctx.resources.push_back(st);
440 }
441
442 void
443 _cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
444 obj->resource(ctx.q).unbind_surface(*ctx.q, st);
445 }
446
447 void
448 _cl_kernel::sampler_argument::set(size_t size, const void *value) {
449 if (size != sizeof(cl_sampler))
450 throw error(CL_INVALID_ARG_SIZE);
451
452 obj = *(cl_sampler *)value;
453 __set = true;
454 }
455
456 void
457 _cl_kernel::sampler_argument::bind(exec_context &ctx,
458 const clover::module::argument &marg) {
459 st = obj->bind(*ctx.q);
460 ctx.samplers.push_back(st);
461 }
462
463 void
464 _cl_kernel::sampler_argument::unbind(exec_context &ctx) {
465 obj->unbind(*ctx.q, st);
466 }