clover: fix build after a3ed98f7aa85636579a5696bf036ec13e5c9104a
[mesa.git] src/gallium/state_trackers/clover/core/kernel.cpp
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include "core/kernel.hpp"
#include "core/resource.hpp"
#include "util/u_math.h"
#include "pipe/p_context.h"

using namespace clover;

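// Build the argument list from the module's argument metadata, instantiating
// the argument subclass that matches each declared argument type.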
kernel::kernel(program &prog, const std::string &name,
               const std::vector<module::argument> &margs) :
   prog(prog), _name(name), exec(*this) {
   for (auto &marg : margs) {
      if (marg.type == module::argument::scalar)
         _args.emplace_back(new scalar_argument(marg.size));
      else if (marg.type == module::argument::global)
         _args.emplace_back(new global_argument);
      else if (marg.type == module::argument::local)
         _args.emplace_back(new local_argument);
      else if (marg.type == module::argument::constant)
         _args.emplace_back(new constant_argument);
      else if (marg.type == module::argument::image2d_rd ||
               marg.type == module::argument::image3d_rd)
         _args.emplace_back(new image_rd_argument);
      else if (marg.type == module::argument::image2d_wr ||
               marg.type == module::argument::image3d_wr)
         _args.emplace_back(new image_wr_argument);
      else if (marg.type == module::argument::sampler)
         _args.emplace_back(new sampler_argument);
      else
         throw error(CL_INVALID_KERNEL_DEFINITION);
   }
}

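// Pad vector v with copies of x up to the number of grid dimensions
// supported by the device, so it can be handed to launch_grid().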
template<typename V>
static inline std::vector<uint>
pad_vector(command_queue &q, const V &v, uint x) {
   std::vector<uint> w { v.begin(), v.end() };
   w.resize(q.dev.max_block_size().size(), x);
   return w;
}

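// Launch the kernel on the given command queue: bind the argument values and
// compute state, set up the sampler, resource and global buffer bindings,
// fire off the grid and unbind everything again.  The grid size handed to
// launch_grid() is in blocks rather than in threads, hence the division by
// block_size below.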
void
kernel::launch(command_queue &q,
               const std::vector<size_t> &grid_offset,
               const std::vector<size_t> &grid_size,
               const std::vector<size_t> &block_size) {
   const auto m = prog.binary(q.dev);
   const auto reduced_grid_size =
      map(divides(), grid_size, block_size);
   void *st = exec.bind(&q);

   // The handles are created during exec_context::bind(), so we need to
   // make sure to call exec_context::bind() before retrieving them.
   std::vector<uint32_t *> g_handles = map([&](size_t h) {
         return (uint32_t *)&exec.input[h];
      }, exec.g_handles);

   q.pipe->bind_compute_state(q.pipe, st);
   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
                               0, exec.samplers.size(),
                               exec.samplers.data());

   q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
                             exec.sviews.size(), exec.sviews.data());
   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
                                 exec.resources.data());
   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
                              exec.g_buffers.data(), g_handles.data());

   q.pipe->launch_grid(q.pipe,
                       pad_vector(q, block_size, 1).data(),
                       pad_vector(q, reduced_grid_size, 1).data(),
                       find(name_equals(_name), m.syms).offset,
                       exec.input.data());

   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
   q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
                             exec.sviews.size(), NULL);
   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
                               exec.samplers.size(), NULL);
   exec.unbind();
}

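// Amount of __local memory used by this kernel: the sum of the storage
// requirements of all of its local arguments.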
size_t
kernel::mem_local() const {
   size_t sz = 0;

   for (auto &arg : args()) {
      if (dynamic_cast<local_argument *>(&arg))
         sz += arg.storage();
   }

   return sz;
}

size_t
kernel::mem_private() const {
   return 0;
}

size_t
kernel::max_block_size() const {
   return std::numeric_limits<std::size_t>::max();
}

const std::string &
kernel::name() const {
   return _name;
}

std::vector<size_t>
kernel::block_size() const {
   return { 0, 0, 0 };
}

kernel::argument_range
kernel::args() {
   return map(derefs(), _args);
}

kernel::const_argument_range
kernel::args() const {
   return map(derefs(), _args);
}

const module &
kernel::module(const command_queue &q) const {
   return prog.binary(q.dev);
}

kernel::exec_context::exec_context(kernel &kern) :
   kern(kern), q(NULL), mem_local(0), st(NULL), cs() {
}

kernel::exec_context::~exec_context() {
   if (st)
      q->pipe->delete_compute_state(q->pipe, st);
}

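// Bind this execution context to the given command queue: marshal the
// currently set argument values into the input buffer and (re)create the
// compute state object if the queue or the resource requirements changed.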
void *
kernel::exec_context::bind(command_queue *_q) {
   std::swap(q, _q);

   // Bind kernel arguments.
   auto &m = kern.prog.binary(q->dev);
   auto margs = find(name_equals(kern.name()), m.syms).args;
   auto msec = find(type_equals(module::section::text), m.secs);

   for_each([=](kernel::argument &karg, const module::argument &marg) {
         karg.bind(*this, marg);
      }, kern.args(), margs);

   // Create a new compute state if anything changed.
   if (!st || q != _q ||
       cs.req_local_mem != mem_local ||
       cs.req_input_mem != input.size()) {
      if (st)
         _q->pipe->delete_compute_state(_q->pipe, st);

      cs.prog = msec.data.begin();
      cs.req_local_mem = mem_local;
      cs.req_input_mem = input.size();
      st = q->pipe->create_compute_state(q->pipe, &cs);
   }

   return st;
}

void
kernel::exec_context::unbind() {
   for (auto &arg : kern.args())
      arg.unbind(*this);

   input.clear();
   samplers.clear();
   sviews.clear();
   resources.clear();
   g_buffers.clear();
   g_handles.clear();
   mem_local = 0;
}

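// Helpers used by the argument classes below to marshal argument values into
// the flat input byte buffer in the layout expected by the device.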
namespace {
   template<typename T>
   std::vector<uint8_t>
   bytes(const T& x) {
      return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) };
   }

   ///
   /// Transform buffer \a v from the native byte order into the byte
   /// order specified by \a e.
   ///
   template<typename T>
   void
   byteswap(T &v, pipe_endian e) {
      if (PIPE_ENDIAN_NATIVE != e)
         std::reverse(v.begin(), v.end());
   }

   ///
   /// Pad buffer \a v to the next multiple of \a n.
   ///
   template<typename T>
   void
   align(T &v, size_t n) {
      v.resize(util_align_npot(v.size(), n));
   }

   bool
   msb(const std::vector<uint8_t> &s) {
      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         return s.back() & 0x80;
      else
         return s.front() & 0x80;
   }

   ///
   /// Resize buffer \a v to size \a n using sign or zero extension
   /// according to \a ext.
   ///
   template<typename T>
   void
   extend(T &v, enum module::argument::ext_type ext, size_t n) {
      const size_t m = std::min(v.size(), n);
      const bool sign_ext = (ext == module::argument::sign_ext);
      const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
      T w(n, fill);

      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         std::copy_n(v.begin(), m, w.begin());
      else
         std::copy_n(v.end() - m, m, w.end() - m);

      std::swap(v, w);
   }

   ///
   /// Append buffer \a w to \a v.
   ///
   template<typename T>
   void
   insert(T &v, const T &w) {
      v.insert(v.end(), w.begin(), w.end());
   }

   ///
   /// Append \a n elements to the end of buffer \a v and return the
   /// offset of the first appended element.
   ///
   template<typename T>
   size_t
   allocate(T &v, size_t n) {
      size_t pos = v.size();
      v.resize(pos + n);
      return pos;
   }
}

kernel::argument::argument() : _set(false) {
}

bool
kernel::argument::set() const {
   return _set;
}

size_t
kernel::argument::storage() const {
   return 0;
}

kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
}

void
kernel::scalar_argument::set(size_t size, const void *value) {
   if (size != this->size)
      throw error(CL_INVALID_ARG_SIZE);

   v = { (uint8_t *)value, (uint8_t *)value + size };
   _set = true;
}

void
kernel::scalar_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   auto w = v;

   extend(w, marg.ext_type, marg.target_size);
   byteswap(w, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, w);
}

void
kernel::scalar_argument::unbind(exec_context &ctx) {
}

void
kernel::global_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = &obj<buffer>(*(cl_mem *)value);
   _set = true;
}

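// The final device address of a global buffer isn't known until
// set_global_binding() time, so reserve space for the handle in the input
// buffer and record its offset; launch() passes these offsets to
// set_global_binding() so the driver can fill in the actual addresses.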
void
kernel::global_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   align(ctx.input, marg.target_align);
   ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
   ctx.g_buffers.push_back(buf->resource(*ctx.q).pipe);
}

void
kernel::global_argument::unbind(exec_context &ctx) {
}

size_t
kernel::local_argument::storage() const {
   return _storage;
}

void
kernel::local_argument::set(size_t size, const void *value) {
   if (value)
      throw error(CL_INVALID_ARG_VALUE);

   _storage = size;
   _set = true;
}

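// A __local pointer argument is passed as the current offset into the
// kernel's local memory area, which is then advanced by this argument's
// storage size.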
void
kernel::local_argument::bind(exec_context &ctx,
                             const module::argument &marg) {
   auto v = bytes(ctx.mem_local);

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   ctx.mem_local += _storage;
}

void
kernel::local_argument::unbind(exec_context &ctx) {
}

void
kernel::constant_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = &obj<buffer>(*(cl_mem *)value);
   _set = true;
}

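// Constant buffers are bound as compute resources.  The argument value handed
// to the kernel encodes the resource index in its top byte (hence the shift
// by 24); the driver is presumably expected to resolve it to an actual
// address.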
void
kernel::constant_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   auto v = bytes(ctx.resources.size() << 24);

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = buf->resource(*ctx.q).bind_surface(*ctx.q, false);
   ctx.resources.push_back(st);
}

void
kernel::constant_argument::unbind(exec_context &ctx) {
   buf->resource(*ctx.q).unbind_surface(*ctx.q, st);
}

void
kernel::image_rd_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   img = &obj<image>(*(cl_mem *)value);
   _set = true;
}

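// Read-only images are bound as sampler views; the argument value passed to
// the kernel is the index of the view in the sampler view list.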
void
kernel::image_rd_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   auto v = bytes(ctx.sviews.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource(*ctx.q).bind_sampler_view(*ctx.q);
   ctx.sviews.push_back(st);
}

void
kernel::image_rd_argument::unbind(exec_context &ctx) {
   img->resource(*ctx.q).unbind_sampler_view(*ctx.q, st);
}

void
kernel::image_wr_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   img = &obj<image>(*(cl_mem *)value);
   _set = true;
}

void
kernel::image_wr_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   auto v = bytes(ctx.resources.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource(*ctx.q).bind_surface(*ctx.q, true);
   ctx.resources.push_back(st);
}

void
kernel::image_wr_argument::unbind(exec_context &ctx) {
   img->resource(*ctx.q).unbind_surface(*ctx.q, st);
}

void
kernel::sampler_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_sampler))
      throw error(CL_INVALID_ARG_SIZE);

   s = &obj(*(cl_sampler *)value);
   _set = true;
}

void
kernel::sampler_argument::bind(exec_context &ctx,
                               const module::argument &marg) {
   st = s->bind(*ctx.q);
   ctx.samplers.push_back(st);
}

void
kernel::sampler_argument::unbind(exec_context &ctx) {
   s->unbind(*ctx.q, st);
}