clover: Switch kernel and program objects to the new model.
[mesa.git] / src/gallium/state_trackers/clover/core/kernel.cpp
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include "core/kernel.hpp"
#include "core/resource.hpp"
#include "util/u_math.h"
#include "pipe/p_context.h"

using namespace clover;

kernel::kernel(program &prog,
               const std::string &name,
               const std::vector<module::argument> &margs) :
   prog(prog), _name(name), exec(*this) {
   for (auto marg : margs) {
      if (marg.type == module::argument::scalar)
         args.emplace_back(new scalar_argument(marg.size));
      else if (marg.type == module::argument::global)
         args.emplace_back(new global_argument);
      else if (marg.type == module::argument::local)
         args.emplace_back(new local_argument);
      else if (marg.type == module::argument::constant)
         args.emplace_back(new constant_argument);
      else if (marg.type == module::argument::image2d_rd ||
               marg.type == module::argument::image3d_rd)
         args.emplace_back(new image_rd_argument);
      else if (marg.type == module::argument::image2d_wr ||
               marg.type == module::argument::image3d_wr)
         args.emplace_back(new image_wr_argument);
      else if (marg.type == module::argument::sampler)
         args.emplace_back(new sampler_argument);
      else
         throw error(CL_INVALID_KERNEL_DEFINITION);
   }
}

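///
/// Pad vector \a v out to the dimensionality of the device's grid,
/// filling any missing components with \a x.
///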
template<typename T, typename V>
static inline std::vector<T>
pad_vector(command_queue &q, const V &v, T x) {
   std::vector<T> w { v.begin(), v.end() };
   w.resize(q.dev.max_block_size().size(), x);
   return w;
}

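///
/// Bind the kernel arguments and compute state, dispatch the grid on
/// the queue's pipe context, and unbind everything again afterwards.
///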
void
kernel::launch(command_queue &q,
               const std::vector<size_t> &grid_offset,
               const std::vector<size_t> &grid_size,
               const std::vector<size_t> &block_size) {
   void *st = exec.bind(&q);
   std::vector<uint32_t *> g_handles = map([&](size_t h) {
         return (uint32_t *)&exec.input[h];
      }, exec.g_handles);

   q.pipe->bind_compute_state(q.pipe, st);
   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
                               0, exec.samplers.size(),
                               exec.samplers.data());

   q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
                                     exec.sviews.data());
   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
                                 exec.resources.data());
   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
                              exec.g_buffers.data(), g_handles.data());

   q.pipe->launch_grid(q.pipe,
                       pad_vector<uint>(q, block_size, 1).data(),
                       pad_vector<uint>(q, grid_size, 1).data(),
                       module(q).sym(_name).offset,
                       exec.input.data());

   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
   q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
                               exec.samplers.size(), NULL);
   exec.unbind();
}

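///
/// Total amount of local memory used by the local arguments that are
/// currently set on the kernel.
///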
size_t
kernel::mem_local() const {
   size_t sz = 0;

   for (auto &arg : args) {
      if (dynamic_cast<local_argument *>(arg.get()))
         sz += arg->storage();
   }

   return sz;
}

size_t
kernel::mem_private() const {
   return 0;
}

size_t
kernel::max_block_size() const {
   return std::numeric_limits<std::size_t>::max();
}

const std::string &
kernel::name() const {
   return _name;
}

std::vector<size_t>
kernel::block_size() const {
   return { 0, 0, 0 };
}

const module &
kernel::module(const command_queue &q) const {
   return prog.binaries().find(&q.dev)->second;
}

kernel::exec_context::exec_context(kernel &kern) :
   kern(kern), q(NULL), mem_local(0), st(NULL) {
}

kernel::exec_context::~exec_context() {
   if (st)
      q->pipe->delete_compute_state(q->pipe, st);
}

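///
/// Serialize the argument values into the input buffer and (re)create
/// the compute state object if anything relevant has changed since the
/// last launch.  Returns the compute state handle to be bound.
///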
void *
kernel::exec_context::bind(command_queue *_q) {
   std::swap(q, _q);

   // Bind kernel arguments.
   auto margs = kern.module(*q).sym(kern.name()).args;
   for_each([=](std::unique_ptr<kernel::argument> &karg,
                const module::argument &marg) {
               karg->bind(*this, marg);
            }, kern.args, margs);

   // Create a new compute state if anything changed.
   if (!st || q != _q ||
       cs.req_local_mem != mem_local ||
       cs.req_input_mem != input.size()) {
      if (st)
         _q->pipe->delete_compute_state(_q->pipe, st);

      cs.prog = kern.module(*q).sec(module::section::text).data.begin();
      cs.req_local_mem = mem_local;
      cs.req_input_mem = input.size();
      st = q->pipe->create_compute_state(q->pipe, &cs);
   }

   return st;
}

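///
/// Undo the argument bindings and reset the per-launch state.
///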
void
kernel::exec_context::unbind() {
   for (auto &arg : kern.args)
      arg->unbind(*this);

   input.clear();
   samplers.clear();
   sviews.clear();
   resources.clear();
   g_buffers.clear();
   g_handles.clear();
   mem_local = 0;
}

namespace {
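   ///
   /// Serialize the bit pattern of \a x as a byte vector.
   ///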
   template<typename T>
   std::vector<uint8_t>
   bytes(const T& x) {
      return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) };
   }

   ///
   /// Transform buffer \a v from the native byte order into the byte
   /// order specified by \a e.
   ///
   template<typename T>
   void
   byteswap(T &v, pipe_endian e) {
      if (PIPE_ENDIAN_NATIVE != e)
         std::reverse(v.begin(), v.end());
   }

   ///
   /// Pad buffer \a v to the next multiple of \a n.
   ///
   template<typename T>
   void
   align(T &v, size_t n) {
      v.resize(util_align_npot(v.size(), n));
   }

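   ///
   /// Return true if the most significant bit of buffer \a s is set.
   ///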
   bool
   msb(const std::vector<uint8_t> &s) {
      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         return s.back() & 0x80;
      else
         return s.front() & 0x80;
   }

   ///
   /// Resize buffer \a v to size \a n using sign or zero extension
   /// according to \a ext.
   ///
   template<typename T>
   void
   extend(T &v, enum module::argument::ext_type ext, size_t n) {
      const size_t m = std::min(v.size(), n);
      const bool sign_ext = (ext == module::argument::sign_ext);
      const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
      T w(n, fill);

      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         std::copy_n(v.begin(), m, w.begin());
      else
         std::copy_n(v.end() - m, m, w.end() - m);

      std::swap(v, w);
   }

   ///
   /// Append buffer \a w to \a v.
   ///
   template<typename T>
   void
   insert(T &v, const T &w) {
      v.insert(v.end(), w.begin(), w.end());
   }

   ///
   /// Append \a n elements to the end of buffer \a v.
   ///
   template<typename T>
   size_t
   allocate(T &v, size_t n) {
      size_t pos = v.size();
      v.resize(pos + n);
      return pos;
   }
}

kernel::argument::argument() : _set(false) {
}

bool
kernel::argument::set() const {
   return _set;
}

size_t
kernel::argument::storage() const {
   return 0;
}

kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
}

void
kernel::scalar_argument::set(size_t size, const void *value) {
   if (size != this->size)
      throw error(CL_INVALID_ARG_SIZE);

   v = { (uint8_t *)value, (uint8_t *)value + size };
   _set = true;
}

void
kernel::scalar_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   auto w = v;

   extend(w, marg.ext_type, marg.target_size);
   byteswap(w, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, w);
}

void
kernel::scalar_argument::unbind(exec_context &ctx) {
}

void
kernel::global_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = dynamic_cast<buffer *>(*(cl_mem *)value);
   if (!buf)
      throw error(CL_INVALID_MEM_OBJECT);

   _set = true;
}

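///
/// Reserve space in the input buffer for the buffer's device address.
/// The actual address is filled in by the driver at launch time through
/// the handle passed to set_global_binding().
///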
void
kernel::global_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   align(ctx.input, marg.target_align);
   ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
   ctx.g_buffers.push_back(buf->resource(*ctx.q).pipe);
}

void
kernel::global_argument::unbind(exec_context &ctx) {
}

size_t
kernel::local_argument::storage() const {
   return _storage;
}

void
kernel::local_argument::set(size_t size, const void *value) {
   if (value)
      throw error(CL_INVALID_ARG_VALUE);

   _storage = size;
   _set = true;
}

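///
/// Pass the current offset into the local memory area as the argument
/// value and reserve \a _storage bytes of local memory for it.
///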
void
kernel::local_argument::bind(exec_context &ctx,
                             const module::argument &marg) {
   auto v = bytes(ctx.mem_local);

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   ctx.mem_local += _storage;
}

void
kernel::local_argument::unbind(exec_context &ctx) {
}

void
kernel::constant_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = dynamic_cast<buffer *>(*(cl_mem *)value);
   if (!buf)
      throw error(CL_INVALID_MEM_OBJECT);

   _set = true;
}

void
kernel::constant_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
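   // Pass an argument value with the index of the associated compute
   // resource encoded in bits [31:24].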
   auto v = bytes(ctx.resources.size() << 24);

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = buf->resource(*ctx.q).bind_surface(*ctx.q, false);
   ctx.resources.push_back(st);
}

void
kernel::constant_argument::unbind(exec_context &ctx) {
   buf->resource(*ctx.q).unbind_surface(*ctx.q, st);
}

void
kernel::image_rd_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   img = dynamic_cast<image *>(*(cl_mem *)value);
   if (!img)
      throw error(CL_INVALID_MEM_OBJECT);

   _set = true;
}

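///
/// Pass the index of the image's sampler view as the argument value and
/// bind the view to the compute pipeline.
///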
void
kernel::image_rd_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   auto v = bytes(ctx.sviews.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource(*ctx.q).bind_sampler_view(*ctx.q);
   ctx.sviews.push_back(st);
}

void
kernel::image_rd_argument::unbind(exec_context &ctx) {
   img->resource(*ctx.q).unbind_sampler_view(*ctx.q, st);
}

void
kernel::image_wr_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   img = dynamic_cast<image *>(*(cl_mem *)value);
   if (!img)
      throw error(CL_INVALID_MEM_OBJECT);

   _set = true;
}

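///
/// Pass the index of the image's surface as the argument value and bind
/// the image as a writable compute resource.
///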
void
kernel::image_wr_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   auto v = bytes(ctx.resources.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->dev.endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource(*ctx.q).bind_surface(*ctx.q, true);
   ctx.resources.push_back(st);
}

void
kernel::image_wr_argument::unbind(exec_context &ctx) {
   img->resource(*ctx.q).unbind_surface(*ctx.q, st);
}

void
kernel::sampler_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_sampler))
      throw error(CL_INVALID_ARG_SIZE);

   s = *(cl_sampler *)value;
   _set = true;
}

void
kernel::sampler_argument::bind(exec_context &ctx,
                               const module::argument &marg) {
   st = s->bind(*ctx.q);
   ctx.samplers.push_back(st);
}

void
kernel::sampler_argument::unbind(exec_context &ctx) {
   s->unbind(*ctx.q, st);
}