1a75cd48a00df4dfdeb89ed7d6e69b3591849a4a
[mesa.git] / src / gallium / state_trackers / clover / core / kernel.cpp
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "core/kernel.hpp"
24 #include "core/resource.hpp"
25 #include "util/algorithm.hpp"
26 #include "util/u_math.h"
27 #include "pipe/p_context.h"
28
29 using namespace clover;
30
31 _cl_kernel::_cl_kernel(clover::program &prog,
32 const std::string &name,
33 const std::vector<clover::module::argument> &margs) :
34 prog(prog), _name(name), exec(*this) {
35 for (auto marg : margs) {
36 if (marg.type == module::argument::scalar)
37 args.emplace_back(new scalar_argument(marg.size));
38 else if (marg.type == module::argument::global)
39 args.emplace_back(new global_argument);
40 else if (marg.type == module::argument::local)
41 args.emplace_back(new local_argument);
42 else if (marg.type == module::argument::constant)
43 args.emplace_back(new constant_argument);
44 else if (marg.type == module::argument::image2d_rd ||
45 marg.type == module::argument::image3d_rd)
46 args.emplace_back(new image_rd_argument);
47 else if (marg.type == module::argument::image2d_wr ||
48 marg.type == module::argument::image3d_wr)
49 args.emplace_back(new image_wr_argument);
50 else if (marg.type == module::argument::sampler)
51 args.emplace_back(new sampler_argument);
52 else
53 throw error(CL_INVALID_KERNEL_DEFINITION);
54 }
55 }
56
57 template<typename T, typename V>
58 static inline std::vector<T>
59 pad_vector(clover::command_queue &q, const V &v, T x) {
60 std::vector<T> w { v.begin(), v.end() };
61 w.resize(q.dev.max_block_size().size(), x);
62 return w;
63 }
64
65 void
66 _cl_kernel::launch(clover::command_queue &q,
67 const std::vector<size_t> &grid_offset,
68 const std::vector<size_t> &grid_size,
69 const std::vector<size_t> &block_size) {
70 void *st = exec.bind(&q);
71 std::vector<uint32_t *> g_handles = map([&](size_t h) {
72 return (uint32_t *)&exec.input[h];
73 }, exec.g_handles);
74
75 q.pipe->bind_compute_state(q.pipe, st);
76 q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
77 0, exec.samplers.size(),
78 exec.samplers.data());
79
80 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
81 exec.sviews.data());
82 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
83 exec.resources.data());
84 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
85 exec.g_buffers.data(), g_handles.data());
86
87 q.pipe->launch_grid(q.pipe,
88 pad_vector<uint>(q, block_size, 1).data(),
89 pad_vector<uint>(q, grid_size, 1).data(),
90 module(q).sym(_name).offset,
91 exec.input.data());
92
93 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
94 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
95 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
96 q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
97 exec.samplers.size(), NULL);
98 exec.unbind();
99 }
100
101 size_t
102 _cl_kernel::mem_local() const {
103 size_t sz = 0;
104
105 for (auto &arg : args) {
106 if (dynamic_cast<local_argument *>(arg.get()))
107 sz += arg->storage();
108 }
109
110 return sz;
111 }
112
113 size_t
114 _cl_kernel::mem_private() const {
115 return 0;
116 }
117
118 size_t
119 _cl_kernel::max_block_size() const {
120 return std::numeric_limits<std::size_t>::max();
121 }
122
123 const std::string &
124 _cl_kernel::name() const {
125 return _name;
126 }
127
128 std::vector<size_t>
129 _cl_kernel::block_size() const {
130 return { 0, 0, 0 };
131 }
132
133 const clover::module &
134 _cl_kernel::module(const clover::command_queue &q) const {
135 return prog.binaries().find(&q.dev)->second;
136 }
137
138 _cl_kernel::exec_context::exec_context(clover::kernel &kern) :
139 kern(kern), q(NULL), mem_local(0), st(NULL) {
140 }
141
142 _cl_kernel::exec_context::~exec_context() {
143 if (st)
144 q->pipe->delete_compute_state(q->pipe, st);
145 }
146
147 void *
148 _cl_kernel::exec_context::bind(clover::command_queue *_q) {
149 std::swap(q, _q);
150
151 // Bind kernel arguments.
152 auto margs = kern.module(*q).sym(kern.name()).args;
153 for_each([=](std::unique_ptr<kernel::argument> &karg,
154 const module::argument &marg) {
155 karg->bind(*this, marg);
156 }, kern.args, margs);
157
158 // Create a new compute state if anything changed.
159 if (!st || q != _q ||
160 cs.req_local_mem != mem_local ||
161 cs.req_input_mem != input.size()) {
162 if (st)
163 _q->pipe->delete_compute_state(_q->pipe, st);
164
165 cs.prog = kern.module(*q).sec(module::section::text).data.begin();
166 cs.req_local_mem = mem_local;
167 cs.req_input_mem = input.size();
168 st = q->pipe->create_compute_state(q->pipe, &cs);
169 }
170
171 return st;
172 }
173
174 void
175 _cl_kernel::exec_context::unbind() {
176 for (auto &arg : kern.args)
177 arg->unbind(*this);
178
179 input.clear();
180 samplers.clear();
181 sviews.clear();
182 resources.clear();
183 g_buffers.clear();
184 g_handles.clear();
185 mem_local = 0;
186 }
187
188 namespace {
///
/// Return a copy of the object representation of \a x as a buffer of
/// sizeof(x) bytes.
///
template<typename T>
std::vector<uint8_t>
bytes(const T& x) {
   const uint8_t *const first = reinterpret_cast<const uint8_t *>(&x);
   const uint8_t *const last = first + sizeof(x);

   return std::vector<uint8_t>(first, last);
}
194
195 ///
196 /// Transform buffer \a v from the native byte order into the byte
197 /// order specified by \a e.
198 ///
199 template<typename T>
200 void
201 byteswap(T &v, pipe_endian e) {
202 if (PIPE_ENDIAN_NATIVE != e)
203 std::reverse(v.begin(), v.end());
204 }
205
///
/// Grow buffer \a v so that its size becomes the next multiple of
/// \a n, as computed by util_align_npot().
///
template<typename T>
void
align(T &v, size_t n) {
   const auto padded_size = util_align_npot(v.size(), n);

   v.resize(padded_size);
}
214
215 bool
216 msb(const std::vector<uint8_t> &s) {
217 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
218 return s.back() & 0x80;
219 else
220 return s.front() & 0x80;
221 }
222
223 ///
224 /// Resize buffer \a v to size \a n using sign or zero extension
225 /// according to \a ext.
226 ///
227 template<typename T>
228 void
229 extend(T &v, enum clover::module::argument::ext_type ext, size_t n) {
230 const size_t m = std::min(v.size(), n);
231 const bool sign_ext = (ext == module::argument::sign_ext);
232 const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
233 T w(n, fill);
234
235 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
236 std::copy_n(v.begin(), m, w.begin());
237 else
238 std::copy_n(v.end() - m, m, w.end() - m);
239
240 std::swap(v, w);
241 }
242
///
/// Append a copy of every element of buffer \a w to the end of
/// buffer \a v.
///
template<typename T>
void
insert(T &v, const T &w) {
   const auto first = w.begin();
   const auto last = w.end();

   v.insert(v.end(), first, last);
}
251
///
/// Append \a n value-initialized elements to the end of buffer \a v
/// and return the index of the first of them, i.e. the size \a v had
/// before the call.
///
template<typename T>
size_t
allocate(T &v, size_t n) {
   const size_t offset = v.size();

   v.insert(v.end(), n, typename T::value_type());
   return offset;
}
262 }
263
264 _cl_kernel::argument::argument() : _set(false) {
265 }
266
267 bool
268 _cl_kernel::argument::set() const {
269 return _set;
270 }
271
272 size_t
273 _cl_kernel::argument::storage() const {
274 return 0;
275 }
276
277 _cl_kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
278 }
279
280 void
281 _cl_kernel::scalar_argument::set(size_t size, const void *value) {
282 if (size != this->size)
283 throw error(CL_INVALID_ARG_SIZE);
284
285 v = { (uint8_t *)value, (uint8_t *)value + size };
286 _set = true;
287 }
288
289 void
290 _cl_kernel::scalar_argument::bind(exec_context &ctx,
291 const clover::module::argument &marg) {
292 auto w = v;
293
294 extend(w, marg.ext_type, marg.target_size);
295 byteswap(w, ctx.q->dev.endianness());
296 align(ctx.input, marg.target_align);
297 insert(ctx.input, w);
298 }
299
300 void
301 _cl_kernel::scalar_argument::unbind(exec_context &ctx) {
302 }
303
304 void
305 _cl_kernel::global_argument::set(size_t size, const void *value) {
306 if (size != sizeof(cl_mem))
307 throw error(CL_INVALID_ARG_SIZE);
308
309 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
310 if (!obj)
311 throw error(CL_INVALID_MEM_OBJECT);
312
313 _set = true;
314 }
315
316 void
317 _cl_kernel::global_argument::bind(exec_context &ctx,
318 const clover::module::argument &marg) {
319 align(ctx.input, marg.target_align);
320 ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
321 ctx.g_buffers.push_back(obj->resource(ctx.q).pipe);
322 }
323
324 void
325 _cl_kernel::global_argument::unbind(exec_context &ctx) {
326 }
327
328 size_t
329 _cl_kernel::local_argument::storage() const {
330 return _storage;
331 }
332
333 void
334 _cl_kernel::local_argument::set(size_t size, const void *value) {
335 if (value)
336 throw error(CL_INVALID_ARG_VALUE);
337
338 _storage = size;
339 _set = true;
340 }
341
342 void
343 _cl_kernel::local_argument::bind(exec_context &ctx,
344 const clover::module::argument &marg) {
345 auto v = bytes(ctx.mem_local);
346
347 extend(v, module::argument::zero_ext, marg.target_size);
348 byteswap(v, ctx.q->dev.endianness());
349 align(ctx.input, marg.target_align);
350 insert(ctx.input, v);
351
352 ctx.mem_local += _storage;
353 }
354
355 void
356 _cl_kernel::local_argument::unbind(exec_context &ctx) {
357 }
358
359 void
360 _cl_kernel::constant_argument::set(size_t size, const void *value) {
361 if (size != sizeof(cl_mem))
362 throw error(CL_INVALID_ARG_SIZE);
363
364 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
365 if (!obj)
366 throw error(CL_INVALID_MEM_OBJECT);
367
368 _set = true;
369 }
370
371 void
372 _cl_kernel::constant_argument::bind(exec_context &ctx,
373 const clover::module::argument &marg) {
374 auto v = bytes(ctx.resources.size() << 24);
375
376 extend(v, module::argument::zero_ext, marg.target_size);
377 byteswap(v, ctx.q->dev.endianness());
378 align(ctx.input, marg.target_align);
379 insert(ctx.input, v);
380
381 st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
382 ctx.resources.push_back(st);
383 }
384
385 void
386 _cl_kernel::constant_argument::unbind(exec_context &ctx) {
387 obj->resource(ctx.q).unbind_surface(*ctx.q, st);
388 }
389
390 void
391 _cl_kernel::image_rd_argument::set(size_t size, const void *value) {
392 if (size != sizeof(cl_mem))
393 throw error(CL_INVALID_ARG_SIZE);
394
395 obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
396 if (!obj)
397 throw error(CL_INVALID_MEM_OBJECT);
398
399 _set = true;
400 }
401
402 void
403 _cl_kernel::image_rd_argument::bind(exec_context &ctx,
404 const clover::module::argument &marg) {
405 auto v = bytes(ctx.sviews.size());
406
407 extend(v, module::argument::zero_ext, marg.target_size);
408 byteswap(v, ctx.q->dev.endianness());
409 align(ctx.input, marg.target_align);
410 insert(ctx.input, v);
411
412 st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
413 ctx.sviews.push_back(st);
414 }
415
416 void
417 _cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
418 obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
419 }
420
421 void
422 _cl_kernel::image_wr_argument::set(size_t size, const void *value) {
423 if (size != sizeof(cl_mem))
424 throw error(CL_INVALID_ARG_SIZE);
425
426 obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
427 if (!obj)
428 throw error(CL_INVALID_MEM_OBJECT);
429
430 _set = true;
431 }
432
433 void
434 _cl_kernel::image_wr_argument::bind(exec_context &ctx,
435 const clover::module::argument &marg) {
436 auto v = bytes(ctx.resources.size());
437
438 extend(v, module::argument::zero_ext, marg.target_size);
439 byteswap(v, ctx.q->dev.endianness());
440 align(ctx.input, marg.target_align);
441 insert(ctx.input, v);
442
443 st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
444 ctx.resources.push_back(st);
445 }
446
447 void
448 _cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
449 obj->resource(ctx.q).unbind_surface(*ctx.q, st);
450 }
451
452 void
453 _cl_kernel::sampler_argument::set(size_t size, const void *value) {
454 if (size != sizeof(cl_sampler))
455 throw error(CL_INVALID_ARG_SIZE);
456
457 obj = *(cl_sampler *)value;
458 _set = true;
459 }
460
461 void
462 _cl_kernel::sampler_argument::bind(exec_context &ctx,
463 const clover::module::argument &marg) {
464 st = obj->bind(*ctx.q);
465 ctx.samplers.push_back(st);
466 }
467
468 void
469 _cl_kernel::sampler_argument::unbind(exec_context &ctx) {
470 obj->unbind(*ctx.q, st);
471 }