clover: Use std::numeric_limits<std::size_t>::max() instead of SIZE_MAX
[mesa.git] / src / gallium / state_trackers / clover / core / kernel.cpp
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "core/kernel.hpp"
24 #include "core/resource.hpp"
25 #include "util/u_math.h"
26 #include "pipe/p_context.h"
27
28 using namespace clover;
29
30 _cl_kernel::_cl_kernel(clover::program &prog,
31 const std::string &name,
32 const std::vector<clover::module::argument> &margs) :
33 prog(prog), __name(name), exec(*this) {
34 for (auto marg : margs) {
35 if (marg.type == module::argument::scalar)
36 args.emplace_back(new scalar_argument(marg.size));
37 else if (marg.type == module::argument::global)
38 args.emplace_back(new global_argument);
39 else if (marg.type == module::argument::local)
40 args.emplace_back(new local_argument);
41 else if (marg.type == module::argument::constant)
42 args.emplace_back(new constant_argument);
43 else if (marg.type == module::argument::image2d_rd ||
44 marg.type == module::argument::image3d_rd)
45 args.emplace_back(new image_rd_argument);
46 else if (marg.type == module::argument::image2d_wr ||
47 marg.type == module::argument::image3d_wr)
48 args.emplace_back(new image_wr_argument);
49 else if (marg.type == module::argument::sampler)
50 args.emplace_back(new sampler_argument);
51 else
52 throw error(CL_INVALID_KERNEL_DEFINITION);
53 }
54 }
55
56 template<typename T, typename V>
57 static inline std::vector<T>
58 pad_vector(clover::command_queue &q, const V &v, T x) {
59 std::vector<T> w { v.begin(), v.end() };
60 w.resize(q.dev.max_block_size().size(), x);
61 return w;
62 }
63
64 void
65 _cl_kernel::launch(clover::command_queue &q,
66 const std::vector<size_t> &grid_offset,
67 const std::vector<size_t> &grid_size,
68 const std::vector<size_t> &block_size) {
69 void *st = exec.bind(&q);
70 auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; },
71 exec.g_handles.begin(), exec.g_handles.end());
72
73 q.pipe->bind_compute_state(q.pipe, st);
74 q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
75 0, exec.samplers.size(),
76 exec.samplers.data());
77
78 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
79 exec.sviews.data());
80 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
81 exec.resources.data());
82 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
83 exec.g_buffers.data(), g_handles.data());
84
85 q.pipe->launch_grid(q.pipe,
86 pad_vector<uint>(q, block_size, 1).data(),
87 pad_vector<uint>(q, grid_size, 1).data(),
88 module(q).sym(__name).offset,
89 exec.input.data());
90
91 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
92 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
93 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
94 q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
95 exec.samplers.size(), NULL);
96 exec.unbind();
97 }
98
99 size_t
100 _cl_kernel::mem_local() const {
101 size_t sz = 0;
102
103 for (auto &arg : args) {
104 if (dynamic_cast<local_argument *>(arg.get()))
105 sz += arg->storage();
106 }
107
108 return sz;
109 }
110
111 size_t
112 _cl_kernel::mem_private() const {
113 return 0;
114 }
115
116 size_t
117 _cl_kernel::max_block_size() const {
118 return std::numeric_limits<std::size_t>::max();
119 }
120
121 const std::string &
122 _cl_kernel::name() const {
123 return __name;
124 }
125
126 std::vector<size_t>
127 _cl_kernel::block_size() const {
128 return { 0, 0, 0 };
129 }
130
131 const clover::module &
132 _cl_kernel::module(const clover::command_queue &q) const {
133 return prog.binaries().find(&q.dev)->second;
134 }
135
136 _cl_kernel::exec_context::exec_context(clover::kernel &kern) :
137 kern(kern), q(NULL), mem_local(0), st(NULL) {
138 }
139
140 _cl_kernel::exec_context::~exec_context() {
141 if (st)
142 q->pipe->delete_compute_state(q->pipe, st);
143 }
144
145 void *
146 _cl_kernel::exec_context::bind(clover::command_queue *__q) {
147 std::swap(q, __q);
148
149 // Bind kernel arguments.
150 auto margs = kern.module(*q).sym(kern.name()).args;
151 for_each([=](std::unique_ptr<kernel::argument> &karg,
152 const module::argument &marg) {
153 karg->bind(*this, marg);
154 }, kern.args.begin(), kern.args.end(), margs.begin());
155
156 // Create a new compute state if anything changed.
157 if (!st || q != __q ||
158 cs.req_local_mem != mem_local ||
159 cs.req_input_mem != input.size()) {
160 if (st)
161 __q->pipe->delete_compute_state(__q->pipe, st);
162
163 cs.prog = kern.module(*q).sec(module::section::text).data.begin();
164 cs.req_local_mem = mem_local;
165 cs.req_input_mem = input.size();
166 st = q->pipe->create_compute_state(q->pipe, &cs);
167 }
168
169 return st;
170 }
171
172 void
173 _cl_kernel::exec_context::unbind() {
174 for (auto &arg : kern.args)
175 arg->unbind(*this);
176
177 input.clear();
178 samplers.clear();
179 sviews.clear();
180 resources.clear();
181 g_buffers.clear();
182 g_handles.clear();
183 mem_local = 0;
184 }
185
186 namespace {
///
/// Return a copy of the object representation of \a x as a byte
/// buffer of sizeof(x) elements, in native byte order.
///
template<typename T>
std::vector<uint8_t>
bytes(const T& x) {
   // Keep the const qualifier of \a x and select the iterator-range
   // constructor explicitly rather than through brace initialization.
   const uint8_t *first = reinterpret_cast<const uint8_t *>(&x);
   return std::vector<uint8_t>(first, first + sizeof(x));
}
192
193 ///
194 /// Transform buffer \a v from the native byte order into the byte
195 /// order specified by \a e.
196 ///
197 template<typename T>
198 void
199 byteswap(T &v, pipe_endian e) {
200 if (PIPE_ENDIAN_NATIVE != e)
201 std::reverse(v.begin(), v.end());
202 }
203
///
/// Grow buffer \a v to the size util_align_npot() computes for its
/// current size and the alignment \a n.
///
template<typename T>
void
align(T &v, size_t n) {
   const auto padded = util_align_npot(v.size(), n);
   v.resize(padded);
}
212
213 bool
214 msb(const std::vector<uint8_t> &s) {
215 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
216 return s.back() & 0x80;
217 else
218 return s.front() & 0x80;
219 }
220
221 ///
222 /// Resize buffer \a v to size \a n using sign or zero extension
223 /// according to \a ext.
224 ///
225 template<typename T>
226 void
227 extend(T &v, enum clover::module::argument::ext_type ext, size_t n) {
228 const size_t m = std::min(v.size(), n);
229 const bool sign_ext = (ext == module::argument::sign_ext);
230 const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
231 T w(n, fill);
232
233 if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
234 std::copy_n(v.begin(), m, w.begin());
235 else
236 std::copy_n(v.end() - m, m, w.end() - m);
237
238 std::swap(v, w);
239 }
240
///
/// Append the contents of buffer \a w at the end of buffer \a v.
///
template<typename T>
void
insert(T &v, const T &w) {
   const auto tail = v.end();
   v.insert(tail, w.begin(), w.end());
}
249
///
/// Grow buffer \a v by \a n elements and return the index at which
/// the newly added elements start.
///
template<typename T>
size_t
allocate(T &v, size_t n) {
   const size_t offset = v.size();

   v.resize(offset + n);
   return offset;
}
260 }
261
262 _cl_kernel::argument::argument() : __set(false) {
263 }
264
265 bool
266 _cl_kernel::argument::set() const {
267 return __set;
268 }
269
270 size_t
271 _cl_kernel::argument::storage() const {
272 return 0;
273 }
274
275 _cl_kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
276 }
277
278 void
279 _cl_kernel::scalar_argument::set(size_t size, const void *value) {
280 if (size != this->size)
281 throw error(CL_INVALID_ARG_SIZE);
282
283 v = { (uint8_t *)value, (uint8_t *)value + size };
284 __set = true;
285 }
286
287 void
288 _cl_kernel::scalar_argument::bind(exec_context &ctx,
289 const clover::module::argument &marg) {
290 auto w = v;
291
292 extend(w, marg.ext_type, marg.target_size);
293 byteswap(w, ctx.q->dev.endianness());
294 align(ctx.input, marg.target_align);
295 insert(ctx.input, w);
296 }
297
298 void
299 _cl_kernel::scalar_argument::unbind(exec_context &ctx) {
300 }
301
302 void
303 _cl_kernel::global_argument::set(size_t size, const void *value) {
304 if (size != sizeof(cl_mem))
305 throw error(CL_INVALID_ARG_SIZE);
306
307 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
308 if (!obj)
309 throw error(CL_INVALID_MEM_OBJECT);
310
311 __set = true;
312 }
313
314 void
315 _cl_kernel::global_argument::bind(exec_context &ctx,
316 const clover::module::argument &marg) {
317 align(ctx.input, marg.target_align);
318 ctx.g_handles.push_back(allocate(ctx.input, marg.target_size));
319 ctx.g_buffers.push_back(obj->resource(ctx.q).pipe);
320 }
321
322 void
323 _cl_kernel::global_argument::unbind(exec_context &ctx) {
324 }
325
326 size_t
327 _cl_kernel::local_argument::storage() const {
328 return __storage;
329 }
330
331 void
332 _cl_kernel::local_argument::set(size_t size, const void *value) {
333 if (value)
334 throw error(CL_INVALID_ARG_VALUE);
335
336 __storage = size;
337 __set = true;
338 }
339
340 void
341 _cl_kernel::local_argument::bind(exec_context &ctx,
342 const clover::module::argument &marg) {
343 auto v = bytes(ctx.mem_local);
344
345 extend(v, module::argument::zero_ext, marg.target_size);
346 byteswap(v, ctx.q->dev.endianness());
347 align(ctx.input, marg.target_align);
348 insert(ctx.input, v);
349
350 ctx.mem_local += __storage;
351 }
352
353 void
354 _cl_kernel::local_argument::unbind(exec_context &ctx) {
355 }
356
357 void
358 _cl_kernel::constant_argument::set(size_t size, const void *value) {
359 if (size != sizeof(cl_mem))
360 throw error(CL_INVALID_ARG_SIZE);
361
362 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
363 if (!obj)
364 throw error(CL_INVALID_MEM_OBJECT);
365
366 __set = true;
367 }
368
369 void
370 _cl_kernel::constant_argument::bind(exec_context &ctx,
371 const clover::module::argument &marg) {
372 auto v = bytes(ctx.resources.size() << 24);
373
374 extend(v, module::argument::zero_ext, marg.target_size);
375 byteswap(v, ctx.q->dev.endianness());
376 align(ctx.input, marg.target_align);
377 insert(ctx.input, v);
378
379 st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
380 ctx.resources.push_back(st);
381 }
382
383 void
384 _cl_kernel::constant_argument::unbind(exec_context &ctx) {
385 obj->resource(ctx.q).unbind_surface(*ctx.q, st);
386 }
387
388 void
389 _cl_kernel::image_rd_argument::set(size_t size, const void *value) {
390 if (size != sizeof(cl_mem))
391 throw error(CL_INVALID_ARG_SIZE);
392
393 obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
394 if (!obj)
395 throw error(CL_INVALID_MEM_OBJECT);
396
397 __set = true;
398 }
399
400 void
401 _cl_kernel::image_rd_argument::bind(exec_context &ctx,
402 const clover::module::argument &marg) {
403 auto v = bytes(ctx.sviews.size());
404
405 extend(v, module::argument::zero_ext, marg.target_size);
406 byteswap(v, ctx.q->dev.endianness());
407 align(ctx.input, marg.target_align);
408 insert(ctx.input, v);
409
410 st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
411 ctx.sviews.push_back(st);
412 }
413
414 void
415 _cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
416 obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
417 }
418
419 void
420 _cl_kernel::image_wr_argument::set(size_t size, const void *value) {
421 if (size != sizeof(cl_mem))
422 throw error(CL_INVALID_ARG_SIZE);
423
424 obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
425 if (!obj)
426 throw error(CL_INVALID_MEM_OBJECT);
427
428 __set = true;
429 }
430
431 void
432 _cl_kernel::image_wr_argument::bind(exec_context &ctx,
433 const clover::module::argument &marg) {
434 auto v = bytes(ctx.resources.size());
435
436 extend(v, module::argument::zero_ext, marg.target_size);
437 byteswap(v, ctx.q->dev.endianness());
438 align(ctx.input, marg.target_align);
439 insert(ctx.input, v);
440
441 st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
442 ctx.resources.push_back(st);
443 }
444
445 void
446 _cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
447 obj->resource(ctx.q).unbind_surface(*ctx.q, st);
448 }
449
450 void
451 _cl_kernel::sampler_argument::set(size_t size, const void *value) {
452 if (size != sizeof(cl_sampler))
453 throw error(CL_INVALID_ARG_SIZE);
454
455 obj = *(cl_sampler *)value;
456 __set = true;
457 }
458
459 void
460 _cl_kernel::sampler_argument::bind(exec_context &ctx,
461 const clover::module::argument &marg) {
462 st = obj->bind(*ctx.q);
463 ctx.samplers.push_back(st);
464 }
465
466 void
467 _cl_kernel::sampler_argument::unbind(exec_context &ctx) {
468 obj->unbind(*ctx.q, st);
469 }