2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
23 #include "core/kernel.hpp"
24 #include "core/resource.hpp"
25 #include "util/u_math.h"
26 #include "pipe/p_context.h"
28 using namespace clover
;
30 _cl_kernel::_cl_kernel(clover::program
&prog
,
31 const std::string
&name
,
32 const std::vector
<clover::module::argument
> &margs
) :
33 prog(prog
), __name(name
), exec(*this) {
34 for (auto marg
: margs
) {
35 if (marg
.type
== module::argument::scalar
)
36 args
.emplace_back(new scalar_argument(marg
.size
));
37 else if (marg
.type
== module::argument::global
)
38 args
.emplace_back(new global_argument
);
39 else if (marg
.type
== module::argument::local
)
40 args
.emplace_back(new local_argument
);
41 else if (marg
.type
== module::argument::constant
)
42 args
.emplace_back(new constant_argument
);
43 else if (marg
.type
== module::argument::image2d_rd
||
44 marg
.type
== module::argument::image3d_rd
)
45 args
.emplace_back(new image_rd_argument
);
46 else if (marg
.type
== module::argument::image2d_wr
||
47 marg
.type
== module::argument::image3d_wr
)
48 args
.emplace_back(new image_wr_argument
);
49 else if (marg
.type
== module::argument::sampler
)
50 args
.emplace_back(new sampler_argument
);
52 throw error(CL_INVALID_KERNEL_DEFINITION
);
56 template<typename T
, typename V
>
57 static inline std::vector
<T
>
58 pad_vector(clover::command_queue
&q
, const V
&v
, T x
) {
59 std::vector
<T
> w
{ v
.begin(), v
.end() };
60 w
.resize(q
.dev
.max_block_size().size(), x
);
65 _cl_kernel::launch(clover::command_queue
&q
,
66 const std::vector
<size_t> &grid_offset
,
67 const std::vector
<size_t> &grid_size
,
68 const std::vector
<size_t> &block_size
) {
69 void *st
= exec
.bind(&q
);
70 auto g_handles
= map([&](size_t h
) { return (uint32_t *)&exec
.input
[h
]; },
71 exec
.g_handles
.begin(), exec
.g_handles
.end());
73 q
.pipe
->bind_compute_state(q
.pipe
, st
);
74 q
.pipe
->bind_sampler_states(q
.pipe
, PIPE_SHADER_COMPUTE
,
75 0, exec
.samplers
.size(),
76 exec
.samplers
.data());
78 q
.pipe
->set_compute_sampler_views(q
.pipe
, 0, exec
.sviews
.size(),
80 q
.pipe
->set_compute_resources(q
.pipe
, 0, exec
.resources
.size(),
81 exec
.resources
.data());
82 q
.pipe
->set_global_binding(q
.pipe
, 0, exec
.g_buffers
.size(),
83 exec
.g_buffers
.data(), g_handles
.data());
85 q
.pipe
->launch_grid(q
.pipe
,
86 pad_vector
<uint
>(q
, block_size
, 1).data(),
87 pad_vector
<uint
>(q
, grid_size
, 1).data(),
88 module(q
).sym(__name
).offset
,
91 q
.pipe
->set_global_binding(q
.pipe
, 0, exec
.g_buffers
.size(), NULL
, NULL
);
92 q
.pipe
->set_compute_resources(q
.pipe
, 0, exec
.resources
.size(), NULL
);
93 q
.pipe
->set_compute_sampler_views(q
.pipe
, 0, exec
.sviews
.size(), NULL
);
94 q
.pipe
->bind_sampler_states(q
.pipe
, PIPE_SHADER_COMPUTE
, 0,
95 exec
.samplers
.size(), NULL
);
100 _cl_kernel::mem_local() const {
103 for (auto &arg
: args
) {
104 if (dynamic_cast<local_argument
*>(arg
.get()))
105 sz
+= arg
->storage();
// NOTE(review): only the signature line of mem_private() is visible
// here; its return type and body are not shown, so the value it
// reports cannot be documented -- confirm against the full file.
112 _cl_kernel::mem_private() const {
117 _cl_kernel::max_block_size() const {
118 return std::numeric_limits
<std::size_t>::max();
// NOTE(review): only the signature line of name() is visible here.
// Presumably it returns the __name member initialised by the
// constructor -- confirm against the full file.
122 _cl_kernel::name() const {
// NOTE(review): only the signature line of block_size() is visible
// here; its return type and body are not shown, so the value it
// reports cannot be documented -- confirm against the full file.
127 _cl_kernel::block_size() const {
131 const clover::module
&
132 _cl_kernel::module(const clover::command_queue
&q
) const {
133 return prog
.binaries().find(&q
.dev
)->second
;
136 _cl_kernel::exec_context::exec_context(clover::kernel
&kern
) :
137 kern(kern
), q(NULL
), mem_local(0), st(NULL
) {
140 _cl_kernel::exec_context::~exec_context() {
142 q
->pipe
->delete_compute_state(q
->pipe
, st
);
146 _cl_kernel::exec_context::bind(clover::command_queue
*__q
) {
149 // Bind kernel arguments.
150 auto margs
= kern
.module(*q
).sym(kern
.name()).args
;
151 for_each([=](std::unique_ptr
<kernel::argument
> &karg
,
152 const module::argument
&marg
) {
153 karg
->bind(*this, marg
);
154 }, kern
.args
.begin(), kern
.args
.end(), margs
.begin());
156 // Create a new compute state if anything changed.
157 if (!st
|| q
!= __q
||
158 cs
.req_local_mem
!= mem_local
||
159 cs
.req_input_mem
!= input
.size()) {
161 __q
->pipe
->delete_compute_state(__q
->pipe
, st
);
163 cs
.prog
= kern
.module(*q
).sec(module::section::text
).data
.begin();
164 cs
.req_local_mem
= mem_local
;
165 cs
.req_input_mem
= input
.size();
166 st
= q
->pipe
->create_compute_state(q
->pipe
, &cs
);
173 _cl_kernel::exec_context::unbind() {
174 for (auto &arg
: kern
.args
)
///
/// Return a copy of the object representation of \a x as a buffer of
/// sizeof(x) bytes, in the order they are stored in memory.
///
template<typename T>
std::vector<uint8_t>
bytes(const T &x) {
   const uint8_t *const first = reinterpret_cast<const uint8_t *>(&x);

   return std::vector<uint8_t>(first, first + sizeof(x));
}
194 /// Transform buffer \a v from the native byte order into the byte
195 /// order specified by \a e.
199 byteswap(T
&v
, pipe_endian e
) {
200 if (PIPE_ENDIAN_NATIVE
!= e
)
201 std::reverse(v
.begin(), v
.end());
///
/// Grow buffer \a v to the size util_align_npot() computes for its
/// current size and alignment \a n, i.e. pad it to the next multiple
/// of \a n.
///
template<typename T>
void
align(T &v, size_t n) {
   const auto padded_size = util_align_npot(v.size(), n);

   v.resize(padded_size);
}
214 msb(const std::vector
<uint8_t> &s
) {
215 if (PIPE_ENDIAN_NATIVE
== PIPE_ENDIAN_LITTLE
)
216 return s
.back() & 0x80;
218 return s
.front() & 0x80;
222 /// Resize buffer \a v to size \a n using sign or zero extension
223 /// according to \a ext.
227 extend(T
&v
, enum clover::module::argument::ext_type ext
, size_t n
) {
228 const size_t m
= std::min(v
.size(), n
);
229 const bool sign_ext
= (ext
== module::argument::sign_ext
);
230 const uint8_t fill
= (sign_ext
&& msb(v
) ? ~0 : 0);
233 if (PIPE_ENDIAN_NATIVE
== PIPE_ENDIAN_LITTLE
)
234 std::copy_n(v
.begin(), m
, w
.begin());
236 std::copy_n(v
.end() - m
, m
, w
.end() - m
);
///
/// Append the contents of buffer \a w to the end of buffer \a v.
///
template<typename T>
void
insert(T &v, const T &w) {
   v.insert(v.end(), w.begin(), w.end());
}
///
/// Append \a n elements to the end of buffer \a v and return the
/// offset within \a v at which the new elements start.
///
template<typename T>
size_t
allocate(T &v, size_t n) {
   size_t pos = v.size();

   v.resize(pos + n);
   return pos;
}
262 _cl_kernel::argument::argument() : __set(false) {
// NOTE(review): only the signature line of this const set() overload
// is visible here.  Presumably it reports the __set flag that the
// constructor initialises to false -- confirm against the full file.
266 _cl_kernel::argument::set() const {
// NOTE(review): only the signature line of the base-class storage()
// is visible here; its return type and body are not shown, so the
// value it reports cannot be documented -- confirm against the full
// file.
271 _cl_kernel::argument::storage() const {
275 _cl_kernel::scalar_argument::scalar_argument(size_t size
) : size(size
) {
279 _cl_kernel::scalar_argument::set(size_t size
, const void *value
) {
280 if (size
!= this->size
)
281 throw error(CL_INVALID_ARG_SIZE
);
283 v
= { (uint8_t *)value
, (uint8_t *)value
+ size
};
288 _cl_kernel::scalar_argument::bind(exec_context
&ctx
,
289 const clover::module::argument
&marg
) {
292 extend(w
, marg
.ext_type
, marg
.target_size
);
293 byteswap(w
, ctx
.q
->dev
.endianness());
294 align(ctx
.input
, marg
.target_align
);
295 insert(ctx
.input
, w
);
299 _cl_kernel::scalar_argument::unbind(exec_context
&ctx
) {
303 _cl_kernel::global_argument::set(size_t size
, const void *value
) {
304 if (size
!= sizeof(cl_mem
))
305 throw error(CL_INVALID_ARG_SIZE
);
307 obj
= dynamic_cast<clover::buffer
*>(*(cl_mem
*)value
);
309 throw error(CL_INVALID_MEM_OBJECT
);
315 _cl_kernel::global_argument::bind(exec_context
&ctx
,
316 const clover::module::argument
&marg
) {
317 align(ctx
.input
, marg
.target_align
);
318 ctx
.g_handles
.push_back(allocate(ctx
.input
, marg
.target_size
));
319 ctx
.g_buffers
.push_back(obj
->resource(ctx
.q
).pipe
);
323 _cl_kernel::global_argument::unbind(exec_context
&ctx
) {
// NOTE(review): only the signature line of storage() is visible here.
// Presumably it reports the __storage member that bind() adds to the
// local memory total -- confirm against the full file.
327 _cl_kernel::local_argument::storage() const {
332 _cl_kernel::local_argument::set(size_t size
, const void *value
) {
334 throw error(CL_INVALID_ARG_VALUE
);
341 _cl_kernel::local_argument::bind(exec_context
&ctx
,
342 const clover::module::argument
&marg
) {
343 auto v
= bytes(ctx
.mem_local
);
345 extend(v
, module::argument::zero_ext
, marg
.target_size
);
346 byteswap(v
, ctx
.q
->dev
.endianness());
347 align(ctx
.input
, marg
.target_align
);
348 insert(ctx
.input
, v
);
350 ctx
.mem_local
+= __storage
;
354 _cl_kernel::local_argument::unbind(exec_context
&ctx
) {
358 _cl_kernel::constant_argument::set(size_t size
, const void *value
) {
359 if (size
!= sizeof(cl_mem
))
360 throw error(CL_INVALID_ARG_SIZE
);
362 obj
= dynamic_cast<clover::buffer
*>(*(cl_mem
*)value
);
364 throw error(CL_INVALID_MEM_OBJECT
);
370 _cl_kernel::constant_argument::bind(exec_context
&ctx
,
371 const clover::module::argument
&marg
) {
372 auto v
= bytes(ctx
.resources
.size() << 24);
374 extend(v
, module::argument::zero_ext
, marg
.target_size
);
375 byteswap(v
, ctx
.q
->dev
.endianness());
376 align(ctx
.input
, marg
.target_align
);
377 insert(ctx
.input
, v
);
379 st
= obj
->resource(ctx
.q
).bind_surface(*ctx
.q
, false);
380 ctx
.resources
.push_back(st
);
384 _cl_kernel::constant_argument::unbind(exec_context
&ctx
) {
385 obj
->resource(ctx
.q
).unbind_surface(*ctx
.q
, st
);
389 _cl_kernel::image_rd_argument::set(size_t size
, const void *value
) {
390 if (size
!= sizeof(cl_mem
))
391 throw error(CL_INVALID_ARG_SIZE
);
393 obj
= dynamic_cast<clover::image
*>(*(cl_mem
*)value
);
395 throw error(CL_INVALID_MEM_OBJECT
);
401 _cl_kernel::image_rd_argument::bind(exec_context
&ctx
,
402 const clover::module::argument
&marg
) {
403 auto v
= bytes(ctx
.sviews
.size());
405 extend(v
, module::argument::zero_ext
, marg
.target_size
);
406 byteswap(v
, ctx
.q
->dev
.endianness());
407 align(ctx
.input
, marg
.target_align
);
408 insert(ctx
.input
, v
);
410 st
= obj
->resource(ctx
.q
).bind_sampler_view(*ctx
.q
);
411 ctx
.sviews
.push_back(st
);
415 _cl_kernel::image_rd_argument::unbind(exec_context
&ctx
) {
416 obj
->resource(ctx
.q
).unbind_sampler_view(*ctx
.q
, st
);
420 _cl_kernel::image_wr_argument::set(size_t size
, const void *value
) {
421 if (size
!= sizeof(cl_mem
))
422 throw error(CL_INVALID_ARG_SIZE
);
424 obj
= dynamic_cast<clover::image
*>(*(cl_mem
*)value
);
426 throw error(CL_INVALID_MEM_OBJECT
);
432 _cl_kernel::image_wr_argument::bind(exec_context
&ctx
,
433 const clover::module::argument
&marg
) {
434 auto v
= bytes(ctx
.resources
.size());
436 extend(v
, module::argument::zero_ext
, marg
.target_size
);
437 byteswap(v
, ctx
.q
->dev
.endianness());
438 align(ctx
.input
, marg
.target_align
);
439 insert(ctx
.input
, v
);
441 st
= obj
->resource(ctx
.q
).bind_surface(*ctx
.q
, true);
442 ctx
.resources
.push_back(st
);
446 _cl_kernel::image_wr_argument::unbind(exec_context
&ctx
) {
447 obj
->resource(ctx
.q
).unbind_surface(*ctx
.q
, st
);
451 _cl_kernel::sampler_argument::set(size_t size
, const void *value
) {
452 if (size
!= sizeof(cl_sampler
))
453 throw error(CL_INVALID_ARG_SIZE
);
455 obj
= *(cl_sampler
*)value
;
460 _cl_kernel::sampler_argument::bind(exec_context
&ctx
,
461 const clover::module::argument
&marg
) {
462 st
= obj
->bind(*ctx
.q
);
463 ctx
.samplers
.push_back(st
);
467 _cl_kernel::sampler_argument::unbind(exec_context
&ctx
) {
468 obj
->unbind(*ctx
.q
, st
);