2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
23 #include "core/kernel.hpp"
24 #include "core/resource.hpp"
25 #include "pipe/p_context.h"
27 using namespace clover
;
// Constructs a kernel for the symbol `name` of program `prog`.
//
// Stores `prog` and `name` (the latter in `__name`), initialises `exec`
// with a reference to this kernel, and appends to `args` one newly
// allocated argument object per entry of `margs`, selected by the
// entry's `type`:
//   scalar                  -> scalar_argument(marg.size)
//   global                  -> global_argument
//   local                   -> local_argument
//   constant                -> constant_argument
//   image2d_rd / image3d_rd -> image_rd_argument
//   image2d_wr / image3d_wr -> image_wr_argument
//   sampler                 -> sampler_argument
//
// Throws error(CL_INVALID_KERNEL_DEFINITION).
// NOTE(review): the branch guarding the throw is not visible in this
// excerpt; presumably it is the final `else` for an unrecognised
// argument type -- confirm.
29 _cl_kernel::_cl_kernel(clover::program
&prog
,
30 const std::string
&name
,
31 const std::vector
<clover::module::argument
> &margs
) :
32 prog(prog
), __name(name
), exec(*this) {
// `marg` is declared by value, so each module argument is copied once
// per iteration.
33 for (auto marg
: margs
) {
34 if (marg
.type
== module::argument::scalar
)
// Scalars are the only kind whose constructor receives the declared
// size.
35 args
.emplace_back(new scalar_argument(marg
.size
));
36 else if (marg
.type
== module::argument::global
)
37 args
.emplace_back(new global_argument
);
38 else if (marg
.type
== module::argument::local
)
39 args
.emplace_back(new local_argument
);
40 else if (marg
.type
== module::argument::constant
)
41 args
.emplace_back(new constant_argument
);
// 2D and 3D read-only images share one argument class.
42 else if (marg
.type
== module::argument::image2d_rd
||
43 marg
.type
== module::argument::image3d_rd
)
44 args
.emplace_back(new image_rd_argument
);
// 2D and 3D write-only images share one argument class.
45 else if (marg
.type
== module::argument::image2d_wr
||
46 marg
.type
== module::argument::image3d_wr
)
47 args
.emplace_back(new image_wr_argument
);
48 else if (marg
.type
== module::argument::sampler
)
49 args
.emplace_back(new sampler_argument
);
51 throw error(CL_INVALID_KERNEL_DEFINITION
);
// Builds a std::vector<T> `w` from the elements of `v` (each converted
// to T) and resizes it to q.dev.max_block_size().size() elements,
// using `x` as the value of any element the resize adds.
//
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `w` is returned -- confirm.
55 template<typename T
, typename V
>
56 static inline std::vector
<T
>
57 pad_vector(clover::command_queue
&q
, const V
&v
, T x
) {
58 std::vector
<T
> w
{ v
.begin(), v
.end() };
// The target length is the number of entries in the device's maximum
// block size vector.
59 w
.resize(q
.dev
.max_block_size().size(), x
);
// Launches this kernel on command queue `q`.
//
// Binds the execution context to `q`, installs the resulting compute
// state together with the context's samplers, sampler views, compute
// resources and global buffers on q.pipe, calls launch_grid() with
// `block_size` and `grid_size` padded with 1s via pad_vector<uint>,
// and finally clears the four bindings again by passing NULL.
//
// `grid_offset` is not referenced by any statement visible in this
// excerpt.
64 _cl_kernel::launch(clover::command_queue
&q
,
65 const std::vector
<size_t> &grid_offset
,
66 const std::vector
<size_t> &grid_size
,
67 const std::vector
<size_t> &block_size
) {
68 void *st
= exec
.bind(&q
);
// Turn every offset stored in exec.g_handles into a uint32_t pointer
// to that position inside exec.input.
69 auto g_handles
= map([&](size_t h
) { return (uint32_t *)&exec
.input
[h
]; },
70 exec
.g_handles
.begin(), exec
.g_handles
.end());
72 q
.pipe
->bind_compute_state(q
.pipe
, st
);
73 q
.pipe
->bind_compute_sampler_states(q
.pipe
, 0, exec
.samplers
.size(),
74 exec
.samplers
.data());
// NOTE(review): the last argument of this call is not visible in this
// excerpt; presumably exec.sviews.data() -- confirm.
75 q
.pipe
->set_compute_sampler_views(q
.pipe
, 0, exec
.sviews
.size(),
77 q
.pipe
->set_compute_resources(q
.pipe
, 0, exec
.resources
.size(),
78 exec
.resources
.data());
79 q
.pipe
->set_global_binding(q
.pipe
, 0, exec
.g_buffers
.size(),
80 exec
.g_buffers
.data(), g_handles
.data());
// The third visible argument is the offset of this kernel's symbol
// within the module selected for `q`.
// NOTE(review): the argument(s) following the offset are not visible
// in this excerpt.
82 q
.pipe
->launch_grid(q
.pipe
,
83 pad_vector
<uint
>(q
, block_size
, 1).data(),
84 pad_vector
<uint
>(q
, grid_size
, 1).data(),
85 module(q
).sym(__name
).offset
,
// Clear the bindings in the reverse of the order they were set above.
88 q
.pipe
->set_global_binding(q
.pipe
, 0, exec
.g_buffers
.size(), NULL
, NULL
);
89 q
.pipe
->set_compute_resources(q
.pipe
, 0, exec
.resources
.size(), NULL
);
90 q
.pipe
->set_compute_sampler_views(q
.pipe
, 0, exec
.sviews
.size(), NULL
);
91 q
.pipe
->bind_compute_sampler_states(q
.pipe
, 0, exec
.samplers
.size(), NULL
);
// Adds up storage() over every element of `args` that is a
// local_argument (tested with dynamic_cast), accumulating into `sz`.
//
// NOTE(review): the declaration of `sz` and the return statement are
// not visible in this excerpt; presumably `sz` starts at 0 and is
// returned -- confirm.
96 _cl_kernel::mem_local() const {
99 for (auto &arg
: args
) {
// Arguments of any other class are skipped.
100 if (dynamic_cast<local_argument
*>(arg
.get()))
101 sz
+= arg
->storage();
108 _cl_kernel::mem_private() const {
113 _cl_kernel::max_block_size() const {
118 _cl_kernel::name() const {
123 _cl_kernel::block_size() const {
// Returns a reference to the module that prog.binaries() associates
// with the device of queue `q` (looked up by the address of q.dev).
//
// The result of find() is dereferenced directly, with no check that
// an entry was found.
// NOTE(review): presumably callers guarantee the program has a binary
// for q.dev -- confirm.
127 const clover::module
&
128 _cl_kernel::module(const clover::command_queue
&q
) const {
129 return prog
.binaries().find(&q
.dev
)->second
;
// Creates an execution context for kernel `kern`: keeps a reference
// to the kernel and starts with no queue (q == NULL), zero local
// memory (mem_local == 0) and no compute state (st == NULL).
132 _cl_kernel::exec_context::exec_context(clover::kernel
&kern
) :
133 kern(kern
), q(NULL
), mem_local(0), st(NULL
) {
// Deletes the compute state `st` through the pipe context of queue
// `q`.
//
// NOTE(review): any condition guarding this call is not visible in
// this excerpt. The constructor initialises both `q` and `st` to
// NULL, so confirm the call is skipped when no state was created.
136 _cl_kernel::exec_context::~exec_context() {
138 q
->pipe
->delete_compute_state(q
->pipe
, st
);
// Binds all kernel arguments for execution and makes sure `st` holds
// a compute state matching the current requirements.
//
// Each element of kern.args is paired with the corresponding module
// argument of the kernel's symbol and has bind(*this, marg) called on
// it. A new compute state is then created when there is none yet,
// when `q` differs from `__q`, or when the required local or input
// memory size differs from the values recorded in `cs`.
//
// NOTE(review): the statements preceding the "Bind kernel arguments"
// comment and the return statement are not visible in this excerpt.
// The visible code dereferences `q` and deletes the old state through
// `__q`, so presumably `q` and `__q` are exchanged first (making `__q`
// the previously bound queue) and `st` is returned -- confirm.
142 _cl_kernel::exec_context::bind(clover::command_queue
*__q
) {
145 // Bind kernel arguments.
// `margs` is a copy of the argument list of the kernel's symbol in
// the module selected for `q`.
146 auto margs
= kern
.module(*q
).sym(kern
.name()).args
;
147 for_each([=](std::unique_ptr
<kernel::argument
> &karg
,
148 const module::argument
&marg
) {
149 karg
->bind(*this, marg
);
150 }, kern
.args
.begin(), kern
.args
.end(), margs
.begin());
152 // Create a new compute state if anything changed.
153 if (!st
|| q
!= __q
||
154 cs
.req_local_mem
!= mem_local
||
155 cs
.req_input_mem
!= input
.size()) {
// The old state is deleted through `__q`, not `q`.
// NOTE(review): any guard on `st` for this call is not visible in
// this excerpt.
157 __q
->pipe
->delete_compute_state(__q
->pipe
, st
);
// Describe the new state: start of the module's text section plus the
// local and input memory sizes computed while binding the arguments.
159 cs
.prog
= kern
.module(*q
).sec(module::section::text
).data
.begin();
160 cs
.req_local_mem
= mem_local
;
161 cs
.req_input_mem
= input
.size();
162 st
= q
->pipe
->create_compute_state(q
->pipe
, &cs
);
// Walks over every element of kern.args.
//
// NOTE(review): the loop body and everything after it are not visible
// in this excerpt; presumably each argument's unbind(*this) is called
// and the per-launch containers are reset -- confirm.
169 _cl_kernel::exec_context::unbind() {
170 for (auto &arg
: kern
.args
)
// Base argument constructor: initialises `__set` to false.
182 _cl_kernel::argument::argument() : __set(false) {
186 _cl_kernel::argument::set() const {
191 _cl_kernel::argument::storage() const {
// Creates a scalar argument and records `size` in the member of the
// same name; set() later compares the caller-supplied size against it.
195 _cl_kernel::scalar_argument::scalar_argument(size_t size
) : size(size
) {
// Stores a scalar argument value.
//
// Throws error(CL_INVALID_ARG_SIZE) when `size` differs from the size
// given at construction; otherwise replaces `v` with a copy of the
// `size` bytes starting at `value`.
//
// NOTE(review): statements after the copy are not visible in this
// excerpt.
199 _cl_kernel::scalar_argument::set(size_t size
, const void *value
) {
200 if (size
!= this->size
)
201 throw error(CL_INVALID_ARG_SIZE
);
// Byte-wise copy of the range [value, value + size).
203 v
= { (uint8_t *)value
, (uint8_t *)value
+ size
};
// Appends the stored value bytes `v` to the end of ctx.input.
// `marg` is not referenced by the visible code.
208 _cl_kernel::scalar_argument::bind(exec_context
&ctx
,
209 const clover::module::argument
&marg
) {
210 ctx
.input
.insert(ctx
.input
.end(), v
.begin(), v
.end());
214 _cl_kernel::scalar_argument::unbind(exec_context
&ctx
) {
// Sets a global-memory argument from a cl_mem handle.
//
// Throws error(CL_INVALID_ARG_SIZE) unless `size` equals
// sizeof(cl_mem). `value` is then read as a pointer to a cl_mem, and
// the handle is dynamic_cast to clover::buffer and stored in `obj`.
//
// Throws error(CL_INVALID_MEM_OBJECT).
// NOTE(review): the condition guarding that throw is not visible in
// this excerpt; presumably it fires when the cast yields null, i.e.
// the handle is not a buffer -- confirm.
218 _cl_kernel::global_argument::set(size_t size
, const void *value
) {
219 if (size
!= sizeof(cl_mem
))
220 throw error(CL_INVALID_ARG_SIZE
);
222 obj
= dynamic_cast<clover::buffer
*>(*(cl_mem
*)value
);
224 throw error(CL_INVALID_MEM_OBJECT
);
// Binds a global buffer argument into the execution context.
//
// Grows ctx.input by marg.size bytes for this argument, appends the
// buffer's pipe resource for ctx.q to ctx.g_buffers, and records in
// ctx.g_handles the offset of the new bytes within ctx.input.
// _cl_kernel::launch() converts these offsets into pointers into
// exec.input and passes them to set_global_binding().
230 _cl_kernel::global_argument::bind(exec_context
&ctx
,
231 const clover::module::argument
&marg
) {
// Offset of this argument within the input buffer, and the index of
// its entry in g_buffers / g_handles.
232 size_t offset
= ctx
.input
.size();
233 size_t idx
= ctx
.g_buffers
.size();
// Nothing is written into the new bytes here.
235 ctx
.input
.resize(offset
+ marg
.size
);
237 ctx
.g_buffers
.resize(idx
+ 1);
238 ctx
.g_buffers
[idx
] = obj
->resource(ctx
.q
).pipe
;
240 ctx
.g_handles
.resize(idx
+ 1);
241 ctx
.g_handles
[idx
] = offset
;
245 _cl_kernel::global_argument::unbind(exec_context
&ctx
) {
249 _cl_kernel::local_argument::storage() const {
// Sets a local-memory argument.
//
// Throws error(CL_INVALID_ARG_VALUE).
// NOTE(review): the condition guarding the throw and the rest of the
// body are not visible in this excerpt; presumably a non-null `value`
// is rejected and `size` is recorded as the storage size -- confirm.
254 _cl_kernel::local_argument::set(size_t size
, const void *value
) {
256 throw error(CL_INVALID_ARG_VALUE
);
// Binds a local-memory argument into the execution context.
//
// Appends sizeof(uint32_t) bytes to ctx.input and writes the current
// value of ctx.mem_local into them, then advances ctx.mem_local by
// this argument's `__storage`. `marg` is not referenced by the
// visible code.
263 _cl_kernel::local_argument::bind(exec_context
&ctx
,
264 const clover::module::argument
&marg
) {
265 size_t offset
= ctx
.input
.size();
266 size_t ptr
= ctx
.mem_local
;
268 ctx
.input
.resize(offset
+ sizeof(uint32_t));
// `ptr` is a size_t and is converted to uint32_t by this store.
269 *(uint32_t *)&ctx
.input
[offset
] = ptr
;
271 ctx
.mem_local
+= __storage
;
275 _cl_kernel::local_argument::unbind(exec_context
&ctx
) {
// Sets a constant-memory argument from a cl_mem handle.
//
// Throws error(CL_INVALID_ARG_SIZE) unless `size` equals
// sizeof(cl_mem). `value` is then read as a pointer to a cl_mem, and
// the handle is dynamic_cast to clover::buffer and stored in `obj`.
//
// Throws error(CL_INVALID_MEM_OBJECT).
// NOTE(review): the condition guarding that throw is not visible in
// this excerpt; presumably it fires when the cast yields null --
// confirm.
279 _cl_kernel::constant_argument::set(size_t size
, const void *value
) {
280 if (size
!= sizeof(cl_mem
))
281 throw error(CL_INVALID_ARG_SIZE
);
283 obj
= dynamic_cast<clover::buffer
*>(*(cl_mem
*)value
);
285 throw error(CL_INVALID_MEM_OBJECT
);
// Binds a constant buffer argument into the execution context.
//
// Appends sizeof(uint32_t) bytes to ctx.input and writes the index of
// the new ctx.resources slot shifted left by 24 bits into them. A
// surface is then bound on the buffer's resource for ctx.q, kept in
// `st` for unbind() and stored in that slot. `marg` is not referenced
// by the visible code.
291 _cl_kernel::constant_argument::bind(exec_context
&ctx
,
292 const clover::module::argument
&marg
) {
293 size_t offset
= ctx
.input
.size();
294 size_t idx
= ctx
.resources
.size();
296 ctx
.input
.resize(offset
+ sizeof(uint32_t));
// NOTE(review): presumably the resource index is expected in the top
// byte of the 32-bit value handed to the kernel -- confirm.
297 *(uint32_t *)&ctx
.input
[offset
] = idx
<< 24;
299 ctx
.resources
.resize(idx
+ 1);
// NOTE(review): the `false` flag presumably requests a non-writable
// surface (image_wr_argument::bind passes true) -- confirm.
300 ctx
.resources
[idx
] = st
= obj
->resource(ctx
.q
).bind_surface(*ctx
.q
, false);
// Releases the surface `st` obtained in bind() by calling
// unbind_surface() on the buffer's resource for ctx.q.
304 _cl_kernel::constant_argument::unbind(exec_context
&ctx
) {
305 obj
->resource(ctx
.q
).unbind_surface(*ctx
.q
, st
);
// Sets a read-only image argument from a cl_mem handle.
//
// Throws error(CL_INVALID_ARG_SIZE) unless `size` equals
// sizeof(cl_mem). `value` is then read as a pointer to a cl_mem, and
// the handle is dynamic_cast to clover::image and stored in `obj`.
//
// Throws error(CL_INVALID_MEM_OBJECT).
// NOTE(review): the condition guarding that throw is not visible in
// this excerpt; presumably it fires when the cast yields null, i.e.
// the handle is not an image -- confirm.
309 _cl_kernel::image_rd_argument::set(size_t size
, const void *value
) {
310 if (size
!= sizeof(cl_mem
))
311 throw error(CL_INVALID_ARG_SIZE
);
313 obj
= dynamic_cast<clover::image
*>(*(cl_mem
*)value
);
315 throw error(CL_INVALID_MEM_OBJECT
);
// Binds a read-only image argument into the execution context.
//
// Appends sizeof(uint32_t) bytes to ctx.input and writes the index of
// the new ctx.sviews slot into them. A sampler view is then bound on
// the image's resource for ctx.q, kept in `st` for unbind() and
// stored in that slot. `marg` is not referenced by the visible code.
321 _cl_kernel::image_rd_argument::bind(exec_context
&ctx
,
322 const clover::module::argument
&marg
) {
323 size_t offset
= ctx
.input
.size();
324 size_t idx
= ctx
.sviews
.size();
326 ctx
.input
.resize(offset
+ sizeof(uint32_t));
// The kernel input receives the sampler view index.
327 *(uint32_t *)&ctx
.input
[offset
] = idx
;
329 ctx
.sviews
.resize(idx
+ 1);
330 ctx
.sviews
[idx
] = st
= obj
->resource(ctx
.q
).bind_sampler_view(*ctx
.q
);
// Releases the sampler view `st` obtained in bind() by calling
// unbind_sampler_view() on the image's resource for ctx.q.
334 _cl_kernel::image_rd_argument::unbind(exec_context
&ctx
) {
335 obj
->resource(ctx
.q
).unbind_sampler_view(*ctx
.q
, st
);
// Sets a write-only image argument from a cl_mem handle.
//
// Throws error(CL_INVALID_ARG_SIZE) unless `size` equals
// sizeof(cl_mem). `value` is then read as a pointer to a cl_mem, and
// the handle is dynamic_cast to clover::image and stored in `obj`.
//
// Throws error(CL_INVALID_MEM_OBJECT).
// NOTE(review): the condition guarding that throw is not visible in
// this excerpt; presumably it fires when the cast yields null, i.e.
// the handle is not an image -- confirm.
339 _cl_kernel::image_wr_argument::set(size_t size
, const void *value
) {
340 if (size
!= sizeof(cl_mem
))
341 throw error(CL_INVALID_ARG_SIZE
);
343 obj
= dynamic_cast<clover::image
*>(*(cl_mem
*)value
);
345 throw error(CL_INVALID_MEM_OBJECT
);
// Binds a write-only image argument into the execution context.
//
// Appends sizeof(uint32_t) bytes to ctx.input and writes the index of
// the new ctx.resources slot into them. A surface is then bound on
// the image's resource for ctx.q, kept in `st` for unbind() and
// stored in that slot. `marg` is not referenced by the visible code.
351 _cl_kernel::image_wr_argument::bind(exec_context
&ctx
,
352 const clover::module::argument
&marg
) {
353 size_t offset
= ctx
.input
.size();
354 size_t idx
= ctx
.resources
.size();
356 ctx
.input
.resize(offset
+ sizeof(uint32_t));
// Unlike constant_argument::bind, the index is stored unshifted.
357 *(uint32_t *)&ctx
.input
[offset
] = idx
;
359 ctx
.resources
.resize(idx
+ 1);
// NOTE(review): the `true` flag presumably requests a writable
// surface (constant_argument::bind passes false) -- confirm.
360 ctx
.resources
[idx
] = st
= obj
->resource(ctx
.q
).bind_surface(*ctx
.q
, true);
// Releases the surface `st` obtained in bind() by calling
// unbind_surface() on the image's resource for ctx.q.
364 _cl_kernel::image_wr_argument::unbind(exec_context
&ctx
) {
365 obj
->resource(ctx
.q
).unbind_surface(*ctx
.q
, st
);
// Sets a sampler argument from a cl_sampler handle.
//
// Throws error(CL_INVALID_ARG_SIZE) unless `size` equals
// sizeof(cl_sampler); otherwise reads `value` as a pointer to a
// cl_sampler and stores the handle in `obj`. No further validation of
// the handle is visible in this excerpt.
369 _cl_kernel::sampler_argument::set(size_t size
, const void *value
) {
370 if (size
!= sizeof(cl_sampler
))
371 throw error(CL_INVALID_ARG_SIZE
);
373 obj
= *(cl_sampler
*)value
;
// Binds a sampler argument into the execution context.
//
// Binds the sampler object to ctx.q, keeps the result in `st` for
// unbind() and stores it in a new slot at the end of ctx.samplers.
// The visible code writes nothing to ctx.input and does not reference
// `marg`.
378 _cl_kernel::sampler_argument::bind(exec_context
&ctx
,
379 const clover::module::argument
&marg
) {
380 size_t idx
= ctx
.samplers
.size();
382 ctx
.samplers
.resize(idx
+ 1);
383 ctx
.samplers
[idx
] = st
= obj
->bind(*ctx
.q
);
// Releases the sampler state `st` obtained in bind() by calling
// unbind() on the sampler object for ctx.q.
387 _cl_kernel::sampler_argument::unbind(exec_context
&ctx
) {
388 obj
->unbind(*ctx
.q
, st
);