const std::vector<size_t> &grid_offset,
const std::vector<size_t> &grid_size,
const std::vector<size_t> &block_size) {
- const auto m = program().binary(q.device());
+ const auto m = program().build(q.device()).binary;
const auto reduced_grid_size =
map(divides(), grid_size, block_size);
void *st = exec.bind(&q, grid_offset);
exec.g_buffers.data(), g_handles.data());
// Fill information for the launch_grid() call.
+ info.work_dim = grid_size.size();
copy(pad_vector(q, block_size, 1), info.block);
copy(pad_vector(q, reduced_grid_size, 1), info.grid);
info.pc = find(name_equals(_name), m.syms).offset;
exec.sviews.size(), NULL);
q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
exec.samplers.size(), NULL);
+
+ q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER);
exec.unbind();
}
// Accessor for the module belonging to the device of command queue `q`.
// Returns, by const reference, the `binary` member of the result of
// `program().build(q.device())`; before this change the value came from
// `program().binary(q.device())` instead.
const module &
kernel::module(const command_queue &q) const {
- return program().binary(q.device());
+ return program().build(q.device()).binary;
}
kernel::exec_context::exec_context(kernel &kern) :
std::swap(q, _q);
// Bind kernel arguments.
- auto &m = kern.program().binary(q->device());
+ auto &m = kern.program().build(q->device()).binary;
auto margs = find(name_equals(kern.name()), m.syms).args;
- auto msec = find(type_equals(module::section::text), m.secs);
+ auto msec = find(type_equals(module::section::text_executable), m.secs);
auto explicit_arg = kern._args.begin();
for (auto &marg : margs) {
break;
}
case module::argument::grid_offset: {
- for (cl_uint x : pad_vector(*q, grid_offset, 1)) {
+ for (cl_uint x : pad_vector(*q, grid_offset, 0)) {
auto arg = argument::create(marg);
arg->set(sizeof(x), &x);