From 945d87f95877e198fb2203f47a4ebbccae883978 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 13 Feb 2014 14:46:25 -0800 Subject: [PATCH] clover: Pass buffer offsets to the driver in set_global_binding() v3 The offsets will be stored in the handles parameter. This makes it possible to use sub-buffers. v2: - Style fixes - Add support for constant sub-buffers - Store handles in device byte order v3: - Use endian helpers Reviewed-by: Francisco Jerez --- src/gallium/drivers/r600/evergreen_compute.c | 7 ++++++- src/gallium/drivers/radeonsi/si_compute.c | 4 ++++ src/gallium/include/pipe/p_context.h | 13 ++++++++----- .../state_trackers/clover/core/kernel.cpp | 18 ++++++++++++++---- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 70efe5c5a87..5d090ad2698 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -662,10 +662,15 @@ static void evergreen_set_global_binding( for (int i = 0; i < n; i++) { + uint32_t buffer_offset; + uint32_t handle; assert(resources[i]->target == PIPE_BUFFER); assert(resources[i]->bind & PIPE_BIND_GLOBAL); - *(handles[i]) = buffers[i]->chunk->start_in_dw * 4; + buffer_offset = util_le32_to_cpu(*(handles[i])); + handle = buffer_offset + buffers[i]->chunk->start_in_dw * 4; + + *(handles[i]) = util_cpu_to_le32(handle); } evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4); diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index a7f49e790af..ead5862df39 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -107,8 +107,12 @@ static void si_set_global_binding( for (i = first; i < first + n; i++) { uint64_t va; + uint32_t offset; program->global_buffers[i] = resources[i]; va = r600_resource_va(ctx->screen, resources[i]); + offset = util_le32_to_cpu(*handles[i]); + va += offset; + va = util_cpu_to_le64(va); memcpy(handles[i], &va, sizeof(va)); } } diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 8ef6e27e7cd..209ec9e074b 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -460,11 +460,14 @@ struct pipe_context { * unless it's NULL, in which case no new * resources will be bound. * \param handles array of pointers to the memory locations that - * will be filled with the respective base - * addresses each buffer will be mapped to. It - * should contain at least \a count elements, - * unless \a resources is NULL in which case \a - * handles should be NULL as well. + * will be updated with the address each buffer + * will be mapped to. The base memory address of + * each of the buffers will be added to the value + * pointed to by its corresponding handle to form + * the final address argument. It should contain + * at least \a count elements, unless \a + * resources is NULL in which case \a handles + * should be NULL as well. * * Note that the driver isn't required to make any guarantees about * the contents of the \a handles array being valid anytime except diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index ea67e3b0aa0..5e5fe51f1c0 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -337,8 +337,17 @@ kernel::global_argument::bind(exec_context &ctx, align(ctx.input, marg.target_align); if (buf) { - ctx.g_handles.push_back(allocate(ctx.input, marg.target_size)); - ctx.g_buffers.push_back(buf->resource(*ctx.q).pipe); + const resource &r = buf->resource(*ctx.q); + ctx.g_handles.push_back(ctx.input.size()); + ctx.g_buffers.push_back(r.pipe); + + // How to handle multi-demensional offsets? + // We don't need to. Buffer offsets are always + // one-dimensional. + auto v = bytes(r.offset[0]); + extend(v, marg.ext_type, marg.target_size); + byteswap(v, ctx.q->device().endianness()); + insert(ctx.input, v); } else { // Null pointer. allocate(ctx.input, marg.target_size); @@ -395,13 +404,14 @@ kernel::constant_argument::bind(exec_context &ctx, align(ctx.input, marg.target_align); if (buf) { - auto v = bytes(ctx.resources.size() << 24); + resource &r = buf->resource(*ctx.q); + auto v = bytes(ctx.resources.size() << 24 | r.offset[0]); extend(v, module::argument::zero_ext, marg.target_size); byteswap(v, ctx.q->device().endianness()); insert(ctx.input, v); - st = buf->resource(*ctx.q).bind_surface(*ctx.q, false); + st = r.bind_surface(*ctx.q, false); ctx.resources.push_back(st); } else { // Null pointer. -- 2.30.2