From 73bc0fdb6f65ddb481cf525bf354e1a16bea4d75 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Tue, 9 Dec 2014 20:05:44 -0500
Subject: [PATCH] radeonsi/compute: Use relocs for scratch pointer rather than user sgprs v2

Instead of passing a pointer to the scratch buffer via user sgprs,
we now patch the shader with the buffer address using reloc information
from the LLVM generated ELF.

v2:
  - Make sure not to break older LLVM.
---
 src/gallium/drivers/radeonsi/si_compute.c | 58 +++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 20fec84ed77..4427d3bdbd5 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -36,9 +36,18 @@
 #if HAVE_LLVM < 0x0305
 #define NUM_USER_SGPRS 2
 #else
+/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
+ * LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
+ * with older mesa. */
 #define NUM_USER_SGPRS 4
 #endif
 
+static const char *scratch_rsrc_dword0_symbol =
+	"SCRATCH_RSRC_DWORD0";
+
+static const char *scratch_rsrc_dword1_symbol =
+	"SCRATCH_RSRC_DWORD1";
+
 struct si_compute {
 	struct si_context *ctx;
 
@@ -174,6 +183,51 @@ static unsigned compute_num_waves_for_scratch(
 	return scratch_waves;
 }
 
+/**
+ * Patch the scratch buffer resource descriptor into the uploaded shader.
+ *
+ * For every relocation in \p binary named SCRATCH_RSRC_DWORD0 or
+ * SCRATCH_RSRC_DWORD1, the 32-bit word at reloc->offset inside the shader's
+ * buffer object is overwritten (via util_memcpy_cpu_to_le32) with the low
+ * half of \p scratch_va, or with its high half combined with the per-wave
+ * scratch stride, respectively.  Other relocations are left untouched.
+ */
+static void apply_scratch_relocs(const struct si_screen *sscreen,
+			const struct radeon_shader_binary *binary,
+			struct si_shader *shader, uint64_t scratch_va) {
+	unsigned i;
+	char *ptr;
+	uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
+	uint32_t scratch_rsrc_dword1 =
+		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
+		| S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+
+	/* Nothing to patch, so skip mapping the buffer altogether. */
+	if (!binary->reloc_count) {
+		return;
+	}
+
+	ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+					PIPE_TRANSFER_READ_WRITE);
+	if (!ptr) {
+		/* Mapping failed: writing through a NULL pointer below would
+		 * crash, so leave the shader unpatched instead. */
+		return;
+	}
+
+	for (i = 0 ; i < binary->reloc_count; i++) {
+		const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
+			util_memcpy_cpu_to_le32(ptr + reloc->offset,
+				&scratch_rsrc_dword0, 4);
+		} else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+			util_memcpy_cpu_to_le32(ptr + reloc->offset,
+				&scratch_rsrc_dword1, 4);
+		}
+	}
+	sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+}
+
 static void si_launch_grid(
 		struct pipe_context *ctx,
 		const uint *block_layout, const uint *grid_layout,
@@ -256,6 +310,10 @@ static void si_launch_grid(
 				RADEON_USAGE_READWRITE,
 				RADEON_PRIO_SHADER_RESOURCE_RW);
 
+		/* Patch the shader with the scratch buffer address. */
+		apply_scratch_relocs(sctx->screen,
+			&program->binary, shader, scratch_buffer_va);
+
 	}
 
 	for (i = 0; i < (kernel_args_size / 4); i++) {
-- 
2.30.2