radeonsi/compute: Use relocs for scratch pointer rather than user sgprs v2
author Tom Stellard <thomas.stellard@amd.com>
Wed, 10 Dec 2014 01:05:44 +0000 (20:05 -0500)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 20 Jan 2015 14:55:44 +0000 (09:55 -0500)
Instead of passing a pointer to the scratch buffer via user SGPRs, we
now patch the shader with the buffer address, using the reloc information
from the LLVM-generated ELF.

v2:
  - Make sure not to break older LLVM.

src/gallium/drivers/radeonsi/si_compute.c

index 20fec84ed77d9ff9239dc8e3c945f5416468b147..4427d3bdbd5912c065a63c0abe5d79bb8d77a31c 100644 (file)
 #if HAVE_LLVM < 0x0305
 #define NUM_USER_SGPRS 2
 #else
+/* XXX: Even though we no longer pass the scratch buffer via user SGPRs,
+ * LLVM still expects us to specify 4 USER_SGPRS so that it remains compatible
+ * with older Mesa. */
 #define NUM_USER_SGPRS 4
 #endif
 
+static const char *scratch_rsrc_dword0_symbol =
+       "SCRATCH_RSRC_DWORD0"; /* reloc name that apply_scratch_relocs() patches with the low 32 bits of the scratch address */
+
+static const char *scratch_rsrc_dword1_symbol =
+       "SCRATCH_RSRC_DWORD1"; /* reloc name that apply_scratch_relocs() patches with the high address bits and stride */
+
 struct si_compute {
        struct si_context *ctx;
 
@@ -174,6 +183,35 @@ static unsigned compute_num_waves_for_scratch(
        return scratch_waves;
 }
 
+/**
+ * Patch the shader's buffer object with the scratch buffer resource words.
+ *
+ * Walks the relocation entries of \p binary and, for every entry named
+ * SCRATCH_RSRC_DWORD0 or SCRATCH_RSRC_DWORD1, overwrites the 32-bit word at
+ * that entry's offset inside shader->bo. Relocations with any other name
+ * are left untouched.
+ *
+ * \param sscreen     screen whose winsys maps and unmaps the shader bo
+ * \param binary      shader binary providing the relocation table
+ * \param shader      shader whose bo is patched; scratch_bytes_per_wave
+ *                    feeds the STRIDE field of the second word
+ * \param scratch_va  64-bit address of the scratch buffer
+ */
+static void apply_scratch_relocs(const struct si_screen *sscreen,
+                       const struct radeon_shader_binary *binary,
+                       struct si_shader *shader, uint64_t scratch_va) {
+       unsigned i;
+       char *ptr;
+       /* First word: low 32 bits of the scratch address. */
+       uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
+       /* Second word: high address bits combined with the stride field. */
+       uint32_t scratch_rsrc_dword1 =
+               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
+               |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+
+       /* Nothing to patch, so skip mapping the buffer entirely. */
+       if (!binary->reloc_count) {
+               return;
+       }
+
+       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+                                       PIPE_TRANSFER_READ_WRITE);
+       if (!ptr) {
+               /* The mapping failed; writing to ptr + offset would go
+                * through an invalid pointer, so leave the shader unpatched. */
+               return;
+       }
+
+       for (i = 0 ; i < binary->reloc_count; i++) {
+               const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
+                       util_memcpy_cpu_to_le32(ptr + reloc->offset,
+                               &scratch_rsrc_dword0, 4);
+               } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+                       util_memcpy_cpu_to_le32(ptr + reloc->offset,
+                               &scratch_rsrc_dword1, 4);
+               }
+       }
+       sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+}
+
 static void si_launch_grid(
                struct pipe_context *ctx,
                const uint *block_layout, const uint *grid_layout,
@@ -256,6 +294,10 @@ static void si_launch_grid(
                                RADEON_USAGE_READWRITE,
                                RADEON_PRIO_SHADER_RESOURCE_RW);
 
+               /* Patch the shader with the scratch buffer address. */
+               apply_scratch_relocs(sctx->screen,
+                       &program->binary, shader, scratch_buffer_va);
+
        }
 
        for (i = 0; i < (kernel_args_size / 4); i++) {