radeonsi: enable scratch coalescing
author: Marek Olšák <marek.olsak@amd.com>
Wed, 8 Jun 2016 11:21:25 +0000 (13:21 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 13 Jun 2016 16:13:51 +0000 (18:13 +0200)
This makes one particular compute shader 8x faster.

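The latest LLVM from git is required (the new path is used only when HAVE_LLVM >= 0x0309; older LLVM keeps the previous STRIDE-based setup).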

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c

index 754b4aff335f02e28abc545f8d80e5213915b193..f2bd3370c8af6d056b0b88ac9fa9538ce530c760 100644 (file)
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
        unsigned i;
        uint32_t scratch_rsrc_dword0 = scratch_va;
        uint32_t scratch_rsrc_dword1 =
-               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-               |  S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
+
+       /* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
+        * correctly.
+        */
+       if (HAVE_LLVM >= 0x0309)
+               scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
+       else
+               scratch_rsrc_dword1 |=
+                       S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
 
        for (i = 0 ; i < shader->binary.reloc_count; i++) {
                const struct radeon_shader_reloc *reloc =