radeonsi: add a workaround for weird s_buffer_load_dword behavior on SI
author: Marek Olšák <marek.olsak@amd.com>
Sun, 22 Oct 2017 13:54:23 +0000 (15:54 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Thu, 26 Oct 2017 14:44:01 +0000 (16:44 +0200)
See my LLVM patch which fixes the root cause.

Users have to apply this patch and then they have 2 choices:
- Downgrade to LLVM 5.0
- Update to LLVM git after my LLVM patch is pushed.

It won't be possible to use the current or earlier development versions
of LLVM 6.0.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Cc: 17.3 <mesa-stable@lists.freedesktop.org>
src/gallium/drivers/radeonsi/si_shader.c

index 4bf2a45eebda2fc73022f13cf45b15113ab6fb5d..4456026ac8191c3297cd591a0b6c7e4ffb7b58af 100644 (file)
@@ -2013,14 +2013,21 @@ static LLVMValueRef fetch_constant(
                 * code reducing SIMD wave occupancy from 8 to 2 in many cases.
                 *
                 * Using s_buffer_load_dword (x1) seems to be the best option right now.
+                *
+                * LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting
+                * a descriptor and s_buffer_load_dword using it, so we can't expand
+                * the pointer into a full descriptor like below. We have to use
+                * s_load_dword instead. The only case when LLVM 5.0 would select
+                * s_buffer_load_dword (that we have to prevent) is when we use
+                * a literal offset where we don't need bounds checking.
                 */
-#if 0 /* keep this codepath disabled */
-               if (!reg->Register.Indirect) {
+               if (ctx->screen->b.chip_class == SI &&
+                    HAVE_LLVM < 0x0600 &&
+                    !reg->Register.Indirect) {
                        addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
                        LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
                        return bitcast(bld_base, type, result);
                }
-#endif
 
                /* Do the bounds checking with a descriptor, because
                 * doing computation and manual bounds checking of 64-bit