radv: apply the indexing workaround for atomic buffer operations on GFX9
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 3 May 2019 09:45:34 +0000 (11:45 +0200)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 3 May 2019 15:59:12 +0000 (17:59 +0200)
Because the new raw/struct intrinsics are buggy with LLVM 8
(they weren't marked as source of divergence), we fallback to the
old instrinsics for atomic buffer operations only. This means we need
to apply the indexing workaround for GFX9. The load/store
operations still use the new LLVM 8 intrinsics.

The fact that we need another workaround is painful but we should
be able to clean up that a bit once LLVM 7 support will be dropped.

This fixes a GPU hang with AC Odyssey and some rendering problems
with Nioh.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110573
Fixes: 31164cf5f70 ("ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_shader_abi.h
src/amd/vulkan/radv_nir_to_llvm.c

index c92eaaca31d901cde82bd29100222d87fc11156a..151e0d0f961a0ca9ff21f64702a9ac06f14550b8 100644 (file)
@@ -2417,10 +2417,12 @@ static void get_image_coords(struct ac_nir_context *ctx,
 }
 
 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
-                                                const nir_intrinsic_instr *instr, bool write)
+                                                const nir_intrinsic_instr *instr,
+                                               bool write, bool atomic)
 {
        LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
-       if (ctx->abi->gfx9_stride_size_workaround) {
+       if (ctx->abi->gfx9_stride_size_workaround ||
+           (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
                LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
                LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
                stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
@@ -2466,7 +2468,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
                unsigned num_channels = util_last_bit(mask);
                LLVMValueRef rsrc, vindex;
 
-               rsrc = get_image_buffer_descriptor(ctx, instr, false);
+               rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
                vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
                                                 ctx->ac.i32_0, "");
 
@@ -2520,7 +2522,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
        args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
 
        if (dim == GLSL_SAMPLER_DIM_BUF) {
-               LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
+               LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
                LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
                unsigned src_channels = ac_get_llvm_num_components(src);
                LLVMValueRef vindex;
@@ -2632,7 +2634,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
        params[param_count++] = get_src(ctx, instr->src[3]);
 
        if (dim == GLSL_SAMPLER_DIM_BUF) {
-               params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
+               params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
                params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
                                                                ctx->ac.i32_0, ""); /* vindex */
                params[param_count++] = ctx->ac.i32_0; /* voffset */
index 108fe58ce57104da41fd955f58b9d3549c1363d1..8debb1ff9860fed46dd13bcdb42544365955ba0e 100644 (file)
@@ -203,6 +203,7 @@ struct ac_shader_abi {
        /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
        * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
        bool gfx9_stride_size_workaround;
+       bool gfx9_stride_size_workaround_for_atomic;
 };
 
 #endif /* AC_SHADER_ABI_H */
index 796d78e34f4c24e38f4e3fad30297aac150162a6..d83f0bd547f3d516f3b73b0563091ac991a78576 100644 (file)
@@ -3687,6 +3687,12 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
        ctx.abi.clamp_shadow_reference = false;
        ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
 
+       /* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
+        * we fallback to the old intrinsics for atomic buffer image operations
+        * and thus we need to apply the indexing workaround...
+        */
+       ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
+
        if (shader_count >= 2)
                ac_init_exec_full_mask(&ctx.ac);