ac/nir: Add workaround for GFX9 buffer views.
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 28 Mar 2018 21:54:40 +0000 (23:54 +0200)
committerBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 28 Mar 2018 22:03:03 +0000 (00:03 +0200)
On GFX9 whether the buffer size is interpreted as elements or bytes
depends on whether IDXEN is enabled in the instruction. If the index
is a constant zero, LLVM optimizes IDXEN to 0.

Now the size in elements is interpreted in bytes which of course
results in out of bounds accesses.

The correct fix is most likely to disable the LLVM optimization,
but we need something to work with LLVM <= 6.0.

radeonsi does the max between stride and element count on the CPU
but that results in the size intrinsics returning the wrong size
for the buffer. This would cause CTS errors for radv.

v2: Also include the store changes.

Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."'
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_shader_abi.h
src/amd/vulkan/radv_nir_to_llvm.c

index 1ae2b9dd170ea62ac17645e1f06bd926d0aa233b..6f577cdac7f687d46fc90904fc8727c5733f88ff 100644 (file)
@@ -1082,6 +1082,30 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                           can_speculate, true);
 }
 
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+                                                  LLVMValueRef rsrc,
+                                                  LLVMValueRef vindex,
+                                                  LLVMValueRef voffset,
+                                                  unsigned num_channels,
+                                                  bool glc,
+                                                  bool can_speculate)
+{
+       LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
+       LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
+       stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
+
+       LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
+                                                     LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
+                                                     elem_count, stride, "");
+
+       LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
+                                                      LLVMConstInt(ctx->i32, 2, 0), "");
+
+       return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset,
+                                          num_channels, glc, false,
+                                          can_speculate, true);
+}
+
 /**
  * Set range metadata on an instruction.  This can only be used on load and
  * call instructions.  If you know an instruction can only produce the values
index 6adcc11448c07bd5c6e52f262708e3825201adb6..f901f336857b8ca8fcb503747232c0fa8fd3569b 100644 (file)
@@ -242,6 +242,16 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         bool glc,
                                         bool can_speculate);
 
+/* load_format that handles the stride & element count better if idxen is
+ * disabled by LLVM. */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+                                                  LLVMValueRef rsrc,
+                                                  LLVMValueRef vindex,
+                                                  LLVMValueRef voffset,
+                                                  unsigned num_channels,
+                                                  bool glc,
+                                                  bool can_speculate);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
index 7de59efcffff494b2c64c314be6f8860df922054..2da73a7bfb1a8aba3070f2955cc01bd51135640b 100644 (file)
@@ -1175,12 +1175,21 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
        if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
                unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
 
-               return ac_build_buffer_load_format(&ctx->ac,
-                                                  args->resource,
-                                                  args->addr,
-                                                  ctx->ac.i32_0,
-                                                  util_last_bit(mask),
-                                                  false, true);
+               if (ctx->abi->gfx9_stride_size_workaround) {
+                       return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
+                                                                    args->resource,
+                                                                    args->addr,
+                                                                    ctx->ac.i32_0,
+                                                                    util_last_bit(mask),
+                                                                    false, true);
+               } else {
+                       return ac_build_buffer_load_format(&ctx->ac,
+                                                          args->resource,
+                                                          args->addr,
+                                                          ctx->ac.i32_0,
+                                                          util_last_bit(mask),
+                                                          false, true);
+               }
        }
 
        args->opcode = ac_image_sample;
@@ -2198,8 +2207,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
                glc = ctx->ac.i1true;
 
        if (dim == GLSL_SAMPLER_DIM_BUF) {
+               LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
+
+               if (ctx->abi->gfx9_stride_size_workaround) {
+                       LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
+                       LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
+                       stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
+
+                       LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
+                                                                     LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
+                                                                     elem_count, stride, "");
+
+                       rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
+                                                     LLVMConstInt(ctx->ac.i32, 2, 0), "");
+               }
+
                params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
-               params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
+               params[1] = rsrc;
                params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
                                                    ctx->ac.i32_0, ""); /* vindex */
                params[3] = ctx->ac.i32_0; /* voffset */
index 2f222cf8d61faf2266cadd915819a01be24c3bf1..6b9a91c92a9d6cbf2f7afa9c2d5d2db27789a9f6 100644 (file)
@@ -188,6 +188,10 @@ struct ac_shader_abi {
        /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
         * uses it due to promoting D16 to D32, but radv needs it off. */
        bool clamp_shadow_reference;
+
+       /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
+       * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
+       bool gfx9_stride_size_workaround;
 };
 
 #endif /* AC_SHADER_ABI_H */
index 23b58c37b23a51940c1609f808560f21f34c3422..c6b4e8b5328c15d25fed110b23844cf75730225d 100644 (file)
@@ -3068,6 +3068,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
        ctx.abi.load_sampler_desc = radv_get_sampler_desc;
        ctx.abi.load_resource = radv_load_resource;
        ctx.abi.clamp_shadow_reference = false;
+       ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
 
        if (shader_count >= 2)
                ac_init_exec_full_mask(&ctx.ac);