radeonsi: fix GPU hangs with bindless textures and LLVM 7.0
authorMarek Olšák <marek.olsak@amd.com>
Wed, 29 Aug 2018 05:34:46 +0000 (01:34 -0400)
committerMarek Olšák <marek.olsak@amd.com>
Mon, 10 Sep 2018 19:19:56 +0000 (15:19 -0400)
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_nir.c
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c

index 1c8d944db74696f4d82199f7807a42ed8c5e9f7b..1f5112e9929491086c8c1fd2774a3aba907823d9 100644 (file)
@@ -835,6 +835,14 @@ ac_build_gep0(struct ac_llvm_context *ctx,
        return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 }
 
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+                                 LLVMValueRef index)
+{
+       return LLVMBuildPointerCast(ctx->builder,
+                                   ac_build_gep0(ctx, ptr, index),
+                                   LLVMTypeOf(ptr), "");
+}
+
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
                       LLVMValueRef base_ptr, LLVMValueRef index,
@@ -853,14 +861,39 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
  * \param uniform   Whether the base_ptr and index can be assumed to be
  *                  dynamically uniform (i.e. load to an SGPR)
  * \param invariant Whether the load is invariant (no other opcodes affect it)
+ * \param no_unsigned_wraparound
+ *    For all possible re-associations and re-distributions of an expression
+ *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
+ *    without inbounds in base_ptr), this parameter is true if "addr + offset"
+ *    does not result in an unsigned integer wraparound. This is used for
+ *    optimal code generation of 32-bit pointer arithmetic.
+ *
+ *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
+ *    integer wraparound can't be an imm offset in s_load_dword, because
+ *    the instruction performs "addr + offset" in 64 bits.
+ *
+ *    Expected usage for bindless textures by chaining GEPs:
+ *      // possible unsigned wraparound, don't use InBounds:
+ *      ptr1 = LLVMBuildGEP(base_ptr, index);
+ *      image = load(ptr1); // becomes "s_load ptr1, 0"
+ *
+ *      ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
+ *      sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
  */
 static LLVMValueRef
 ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-                    LLVMValueRef index, bool uniform, bool invariant)
+                    LLVMValueRef index, bool uniform, bool invariant,
+                    bool no_unsigned_wraparound)
 {
        LLVMValueRef pointer, result;
+       LLVMValueRef indices[2] = {ctx->i32_0, index};
+
+       if (no_unsigned_wraparound &&
+           LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_CONST_32BIT_ADDR_SPACE)
+               pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 2, "");
+       else
+               pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
 
-       pointer = ac_build_gep0(ctx, base_ptr, index);
        if (uniform)
                LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
        result = LLVMBuildLoad(ctx->builder, pointer, "");
@@ -872,19 +905,28 @@ ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
                           LLVMValueRef index)
 {
-       return ac_build_load_custom(ctx, base_ptr, index, false, false);
+       return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
 }
 
 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
                                     LLVMValueRef base_ptr, LLVMValueRef index)
 {
-       return ac_build_load_custom(ctx, base_ptr, index, false, true);
+       return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
 }
 
+/* This assumes that there is no unsigned integer wraparound during the address
+ * computation, excluding all GEPs within base_ptr. */
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
                                   LLVMValueRef base_ptr, LLVMValueRef index)
 {
-       return ac_build_load_custom(ctx, base_ptr, index, true, true);
+       return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
+}
+
+/* See ac_build_load_custom() documentation. */
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index)
+{
+       return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
index b080cca4cb730db3ba0cdc60b87b43a607470728..0d261bae0977edf1166275faad6917e4bb09fe27 100644 (file)
@@ -203,6 +203,8 @@ LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
              LLVMValueRef base_ptr,
              LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+                                 LLVMValueRef index);
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
@@ -215,6 +217,8 @@ LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
                                     LLVMValueRef base_ptr, LLVMValueRef index);
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
                                   LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
index a638dbf8f1a657f3ac21c87c0d23c4ddba7de101..235c46ecf92798c0016be3a473b40dfb5c062794 100644 (file)
@@ -305,7 +305,8 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
                                  enum ac_descriptor_type type);
 LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
                                LLVMValueRef list, LLVMValueRef index,
-                               enum ac_descriptor_type desc_type, bool dcc_off);
+                               enum ac_descriptor_type desc_type, bool dcc_off,
+                               bool bindless);
 
 void si_load_system_value(struct si_shader_context *ctx,
                          unsigned index,
index 0aefca22385b9038ee697eab6ec5b2a92b8f5410..5d6280b80f7e1b4a530c6cd1e79fcf2ac9d705e7 100644 (file)
@@ -920,7 +920,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                                     index, "");
 
                /* TODO: be smarter about when we use dcc_off */
-               return si_load_image_desc(ctx, list, index, desc_type, write);
+               return si_load_image_desc(ctx, list, index, desc_type, write, bindless);
        }
 
        assert(base_index + constant_index < ctx->num_samplers);
@@ -931,6 +931,16 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
        index = LLVMBuildAdd(ctx->ac.builder, index,
                             LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
 
+       if (bindless) {
+               /* Since bindless handle arithmetic can contain an unsigned integer
+                * wraparound and si_load_sampler_desc assumes there isn't any,
+                * use GEP without "inbounds" (inside ac_build_pointer_add)
+                * to prevent incorrect code generation and hangs.
+                */
+               index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+               list = ac_build_pointer_add(&ctx->ac, list, index);
+               index = ctx->i32_0;
+       }
        return si_load_sampler_desc(ctx, list, index, desc_type);
 }
 
index eaa200a95d63ef4697c52f879dff559ae3b99f86..cabc448a08295a0c8fa0e1b6fd26f62da33a6dd9 100644 (file)
@@ -176,7 +176,8 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
 
 LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
                                LLVMValueRef list, LLVMValueRef index,
-                               enum ac_descriptor_type desc_type, bool dcc_off)
+                               enum ac_descriptor_type desc_type, bool dcc_off,
+                               bool bindless)
 {
        LLVMBuilderRef builder = ctx->ac.builder;
        LLVMValueRef rsrc;
@@ -190,7 +191,11 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
                assert(desc_type == AC_DESC_IMAGE);
        }
 
-       rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+       if (bindless)
+               rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
+       else
+               rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+
        if (desc_type == AC_DESC_IMAGE && dcc_off)
                rsrc = force_dcc_off(ctx, rsrc);
        return rsrc;
@@ -240,6 +245,8 @@ image_fetch_rsrc(
                                     index, "");
        }
 
+       bool bindless = false;
+
        if (image->Register.File != TGSI_FILE_IMAGE) {
                /* Bindless descriptors are accessible from a different pair of
                 * user SGPR indices.
@@ -254,11 +261,12 @@ image_fetch_rsrc(
                 */
                index = LLVMBuildMul(ctx->ac.builder, index,
                                     LLVMConstInt(ctx->i32, 2, 0), "");
+               bindless = true;
        }
 
        *rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
                                   target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
-                                  dcc_off);
+                                  dcc_off, bindless);
 }
 
 static void image_fetch_coords(
@@ -1068,6 +1076,15 @@ static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
                                    ctx->param_bindless_samplers_and_images);
                index = lp_build_emit_fetch_src(bld_base, reg,
                                                TGSI_TYPE_UNSIGNED, 0);
+
+               /* Since bindless handle arithmetic can contain an unsigned integer
+                * wraparound and si_load_sampler_desc assumes there isn't any,
+                * use GEP without "inbounds" (inside ac_build_pointer_add)
+                * to prevent incorrect code generation and hangs.
+                */
+               index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+               list = ac_build_pointer_add(&ctx->ac, list, index);
+               index = ctx->i32_0;
        }
 
        if (target == TGSI_TEXTURE_BUFFER)