ac/nir,radv: Optimize bounds check for 64 bit CAS.
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fri, 2 Aug 2019 10:40:17 +0000 (12:40 +0200)
committerBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fri, 2 Aug 2019 19:21:55 +0000 (21:21 +0200)
Skip the bounds check in the 64-bit SSBO compare-and-swap lowering when the application did not enable the robustBufferAccess feature.

The new robust_buffer_access ABI flag is only wired up in radv; radeonsi keeps the bounds check unconditionally enabled.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_shader_abi.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h
src/gallium/drivers/radeonsi/si_shader_nir.c

index 70fcaaf7bf340c762dc0a3d5631d491f620fdef9..53b93f7e1d333484ac7626e853e30fb345e67436 100644 (file)
@@ -1644,14 +1644,17 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
                                           LLVMValueRef compare,
                                           LLVMValueRef exchange)
 {
-       LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+       LLVMBasicBlockRef start_block = NULL, then_block = NULL;
+       if (ctx->abi->robust_buffer_access) {
+               LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
 
-       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
-       LLVMBasicBlockRef start_block = LLVMGetInsertBlock(ctx->ac.builder);
+               LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+               start_block = LLVMGetInsertBlock(ctx->ac.builder);
 
-       ac_build_ifcc(&ctx->ac, cond, -1);
+               ac_build_ifcc(&ctx->ac, cond, -1);
 
-       LLVMBasicBlockRef then_block = LLVMGetInsertBlock(ctx->ac.builder);
+               then_block = LLVMGetInsertBlock(ctx->ac.builder);
+       }
 
        LLVMValueRef ptr_parts[2] = {
                ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
@@ -1673,20 +1676,24 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
        LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
        result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
 
-       ac_build_endif(&ctx->ac, -1);
+       if (ctx->abi->robust_buffer_access) {
+               ac_build_endif(&ctx->ac, -1);
 
-       LLVMBasicBlockRef incoming_blocks[2] = {
-               start_block,
-               then_block,
-       };
+               LLVMBasicBlockRef incoming_blocks[2] = {
+                       start_block,
+                       then_block,
+               };
 
-       LLVMValueRef incoming_values[2] = {
-               LLVMConstInt(ctx->ac.i64, 0, 0),
-               result,
-       };
-       LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
-       LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
-       return ret;
+               LLVMValueRef incoming_values[2] = {
+                       LLVMConstInt(ctx->ac.i64, 0, 0),
+                       result,
+               };
+               LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+               LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+               return ret;
+       } else {
+               return result;
+       }
 }
 
 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
index d8572d124e9aa2a230d32d26cb6a08ea92dc8544..d6db398ee7b276ac474f0decbdae0901db3ea470 100644 (file)
@@ -208,6 +208,9 @@ struct ac_shader_abi {
        * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
        bool gfx9_stride_size_workaround;
        bool gfx9_stride_size_workaround_for_atomic;
+
+       /* Whether bounds checks are required */
+       bool robust_buffer_access;
 };
 
 #endif /* AC_SHADER_ABI_H */
index cb83cde08cfca85eba97679841d853ed730d94a1..07b4aeb0649649e6f0ae088e2696aab7c93fd0cd 100644 (file)
@@ -1886,6 +1886,9 @@ VkResult radv_CreateDevice(
                device->enabled_extensions.EXT_descriptor_indexing ||
                device->enabled_extensions.EXT_buffer_device_address;
 
+       device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
+                                      pCreateInfo->pEnabledFeatures->robustBufferAccess;
+
        mtx_init(&device->shader_slab_mutex, mtx_plain);
        list_inithead(&device->shader_slabs);
 
index 91251aa69bdba79ba819515053640d1fe83e8a64..45f5a9e096e3505c6c743bb049ab11622f0331e9 100644 (file)
@@ -4389,6 +4389,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
        ctx.abi.load_resource = radv_load_resource;
        ctx.abi.clamp_shadow_reference = false;
        ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
+       ctx.abi.robust_buffer_access = options->robust_buffer_access;
 
        /* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
         * we fallback to the old intrinsics for atomic buffer image operations
index ee0761e69fe6de2073f38b0ec168226d6539f305..a329f13586f6e55396efb95a420a8b47d797252f 100644 (file)
@@ -750,6 +750,9 @@ struct radv_device {
 
        struct radv_device_extension_table enabled_extensions;
 
+       /* Whether the app has enabled the robustBufferAccess feature. */
+       bool robust_buffer_access;
+
        /* Whether the driver uses a global BO list. */
        bool use_global_bo_list;
 
index d0f6895f148ee73fd9cb1f5262b4f60a6a3b1208..b695fa9834b06466b56ded3d8136f5bc5b3a4866 100644 (file)
@@ -1217,6 +1217,7 @@ radv_shader_variant_compile(struct radv_device *device,
 
        options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
        options.supports_spill = true;
+       options.robust_buffer_access = device->robust_buffer_access;
 
        return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
                                     &options, false, binary_out);
index 0ab7db20181498ae79f5bf547a7a3fd0fe283018..60e52969b3ea6e5afb73adfc5ed46c4f420240b1 100644 (file)
@@ -120,6 +120,7 @@ struct radv_nir_compiler_options {
        bool unsafe_math;
        bool supports_spill;
        bool clamp_shadow_reference;
+       bool robust_buffer_access;
        bool dump_shader;
        bool dump_preoptir;
        bool record_llvm_ir;
index 20031ad3f60321298e3ee2827a25146df6871b90..56d35e533e5e5cf0355e42c5afbb3db130e3e58d 100644 (file)
@@ -1248,6 +1248,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
        ctx->abi.inputs = &ctx->inputs[0];
        ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
        ctx->abi.clamp_shadow_reference = true;
+       ctx->abi.robust_buffer_access = true;
 
        ctx->num_samplers = util_last_bit(info->samplers_declared);
        ctx->num_images = util_last_bit(info->images_declared);