From: Bas Nieuwenhuizen
Date: Fri, 2 Aug 2019 10:40:17 +0000 (+0200)
Subject: ac/nir,radv: Optimize bounds check for 64 bit CAS.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=72e7b7a00b660240bed1029841ffbdaeedfaccfe;p=mesa.git

ac/nir,radv: Optimize bounds check for 64 bit CAS.

Skip the bounds check when the application does not ask for robust
buffer access. The check is only implemented in radv.

Reviewed-by: Samuel Pitoiset
---

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 70fcaaf7bf3..53b93f7e1d3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1644,14 +1644,17 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
 					   LLVMValueRef compare,
 					   LLVMValueRef exchange)
 {
-	LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+	LLVMBasicBlockRef start_block = NULL, then_block = NULL;
+	if (ctx->abi->robust_buffer_access) {
+		LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
 
-	LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
-	LLVMBasicBlockRef start_block = LLVMGetInsertBlock(ctx->ac.builder);
+		LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+		start_block = LLVMGetInsertBlock(ctx->ac.builder);
 
-	ac_build_ifcc(&ctx->ac, cond, -1);
+		ac_build_ifcc(&ctx->ac, cond, -1);
 
-	LLVMBasicBlockRef then_block = LLVMGetInsertBlock(ctx->ac.builder);
+		then_block = LLVMGetInsertBlock(ctx->ac.builder);
+	}
 
 	LLVMValueRef ptr_parts[2] = {
 		ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
@@ -1673,20 +1676,24 @@ static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
 	LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange,
 						       "singlethread-one-as");
 	result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
-	ac_build_endif(&ctx->ac, -1);
+	if (ctx->abi->robust_buffer_access) {
+		ac_build_endif(&ctx->ac, -1);
 
-	LLVMBasicBlockRef incoming_blocks[2] = {
-		start_block,
-		then_block,
-	};
+		LLVMBasicBlockRef incoming_blocks[2] = {
+			start_block,
+			then_block,
+		};
 
-	LLVMValueRef incoming_values[2] = {
-		LLVMConstInt(ctx->ac.i64, 0, 0),
-		result,
-	};
-	LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
-	LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
-	return ret;
+		LLVMValueRef incoming_values[2] = {
+			LLVMConstInt(ctx->ac.i64, 0, 0),
+			result,
+		};
+		LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+		LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+		return ret;
+	} else {
+		return result;
+	}
 }
 
 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
index d8572d124e9..d6db398ee7b 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -208,6 +208,9 @@ struct ac_shader_abi {
 	 * and LLVM optimizes an indexed load with constant index to IDXEN=0.
 	 */
 	bool gfx9_stride_size_workaround;
 	bool gfx9_stride_size_workaround_for_atomic;
+
+	/* Whether bounds checks are required */
+	bool robust_buffer_access;
 };
 
 #endif /* AC_SHADER_ABI_H */
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index cb83cde08cf..07b4aeb0649 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1886,6 +1886,9 @@ VkResult radv_CreateDevice(
 		device->enabled_extensions.EXT_descriptor_indexing ||
 		device->enabled_extensions.EXT_buffer_device_address;
 
+	device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
+				       pCreateInfo->pEnabledFeatures->robustBufferAccess;
+
 	mtx_init(&device->shader_slab_mutex, mtx_plain);
 	list_inithead(&device->shader_slabs);
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 91251aa69bd..45f5a9e096e 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -4389,6 +4389,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 	ctx.abi.load_resource = radv_load_resource;
 	ctx.abi.clamp_shadow_reference = false;
 	ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
+	ctx.abi.robust_buffer_access = options->robust_buffer_access;
 
 	/* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
 	 * we fallback to the old intrinsics for atomic buffer image operations
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ee0761e69fe..a329f13586f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -750,6 +750,9 @@ struct radv_device {
 	struct radv_device_extension_table enabled_extensions;
 
+	/* Whether the app has enabled the robustBufferAccess feature. */
+	bool robust_buffer_access;
+
 	/* Whether the driver uses a global BO list. */
 	bool use_global_bo_list;
 
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d0f6895f148..b695fa9834b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1217,6 +1217,7 @@ radv_shader_variant_compile(struct radv_device *device,
 	options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
 	options.supports_spill = true;
+	options.robust_buffer_access = device->robust_buffer_access;
 
 	return shader_variant_compile(device, module, shaders, shader_count,
 				      shaders[shader_count - 1]->info.stage,
 				      &options, false, binary_out);
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 0ab7db20181..60e52969b3e 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -120,6 +120,7 @@ struct radv_nir_compiler_options {
 	bool unsafe_math;
 	bool supports_spill;
 	bool clamp_shadow_reference;
+	bool robust_buffer_access;
 	bool dump_shader;
 	bool dump_preoptir;
 	bool record_llvm_ir;
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 20031ad3f60..56d35e533e5 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -1248,6 +1248,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
 	ctx->abi.inputs = &ctx->inputs[0];
 	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
 	ctx->abi.clamp_shadow_reference = true;
+	ctx->abi.robust_buffer_access = true;
 
 	ctx->num_samplers = util_last_bit(info->samplers_declared);
 	ctx->num_images = util_last_bit(info->images_declared);
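
For reference, the shape of the code being generated: with robustBufferAccess
enabled, emit_ssbo_comp_swap_64 wraps the 64-bit CAS in an offset < size
branch and merges the two paths with a phi (out-of-bounds accesses yield 0);
with it disabled, the branch, the endif and the phi all go away and the atomic
is emitted unconditionally. Below is a minimal C sketch of the two shapes, not
driver code: C11 atomics stand in for the hardware buffer CAS, and the names
buf, size_bytes, offset, compare and exchange are illustrative placeholders
for the descriptor fields and intrinsic operands.

	/* Sketch only -- models the IR that emit_ssbo_comp_swap_64 emits.
	 * 'offset' and 'size_bytes' are in bytes; buf holds 64-bit words. */
	#include <stdatomic.h>
	#include <stdint.h>

	static uint64_t
	cas64_robust(_Atomic uint64_t *buf, uint64_t size_bytes,
		     uint64_t offset, uint64_t compare, uint64_t exchange)
	{
		uint64_t result = 0;	/* phi incoming from the start block */

		if (offset < size_bytes) {	/* bounds check kept */
			/* compare is updated to the old memory value,
			 * mirroring the extract of element 0 from the
			 * cmpxchg result. */
			atomic_compare_exchange_strong(&buf[offset / 8],
						       &compare, exchange);
			result = compare;
		}
		return result;	/* the merge point is the LLVM phi */
	}

	static uint64_t
	cas64_fast(_Atomic uint64_t *buf, uint64_t offset,
		   uint64_t compare, uint64_t exchange)
	{
		/* robustBufferAccess off: no branch, no phi. */
		atomic_compare_exchange_strong(&buf[offset / 8],
					       &compare, exchange);
		return compare;
	}

In the fast path the out-of-bounds value never needs to be materialized, which
is exactly what this patch exploits: the phi only exists to supply 0 on the
out-of-bounds edge, so dropping the bounds check also drops the control flow.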