From: Bas Nieuwenhuizen Date: Thu, 19 Dec 2019 00:39:56 +0000 (+0100) Subject: amd/llvm: Mark pointer function arguments as 32-byte aligned. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=40e00c800cf863ddf75b65b249968031ecbf52d6;p=mesa.git amd/llvm: Mark pointer function arguments as 32-byte aligned. Otherwise LLVM does not see the pointers as allowing speculative loads. The pipeline-db results are pretty wild, but mostly what is to be expected from allowing more code movement in LLVM: Totals from affected shaders: SGPRS: 157728 -> 168336 (6.73 %) VGPRS: 158628 -> 158664 (0.02 %) Spilled SGPRs: 10845 -> 24753 (128.24 %) Spilled VGPRs: 13 -> 13 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 8 -> 8 (0.00 %) dwords per thread Code Size: 17189180 -> 17313712 (0.72 %) bytes LDS: 204 -> 204 (0.00 %) blocks Max Waves: 5700 -> 5687 (-0.23 %) Wait states: 0 -> 0 (0.00 %) This gives some boosts for shaders where we can move a descriptor load outside a loop. Reviewed-by: Marek Olšák Part-of: --- diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 6d5d1f7bab2..8be8433c997 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -4988,6 +4988,7 @@ ac_build_main(const struct ac_shader_args *args, if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); + ac_add_attr_alignment(P, 32); } } diff --git a/src/amd/llvm/ac_llvm_helper.cpp b/src/amd/llvm/ac_llvm_helper.cpp index f5383344dd4..779085b7e78 100644 --- a/src/amd/llvm/ac_llvm_helper.cpp +++ b/src/amd/llvm/ac_llvm_helper.cpp @@ -45,6 +45,18 @@ void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); } +void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes) +{ +#if LLVM_VERSION_MAJOR >= 10 + llvm::Argument *A = llvm::unwrap<llvm::Argument>(val); 
A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes))); +#else + /* Avoid unused parameter warnings. */ + (void)val; + (void)bytes; +#endif +} + bool ac_is_sgpr_param(LLVMValueRef arg) { llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg); diff --git a/src/amd/llvm/ac_llvm_util.h b/src/amd/llvm/ac_llvm_util.h index 1d90a6a1a03..4a01eceadec 100644 --- a/src/amd/llvm/ac_llvm_util.h +++ b/src/amd/llvm/ac_llvm_util.h @@ -95,6 +95,7 @@ struct ac_llvm_compiler { const char *ac_get_llvm_processor_name(enum radeon_family family); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); +void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, enum ac_func_attr attr);