Otherwise LLVM does not see the pointers as allowing speculative
loads.
The pipeline-db results are pretty wild, but mostly what is to be
expected from allowing more code movement in LLVM:
Totals from affected shaders:
SGPRS: 157728 -> 168336 (6.73 %)
VGPRS: 158628 -> 158664 (0.02 %)
Spilled SGPRs: 10845 -> 24753 (128.24 %)
Spilled VGPRs: 13 -> 13 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 8 -> 8 (0.00 %) dwords per thread
Code Size: 17189180 -> 17313712 (0.72 %) bytes
LDS: 204 -> 204 (0.00 %) blocks
Max Waves: 5700 -> 5687 (-0.23 %)
Wait states: 0 -> 0 (0.00 %)
This gives some boosts for shaders where we can move a descriptor load
outside a loop.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3159>
if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
+ ac_add_attr_alignment(P, 32);
}
}
A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
}
+/**
+ * Attach an alignment attribute of \p bytes to the function argument \p val.
+ *
+ * \p val is unwrapped as an llvm::Argument, so it has to refer to one.
+ * \p bytes is passed straight to llvm::Align.
+ *
+ * When built against an LLVM major version below 10 this function does
+ * nothing.
+ */
+void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
+{
+#if LLVM_VERSION_MAJOR < 10
+   /* No attribute is added on this path; the casts only silence
+    * unused-parameter warnings.
+    */
+   (void)val;
+   (void)bytes;
+#else
+   llvm::Argument *arg = llvm::unwrap<llvm::Argument>(val);
+   llvm::LLVMContext &llvm_ctx = arg->getContext();
+
+   arg->addAttr(llvm::Attribute::getWithAlignment(llvm_ctx, llvm::Align(bytes)));
+#endif
+}
+
bool ac_is_sgpr_param(LLVMValueRef arg)
{
llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
/* NOTE(review): presumably returns the LLVM processor name matching `family`;
 * the definition is not visible here — confirm.
 */
const char *ac_get_llvm_processor_name(enum radeon_family family);
/* Adds a dereferenceable attribute covering `bytes` bytes to `val`
 * (presumably a function argument — confirm against the definition).
 */
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
/* Adds an alignment attribute of `bytes` to the function argument `val`.
 * Does nothing when built with LLVM older than 10.
 */
+void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes);
/* `param` is treated as an LLVM function argument. NOTE(review): presumably
 * reports whether that argument lives in SGPRs — confirm.
 */
bool ac_is_sgpr_param(LLVMValueRef param);
/* Adds `attr` to `function` at `attr_idx`. NOTE(review): a caller passes
 * `i + 1` for parameter `i`, so the index looks 1-based for parameters —
 * confirm.
 */
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
int attr_idx, enum ac_func_attr attr);