From cf2f3c27533d8721abed4cdd4dfb00d4d53e8a0f Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 22 Apr 2020 23:01:28 -0400
Subject: [PATCH 1/1] ac: reassociate FP expressions for inexact instructions for radeonsi

Totals:
SGPRS: 2591784 -> 2590696 (-0.04 %)
VGPRS: 1666888 -> 1666736 (-0.01 %)
Spilled SGPRs: 4131 -> 4107 (-0.58 %)
Spilled VGPRs: 38 -> 38 (0.00 %)
Private memory VGPRs: 2176 -> 2176 (0.00 %)
Scratch size: 2228 -> 2228 (0.00 %) dwords per thread
Code Size: 52715468 -> 52693584 (-0.04 %) bytes
LDS: 92 -> 92 (0.00 %) blocks
Max Waves: 479897 -> 479892 (-0.00 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/amd/llvm/ac_llvm_helper.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/amd/llvm/ac_llvm_helper.cpp b/src/amd/llvm/ac_llvm_helper.cpp
index f5383344dd4..97b9a1a035a 100644
--- a/src/amd/llvm/ac_llvm_helper.cpp
+++ b/src/amd/llvm/ac_llvm_helper.cpp
@@ -101,6 +101,11 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 		 */
 		flags.setAllowContract(); /* contract */
 
+		/* Allow reassociation transformations for floating-point
+		 * instructions. This may dramatically change results.
+		 */
+		flags.setAllowReassoc(); /* reassoc */
+
 		llvm::unwrap(builder)->setFastMathFlags(flags);
 		break;
 	}
@@ -113,11 +118,13 @@ bool ac_disable_inexact_math(LLVMBuilderRef builder)
 {
 	auto *b = llvm::unwrap(builder);
 	llvm::FastMathFlags flags = b->getFastMathFlags();
+	assert(flags.allowContract() == flags.allowReassoc());
 
 	if (!flags.allowContract())
 		return false;
 
 	flags.setAllowContract(false);
+	flags.setAllowReassoc(false);
 	b->setFastMathFlags(flags);
 	return true;
 }
@@ -126,11 +133,13 @@ void ac_restore_inexact_math(LLVMBuilderRef builder, bool value)
 {
 	auto *b = llvm::unwrap(builder);
 	llvm::FastMathFlags flags = b->getFastMathFlags();
+	assert(flags.allowContract() == flags.allowReassoc());
 
 	if (flags.allowContract() == value)
 		return;
 
 	flags.setAllowContract(value);
+	flags.setAllowReassoc(value);
 	b->setFastMathFlags(flags);
 }
 
-- 
2.30.2