From 3a4847179b95d1f2a03e575ef9f0e5c71631de65 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 17 Jul 2020 15:01:41 +0100 Subject: [PATCH] aco: allow overflow for some SMEM instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit fossil-db (Navi): Totals from 10184 (7.49% of 135946) affected shaders: CodeSize: 83419748 -> 82430824 (-1.19%); split: -1.19%, +0.01% Instrs: 16054612 -> 15908523 (-0.91%); split: -0.93%, +0.02% VMEM: 1608018 -> 1581829 (-1.63%); split: +0.20%, -1.83% SMEM: 577031 -> 563492 (-2.35%); split: +0.10%, -2.45% VClause: 242643 -> 242512 (-0.05%); split: -0.06%, +0.00% SClause: 640966 -> 569897 (-11.09%) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 2 +- src/amd/compiler/aco_ir.h | 3 ++- src/amd/compiler/aco_opt_value_numbering.cpp | 5 ++++- src/amd/compiler/aco_optimizer.cpp | 3 ++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4d161cf0dbd..0af1f1f5c15 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5329,7 +5329,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr) unreachable("unimplemented or forbidden load_push_constant."); } - bld.smem(op, Definition(vec), ptr, index); + static_cast(bld.smem(op, Definition(vec), ptr, index).instr)->prevent_overflow = true; if (!aligned) { Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index c1735a0d31a..a23ff7ce017 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -924,7 +924,8 @@ struct SMEM_instruction : public Instruction { bool nv : 1; /* VEGA only: Non-volatile */ bool can_reorder : 1; bool disable_wqm : 1; - uint32_t padding: 19; + bool prevent_overflow : 1; /* avoid overflow when combining additions */ + uint32_t padding: 18; }; static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 93668442d32..9a1972ff34e 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -226,8 +226,11 @@ struct InstrPred { case Format::SMEM: { SMEM_instruction* aS = static_cast(a); SMEM_instruction* bS = static_cast(b); + /* isel shouldn't be creating situations where this assertion fails */ + assert(aS->prevent_overflow == bS->prevent_overflow); return aS->can_reorder && bS->can_reorder && - aS->glc == bS->glc && aS->nv == bS->nv; + aS->glc == bS->glc && aS->nv == bS->nv && + aS->prevent_overflow == bS->prevent_overflow; } case Format::VINTRP: { Interp_instruction* aI = static_cast(a); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index f66521399ca..a254728baa5 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1001,13 +1001,14 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) SMEM_instruction *smem = static_cast(instr.get()); Temp base; uint32_t offset; + bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow; if (i == 1 && info.is_constant_or_literal(32) && ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) || (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) || (ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) { instr->operands[i] = Operand(info.val); continue; - } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) { + } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) { bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4); if (soe && (!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) || -- 2.30.2