From d962bbd89528c540b86bd61d19677bda4d352ebe Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Wed, 13 Nov 2019 11:14:51 +0100 Subject: [PATCH] aco: Implement 64-bit constant propagation. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/aco_ir.h | 39 ++++++++++++++-------- src/amd/compiler/aco_lower_bool_phis.cpp | 4 +-- src/amd/compiler/aco_lower_to_hw_instr.cpp | 2 +- src/amd/compiler/aco_optimizer.cpp | 38 ++++++++++++++++----- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 5426dc05fb1..4239e5ffaf8 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -300,10 +300,11 @@ public: setFixed(PhysReg{128}); } }; - explicit Operand(uint32_t v) noexcept + explicit Operand(uint32_t v, bool is64bit = false) noexcept { data_.i = v; isConstant_ = true; + is64BitConst_ = is64bit; if (v <= 64) setFixed(PhysReg{128 + v}); else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */ @@ -324,34 +325,46 @@ public: setFixed(PhysReg{246}); else if (v == 0xc0800000) /* -4.0 */ setFixed(PhysReg{247}); - else /* Literal Constant */ + else { /* Literal Constant */ + assert(!is64bit && "attempt to create a 64-bit literal constant"); setFixed(PhysReg{255}); + } }; explicit Operand(uint64_t v) noexcept { isConstant_ = true; is64BitConst_ = true; - if (v <= 64) + if (v <= 64) { + data_.i = (uint32_t) v; setFixed(PhysReg{128 + (uint32_t) v}); - else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */ + } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */ + data_.i = (uint32_t) v; setFixed(PhysReg{192 - (uint32_t) v}); - else if (v == 0x3FE0000000000000) /* 0.5 */ + } else if (v == 0x3FE0000000000000) { /* 0.5 */ + data_.i = 0x3f000000; setFixed(PhysReg{240}); - else if (v == 0xBFE0000000000000) /* -0.5 */ + } else if (v == 0xBFE0000000000000) { /* -0.5 */ + data_.i = 0xbf000000; setFixed(PhysReg{241}); - else if (v == 0x3FF0000000000000) /* 1.0 */ + } else if (v == 0x3FF0000000000000) { /* 1.0 */ + data_.i = 0x3f800000; setFixed(PhysReg{242}); - else if (v == 0xBFF0000000000000) /* -1.0 */ + } else if (v == 0xBFF0000000000000) { /* -1.0 */ + data_.i = 0xbf800000; setFixed(PhysReg{243}); - else if (v == 0x4000000000000000) /* 2.0 */ + } else if (v == 0x4000000000000000) { /* 2.0 */ + data_.i = 0x40000000; setFixed(PhysReg{244}); - else if (v == 0xC000000000000000) /* -2.0 */ + } else if (v == 0xC000000000000000) { /* -2.0 */ + data_.i = 0xc0000000; setFixed(PhysReg{245}); - else if (v == 0x4010000000000000) /* 4.0 */ + } else if (v == 0x4010000000000000) { /* 4.0 */ + data_.i = 0x40800000; setFixed(PhysReg{246}); - else if (v == 0xC010000000000000) /* -4.0 */ + } else if (v == 0xC010000000000000) { /* -4.0 */ + data_.i = 0xc0800000; setFixed(PhysReg{247}); - else { /* Literal Constant: we don't know if it is a long or double.*/ + } else { /* Literal Constant: we don't know if it is a long or double.*/ isConstant_ = 0; assert(false && "attempt to create a 64-bit literal constant"); } diff --git a/src/amd/compiler/aco_lower_bool_phis.cpp b/src/amd/compiler/aco_lower_bool_phis.cpp index 988f753c82d..e5e27d70971 100644 --- a/src/amd/compiler/aco_lower_bool_phis.cpp +++ b/src/amd/compiler/aco_lower_bool_phis.cpp @@ -54,7 +54,7 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state) while (true) { auto pos = state->latest.find(block_idx); if (pos != state->latest.end()) - return Operand({pos->second, program->lane_mask}); + return Operand(Temp(pos->second, program->lane_mask)); Block& block = program->blocks[block_idx]; size_t pred = block.linear_preds.size(); @@ -78,7 +78,7 @@ Operand get_ssa(Program *program, unsigned block_idx, ssa_state *state) phi->definitions[0] = Definition(Temp{res, program->lane_mask}); block.instructions.emplace(block.instructions.begin(), std::move(phi)); - return Operand({res, program->lane_mask}); + return Operand(Temp(res, program->lane_mask)); } } } diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 7c304aa7501..c555ccdfa85 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -962,7 +962,7 @@ void lower_to_hw_instr(Program* program) { Operand operand = instr->operands[i]; if (operand.isConstant() || operand.size() == 1) { - assert(instr->definitions[i].size() == 1); + assert(instr->definitions[i].size() == operand.size()); copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, 1}; } else { RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 77cdf3b04a9..bf075b6ffab 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -82,11 +82,12 @@ enum Label { label_minmax = 1 << 19, label_fcmp = 1 << 20, label_uniform_bool = 1 << 21, + label_constant_64bit = 1 << 22, }; static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success | label_add_sub | label_bitwise | label_minmax | label_fcmp; static constexpr uint32_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_omod2 | label_omod4 | label_omod5 | label_clamp; -static constexpr uint32_t val_labels = label_constant | label_literal | label_mad; +static constexpr uint32_t val_labels = label_constant | label_constant_64bit | label_literal | label_mad; struct ssa_info { uint32_t val; @@ -137,6 +138,17 @@ struct ssa_info { return label & label_constant; } + void set_constant_64bit(uint32_t constant) + { + add_label(label_constant_64bit); + val = constant; + } + + bool is_constant_64bit() + { + return label & label_constant_64bit; + } + void set_abs(Temp abs_temp) { add_label(label_abs); @@ -604,10 +616,10 @@ bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp return false; } -Operand get_constant_op(opt_ctx &ctx, uint32_t val) +Operand get_constant_op(opt_ctx &ctx, uint32_t val, bool is64bit = false) { // TODO: this functions shouldn't be needed if we store Operand instead of value. - Operand op(val); + Operand op(val, is64bit); if (val == 0x3e22f983 && ctx.program->chip_class >= GFX8) op.setFixed(PhysReg{248}); /* 1/2 PI can be an inline constant on GFX8+ */ return op; @@ -661,8 +673,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) break; } } - if ((info.is_constant() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) { - instr->operands[i] = get_constant_op(ctx, info.val); + if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) { + instr->operands[i] = get_constant_op(ctx, info.val, info.is_constant_64bit()); continue; } } @@ -696,18 +708,19 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) static_cast(instr.get())->neg[i] = true; continue; } - if (info.is_constant() && can_accept_constant(instr, i)) { + if ((info.is_constant() || info.is_constant_64bit()) && can_accept_constant(instr, i)) { + Operand op = get_constant_op(ctx, info.val, info.is_constant_64bit()); perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get()); if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) { - instr->operands[i] = get_constant_op(ctx, info.val); + instr->operands[i] = op; continue; } else if (!instr->isVOP3() && can_swap_operands(instr)) { instr->operands[i] = instr->operands[0]; - instr->operands[0] = get_constant_op(ctx, info.val); + instr->operands[0] = op; continue; } else if (can_use_VOP3(ctx, instr)) { to_VOP3(ctx, instr); - instr->operands[i] = get_constant_op(ctx, info.val); + instr->operands[i] = op; continue; } } @@ -856,6 +869,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) ctx.info[instr->definitions[i].tempId()].set_literal(vec_op.constantValue()); else if (vec_op.size() == 1) ctx.info[instr->definitions[i].tempId()].set_constant(vec_op.constantValue()); + else if (vec_op.size() == 2) + ctx.info[instr->definitions[i].tempId()].set_constant_64bit(vec_op.constantValue()); } else { assert(vec_op.isTemp()); ctx.info[instr->definitions[i].tempId()].set_temp(vec_op.getTemp()); @@ -886,6 +901,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) ctx.info[instr->definitions[0].tempId()].set_literal(vec_op.constantValue()); else if (vec_op.size() == 1) ctx.info[instr->definitions[0].tempId()].set_constant(vec_op.constantValue()); + else if (vec_op.size() == 2) + ctx.info[instr->definitions[0].tempId()].set_constant_64bit(vec_op.constantValue()); + } else { assert(vec_op.isTemp()); ctx.info[instr->definitions[0].tempId()].set_temp(vec_op.getTemp()); @@ -906,6 +924,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) ctx.info[instr->definitions[0].tempId()].set_literal(instr->operands[0].constantValue()); else if (instr->operands[0].size() == 1) ctx.info[instr->definitions[0].tempId()].set_constant(instr->operands[0].constantValue()); + else if (instr->operands[0].size() == 2) + ctx.info[instr->definitions[0].tempId()].set_constant_64bit(instr->operands[0].constantValue()); } else if (instr->operands[0].isTemp()) { ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); } else { -- 2.30.2