From da7ff58835b27d10fb643b303ce6a3145517fc3b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 14 Nov 2019 08:09:32 +0100 Subject: [PATCH] aco: make 1/2*PI a literal constant on SI/CI Reviewed-by: Rhys Perry --- src/amd/compiler/aco_builder_h.py | 5 ++++- src/amd/compiler/aco_instruction_selection.cpp | 8 ++------ src/amd/compiler/aco_ir.h | 4 ---- src/amd/compiler/aco_optimizer.cpp | 17 +++++++++++++---- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index d215c7b198f..b8854810e47 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -327,7 +327,10 @@ public: Operand op = op_.op; if (dst.regClass() == s1 && op.size() == 1 && op.isLiteral()) { uint32_t imm = op.constantValue(); - if (imm >= 0xffff8000 || imm <= 0x7fff) { + if (imm == 0x3e22f983) { + if (program->chip_class >= GFX8) + op.setFixed(PhysReg{248}); /* it can be an inline constant on GFX8+ */ + } else if (imm >= 0xffff8000 || imm <= 0x7fff) { return sopk(aco_opcode::s_movk_i32, dst, imm & 0xFFFFu); } else if (util_bitreverse(imm) <= 64 || util_bitreverse(imm) >= 0xFFFFFFF0) { uint32_t rev = util_bitreverse(imm); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 3f741e2c0fd..85ed7b3bf61 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1660,12 +1660,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp src = get_alu_src(ctx, instr->src[0]); aco_ptr norm; if (dst.size() == 1) { - Temp tmp; - Operand half_pi(0x3e22f983u); - if (src.type() == RegType::sgpr) - tmp = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src); - else - tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src); + Temp half_pi = bld.copy(bld.def(s1), Operand(0x3e22f983u)); + Temp tmp = bld.vop2(aco_opcode::v_mul_f32, 
bld.def(v1), half_pi, as_vgpr(ctx, src)); /* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */ if (ctx->options->chip_class < GFX9) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 10661858ca0..2928d831c8b 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -324,8 +324,6 @@ public: setFixed(PhysReg{246}); else if (v == 0xc0800000) /* -4.0 */ setFixed(PhysReg{247}); - else if (v == 0x3e22f983) /* 1/(2*PI) */ - setFixed(PhysReg{248}); else /* Literal Constant */ setFixed(PhysReg{255}); }; @@ -353,8 +351,6 @@ public: setFixed(PhysReg{246}); else if (v == 0xC010000000000000) /* -4.0 */ setFixed(PhysReg{247}); - else if (v == 0x3fc45f306dc9c882) /* 1/(2*PI) */ - setFixed(PhysReg{248}); else { /* Literal Constant: we don't know if it is a long or double.*/ isConstant_ = 0; assert(false && "attempt to create a 64-bit literal constant"); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index cea466e3819..66f4db8ce07 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -556,6 +556,15 @@ bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp return false; } +Operand get_constant_op(opt_ctx &ctx, uint32_t val) +{ + // TODO: this function shouldn't be needed if we store Operand instead of value. 
+ Operand op(val); + if (val == 0x3e22f983 && ctx.program->chip_class >= GFX8) + op.setFixed(PhysReg{248}); /* 1/(2*PI) can be an inline constant on GFX8+ */ + return op; +} + void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) { if (instr->isSALU() || instr->isVALU() || instr->format == Format::PSEUDO) { @@ -605,7 +614,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) } } if ((info.is_constant() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) { - instr->operands[i] = Operand(info.val); + instr->operands[i] = get_constant_op(ctx, info.val); continue; } } @@ -642,15 +651,15 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) if (info.is_constant() && can_accept_constant(instr, i)) { perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get()); if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) { - instr->operands[i] = Operand(info.val); + instr->operands[i] = get_constant_op(ctx, info.val); continue; } else if (!instr->isVOP3() && can_swap_operands(instr)) { instr->operands[i] = instr->operands[0]; - instr->operands[0] = Operand(info.val); + instr->operands[0] = get_constant_op(ctx, info.val); continue; } else if (can_use_VOP3(instr)) { to_VOP3(ctx, instr); - instr->operands[i] = Operand(info.val); + instr->operands[i] = get_constant_op(ctx, info.val); continue; } } -- 2.30.2