From: Daniel Schürmann Date: Wed, 8 Jan 2020 10:49:11 +0000 (+0100) Subject: aco: compact various Instruction classes X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ffb4790279ca779572ec393ba84d71ef1036b437;p=mesa.git aco: compact various Instruction classes No pipelinedb changes. Reviewed-by: Rhys Perry Part-of: --- diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index bc905f9f969..207c40acf49 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -516,10 +516,9 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= opcode << 16; encoding |= (vop3->clamp ? 1 : 0) << 15; } + encoding |= vop3->opsel << 11; for (unsigned i = 0; i < 3; i++) encoding |= vop3->abs[i] << (8+i); - for (unsigned i = 0; i < 4; i++) - encoding |= vop3->opsel[i] << (11+i); if (instr->definitions.size() == 2) encoding |= instr->definitions[1].physReg() << 8; encoding |= (0xFF & instr->definitions[0].physReg()); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 00310c0975b..aa1648f0d1b 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4424,7 +4424,7 @@ void visit_image_size(isel_context *ctx, nir_intrinsic_instr *instr) aco_ptr mimg{create_instruction(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1)}; mimg->operands[0] = Operand(lod); mimg->operands[1] = Operand(resource); - unsigned& dmask = mimg->dmask; + uint8_t& dmask = mimg->dmask; mimg->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array); mimg->dmask = (1 << instr->dest.ssa.num_components) - 1; mimg->da = glsl_sampler_type_is_array(type); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 04647981b68..388bf064000 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -102,7 +102,7 @@ enum class Format : std::uint16_t { SDWA = 1 << 15, }; -enum barrier_interaction { +enum barrier_interaction : uint8_t { barrier_none = 0, barrier_buffer = 0x1, barrier_image = 0x2, @@ -701,11 +701,11 @@ struct SOP2_instruction : public Instruction { * */ struct SMEM_instruction : public Instruction { - bool glc; /* VI+: globally coherent */ - bool dlc; /* NAVI: device level coherent */ - bool nv; /* VEGA only: Non-volatile */ - bool can_reorder; - bool disable_wqm; + bool glc : 1; /* VI+: globally coherent */ + bool dlc : 1; /* NAVI: device level coherent */ + bool nv : 1; /* VEGA only: Non-volatile */ + bool can_reorder : 1; + bool disable_wqm : 1; barrier_interaction barrier; }; @@ -720,10 +720,10 @@ struct VOPC_instruction : public Instruction { struct VOP3A_instruction : public Instruction { bool abs[3]; - bool opsel[4]; - bool clamp; - unsigned omod; bool neg[3]; + uint8_t opsel : 4; + uint8_t omod : 2; + bool clamp : 1; }; /** @@ -733,17 +733,17 @@ struct VOP3A_instruction : public Instruction { * */ struct DPP_instruction : public Instruction { - uint16_t dpp_ctrl; - uint8_t row_mask; - uint8_t bank_mask; bool abs[2]; bool neg[2]; - bool bound_ctrl; + uint16_t dpp_ctrl; + uint8_t row_mask : 4; + uint8_t bank_mask : 4; + bool bound_ctrl : 1; }; struct Interp_instruction : public Instruction { - unsigned attribute; - unsigned component; + uint8_t attribute; + uint8_t component; }; /** @@ -770,16 +770,16 @@ struct DS_instruction : public Instruction { * */ struct MUBUF_instruction : public Instruction { - unsigned offset; /* Unsigned byte offset - 12 bit */ - bool offen; /* Supply an offset from VGPR (VADDR) */ - bool idxen; /* Supply an index from VGPR (VADDR) */ - bool glc; /* globally coherent */ - bool dlc; /* NAVI: device level coherent */ - bool slc; /* system level coherent */ - bool tfe; /* texture fail enable */ - bool lds; /* Return read-data to LDS instead of VGPRs */ - bool disable_wqm; /* Require an exec mask without helper invocations */ - bool can_reorder; + uint16_t offset : 12; /* Unsigned byte offset - 12 bit */ + bool offen : 1; /* Supply an offset from VGPR (VADDR) */ + bool idxen : 1; /* Supply an index from VGPR (VADDR) */ + bool glc : 1; /* globally coherent */ + bool dlc : 1; /* NAVI: device level coherent */ + bool slc : 1; /* system level coherent */ + bool tfe : 1; /* texture fail enable */ + bool lds : 1; /* Return read-data to LDS instead of VGPRs */ + bool disable_wqm : 1; /* Require an exec mask without helper invocations */ + bool can_reorder : 1; barrier_interaction barrier; }; @@ -792,17 +792,17 @@ struct MUBUF_instruction : public Instruction { * */ struct MTBUF_instruction : public Instruction { + uint16_t offset; /* Unsigned byte offset - 12 bit */ uint8_t dfmt : 4; /* Data Format of data in memory buffer */ uint8_t nfmt : 3; /* Numeric format of data in memory */ - unsigned offset; /* Unsigned byte offset - 12 bit */ - bool offen; /* Supply an offset from VGPR (VADDR) */ - bool idxen; /* Supply an index from VGPR (VADDR) */ - bool glc; /* globally coherent */ - bool dlc; /* NAVI: device level coherent */ - bool slc; /* system level coherent */ - bool tfe; /* texture fail enable */ - bool disable_wqm; /* Require an exec mask without helper invocations */ - bool can_reorder; + bool offen : 1; /* Supply an offset from VGPR (VADDR) */ + bool idxen : 1; /* Supply an index from VGPR (VADDR) */ + bool glc : 1; /* globally coherent */ + bool dlc : 1; /* NAVI: device level coherent */ + bool slc : 1; /* system level coherent */ + bool tfe : 1; /* texture fail enable */ + bool disable_wqm : 1; /* Require an exec mask without helper invocations */ + bool can_reorder : 1; barrier_interaction barrier; }; @@ -815,20 +815,20 @@ struct MTBUF_instruction : public Instruction { * */ struct MIMG_instruction : public Instruction { - unsigned dmask; /* Data VGPR enable mask */ - unsigned dim; /* NAVI: dimensionality */ - bool unrm; /* Force address to be un-normalized */ - bool dlc; /* NAVI: device level coherent */ - bool glc; /* globally coherent */ - bool slc; /* system level coherent */ - bool tfe; /* texture fail enable */ - bool da; /* declare an array */ - bool lwe; /* Force data to be un-normalized */ - bool r128; /* NAVI: Texture resource size */ - bool a16; /* VEGA, NAVI: Address components are 16-bits */ - bool d16; /* Convert 32-bit data to 16-bit data */ - bool disable_wqm; /* Require an exec mask without helper invocations */ - bool can_reorder; + uint8_t dmask; /* Data VGPR enable mask */ + uint8_t dim : 3; /* NAVI: dimensionality */ + bool unrm : 1; /* Force address to be un-normalized */ + bool dlc : 1; /* NAVI: device level coherent */ + bool glc : 1; /* globally coherent */ + bool slc : 1; /* system level coherent */ + bool tfe : 1; /* texture fail enable */ + bool da : 1; /* declare an array */ + bool lwe : 1; /* Force data to be un-normalized */ + bool r128 : 1; /* NAVI: Texture resource size */ + bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */ + bool d16 : 1; /* Convert 32-bit data to 16-bit data */ + bool disable_wqm : 1; /* Require an exec mask without helper invocations */ + bool can_reorder : 1; barrier_interaction barrier; }; @@ -841,22 +841,22 @@ struct MIMG_instruction : public Instruction { */ struct FLAT_instruction : public Instruction { uint16_t offset; /* Vega/Navi only */ - bool slc; /* system level coherent */ - bool glc; /* globally coherent */ - bool dlc; /* NAVI: device level coherent */ - bool lds; - bool nv; - bool disable_wqm; /* Require an exec mask without helper invocations */ - bool can_reorder; + bool slc : 1; /* system level coherent */ + bool glc : 1; /* globally coherent */ + bool dlc : 1; /* NAVI: device level coherent */ + bool lds : 1; + bool nv : 1; + bool disable_wqm : 1; /* Require an exec mask without helper invocations */ + bool can_reorder : 1; barrier_interaction barrier; }; struct Export_instruction : public Instruction { - unsigned enabled_mask; - unsigned dest; - bool compressed; - bool done; - bool valid_mask; + uint8_t enabled_mask; + uint8_t dest; + bool compressed : 1; + bool done : 1; + bool valid_mask : 1; }; struct Pseudo_instruction : public Instruction { @@ -943,10 +943,10 @@ constexpr bool Instruction::usesModifiers() const noexcept return false; const VOP3A_instruction *vop3 = static_cast(this); for (unsigned i = 0; i < operands.size(); i++) { - if (vop3->abs[i] || vop3->opsel[i] || vop3->neg[i]) + if (vop3->abs[i] || vop3->neg[i]) return true; } - return vop3->opsel[3] || vop3->clamp || vop3->omod; + return vop3->opsel || vop3->clamp || vop3->omod; } constexpr bool is_phi(Instruction* instr) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 592c3868e20..7c304aa7501 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -533,7 +533,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - static_cast(perm)->opsel[0] = true; /* FI (Fetch Inactive) */ + static_cast(perm)->opsel = 1; /* FI (Fetch Inactive) */ } bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX)); @@ -644,7 +644,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - static_cast(perm)->opsel[0] = true; /* FI (Fetch Inactive) */ + static_cast(perm)->opsel = 1; /* FI (Fetch Inactive) */ } emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size()); diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 708987d4285..295a4343854 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -47,9 +47,9 @@ struct InstrHash { VOP3A_instruction* vop3 = static_cast(instr); for (unsigned i = 0; i < 3; i++) { hash ^= vop3->abs[i] << (i*3 + 0); - hash ^= vop3->opsel[i] << (i*3 + 1); hash ^= vop3->neg[i] << (i*3 + 2); } + hash ^= vop3->opsel * 13; hash ^= (vop3->clamp << 28) * 13; hash += vop3->omod << 19; } @@ -134,12 +134,12 @@ struct InstrPred { VOP3A_instruction* b3 = static_cast(b); for (unsigned i = 0; i < 3; i++) { if (a3->abs[i] != b3->abs[i] || - a3->opsel[i] != b3->opsel[i] || a3->neg[i] != b3->neg[i]) return false; } return a3->clamp == b3->clamp && - a3->omod == b3->omod; + a3->omod == b3->omod && + a3->opsel == b3->opsel; } if (a->isDPP()) { DPP_instruction* aDPP = static_cast(a); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 612497928dd..41fcc6f27f8 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -948,10 +948,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) } case aco_opcode::v_med3_f32: { /* clamp */ VOP3A_instruction* vop3 = static_cast(instr.get()); - if (vop3->abs[0] || vop3->neg[0] || vop3->opsel[0] || - vop3->abs[1] || vop3->neg[1] || vop3->opsel[1] || - vop3->abs[2] || vop3->neg[2] || vop3->opsel[2] || - vop3->omod != 0) + if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] || + vop3->neg[0] || vop3->neg[1] || vop3->neg[2] || + vop3->omod != 0 || vop3->opsel != 0) break; unsigned idx = 0; @@ -1173,7 +1172,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) bool neg[2] = {false, false}; bool abs[2] = {false, false}; - bool opsel[2] = {false, false}; + uint8_t opsel = 0; Instruction *op_instr[2]; Temp op[2]; @@ -1191,11 +1190,11 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) if (op_instr[i]->isVOP3()) { VOP3A_instruction *vop3 = static_cast(op_instr[i]); - if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel[0] != vop3->opsel[1]) + if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) return false; neg[i] = vop3->neg[0]; abs[i] = vop3->abs[0]; - opsel[i] = vop3->opsel[0]; + opsel |= (vop3->opsel & 1) << i; } Temp op0 = op_instr[i]->operands[0].getTemp(); @@ -1216,13 +1215,13 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) aco_opcode new_op = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32; Instruction *new_instr; - if (neg[0] || neg[1] || abs[0] || abs[1] || opsel[0] || opsel[1]) { + if (neg[0] || neg[1] || abs[0] || abs[1] || opsel) { VOP3A_instruction *vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); for (unsigned i = 0; i < 2; i++) { vop3->neg[i] = neg[i]; vop3->abs[i] = abs[i]; - vop3->opsel[i] = opsel[i]; } + vop3->opsel = opsel; new_instr = static_cast(vop3); } else { new_instr = create_instruction(new_op, Format::VOPC, 2, 1); @@ -1289,10 +1288,10 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr& instr) VOP3A_instruction *new_vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); VOP3A_instruction *cmp_vop3 = static_cast(cmp); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); - memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; new_vop3->omod = cmp_vop3->omod; + new_vop3->opsel = cmp_vop3->opsel; new_instr = new_vop3; } else { new_instr = create_instruction(new_op, Format::VOPC, 2, 1); @@ -1385,10 +1384,10 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& in VOP3A_instruction *new_vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); VOP3A_instruction *cmp_vop3 = static_cast(cmp); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); - memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; new_vop3->omod = cmp_vop3->omod; + new_vop3->opsel = cmp_vop3->opsel; new_instr = new_vop3; } else { new_instr = create_instruction(new_op, Format::VOPC, 2, 1); @@ -1434,10 +1433,10 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr& instr) VOP3A_instruction *new_vop3 = create_instruction(new_opcode, asVOP3(Format::VOPC), 2, 1); VOP3A_instruction *cmp_vop3 = static_cast(cmp); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); - memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; new_vop3->omod = cmp_vop3->omod; + new_vop3->opsel = cmp_vop3->opsel; new_instr = new_vop3; } else { new_instr = create_instruction(new_opcode, Format::VOPC, 2, 1); @@ -1458,8 +1457,8 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr& instr) * op1(0, op2(1, 2)) if swap = true */ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, Instruction* op1_instr, bool swap, const char *shuffle_str, - Operand operands[3], bool neg[3], bool abs[3], bool opsel[3], - bool *op1_clamp, unsigned *op1_omod, + Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel, + bool *op1_clamp, uint8_t *op1_omod, bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel) { /* checks */ @@ -1492,8 +1491,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, return false; if (inbetween_opsel) - *inbetween_opsel = op1_vop3 ? op1_vop3->opsel[swap] : false; - else if (op1_vop3 && op1_vop3->opsel[swap]) + *inbetween_opsel = op1_vop3 ? op1_vop3->opsel & (1 << swap) : false; + else if (op1_vop3 && op1_vop3->opsel & (1 << swap)) return false; int shuffle[3]; @@ -1504,13 +1503,15 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, operands[shuffle[0]] = op1_instr->operands[!swap]; neg[shuffle[0]] = op1_vop3 ? op1_vop3->neg[!swap] : false; abs[shuffle[0]] = op1_vop3 ? op1_vop3->abs[!swap] : false; - opsel[shuffle[0]] = op1_vop3 ? op1_vop3->opsel[!swap] : false; + if (op1_vop3 && op1_vop3->opsel & (1 << !swap)) + *opsel |= 1 << shuffle[0]; for (unsigned i = 0; i < 2; i++) { operands[shuffle[i + 1]] = op2_instr->operands[i]; neg[shuffle[i + 1]] = op2_vop3 ? op2_vop3->neg[i] : false; abs[shuffle[i + 1]] = op2_vop3 ? op2_vop3->abs[i] : false; - opsel[shuffle[i + 1]] = op2_vop3 ? op2_vop3->opsel[i] : false; + if (op2_vop3 && op2_vop3->opsel & (1 << i)) + *opsel |= 1 << shuffle[i + 1]; } /* check operands */ @@ -1530,15 +1531,15 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, } void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr& instr, - Operand operands[3], bool neg[3], bool abs[3], bool opsel[3], + Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel, bool clamp, unsigned omod) { VOP3A_instruction *new_instr = create_instruction(opcode, Format::VOP3A, 3, 1); memcpy(new_instr->abs, abs, sizeof(bool[3])); - memcpy(new_instr->opsel, opsel, sizeof(bool[3])); memcpy(new_instr->neg, neg, sizeof(bool[3])); new_instr->clamp = clamp; new_instr->omod = omod; + new_instr->opsel = opsel; new_instr->operands[0] = operands[0]; new_instr->operands[1] = operands[1]; new_instr->operands[2] = operands[2]; @@ -1558,11 +1559,11 @@ bool combine_three_valu_op(opt_ctx& ctx, aco_ptr& instr, aco_opcode continue; Operand operands[3]; - bool neg[3], abs[3], opsel[3], clamp; - unsigned omod; + bool neg[3], abs[3], clamp; + uint8_t opsel = 0, omod = 0; if (match_op3_for_vop3(ctx, instr->opcode, op2, instr.get(), swap, shuffle, - operands, neg, abs, opsel, + operands, neg, abs, &opsel, &clamp, &omod, NULL, NULL, NULL)) { ctx.uses[instr->operands[swap].tempId()]--; create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod); @@ -1751,10 +1752,10 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr& instr, for (unsigned swap = 0; swap < 2; swap++) { Operand operands[3]; - bool neg[3], abs[3], opsel[3], clamp, inbetween_neg, inbetween_abs; - unsigned omod; + bool neg[3], abs[3], clamp, inbetween_neg, inbetween_abs; + uint8_t opsel = 0, omod = 0; if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap, - "012", operands, neg, abs, opsel, + "012", operands, neg, abs, &opsel, &clamp, &omod, &inbetween_neg, &inbetween_abs, NULL)) { int const0_idx = -1, const1_idx = -1; uint32_t const0 = 0, const1 = 0; @@ -1779,9 +1780,9 @@ bool combine_clamp(opt_ctx& ctx, aco_ptr& instr, if (const0_idx < 0 || const1_idx < 0) continue; - if (opsel[const0_idx]) + if (opsel & (1 << const0_idx)) const0 >>= 16; - if (opsel[const1_idx]) + if (opsel & (1 << const1_idx)) const1 >>= 16; int lower_idx = const0_idx; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 834f06d2b94..dd414205eef 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1497,9 +1497,8 @@ void register_allocation(Program *program, std::vector> live_out_ instr->operands[1].getTemp().type() == RegType::vgpr) { /* TODO: swap src0 and src1 in this case */ VOP3A_instruction* vop3 = static_cast(instr.get()); bool can_use_mac = !(vop3->abs[0] || vop3->abs[1] || vop3->abs[2] || - vop3->opsel[0] || vop3->opsel[1] || vop3->opsel[2] || vop3->neg[0] || vop3->neg[1] || vop3->neg[2] || - vop3->clamp || vop3->omod); + vop3->clamp || vop3->omod || vop3->opsel); if (can_use_mac) { instr->format = Format::VOP2; instr->opcode = aco_opcode::v_mac_f32;