SDWA = 1 << 15,
};
-enum barrier_interaction {
+enum barrier_interaction : uint8_t {
barrier_none = 0,
barrier_buffer = 0x1,
barrier_image = 0x2,
*
*/
struct SMEM_instruction : public Instruction {
- bool glc; /* VI+: globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool nv; /* VEGA only: Non-volatile */
- bool can_reorder;
- bool disable_wqm;
+ bool glc : 1; /* VI+: globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool nv : 1; /* VEGA only: Non-volatile */
+ bool can_reorder : 1;
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
barrier_interaction barrier;
};
struct VOP3A_instruction : public Instruction {
bool abs[3];
- bool opsel[4];
- bool clamp;
- unsigned omod;
bool neg[3];
+ uint8_t opsel : 4; /* bitmask: bit i carries the flag that was opsel[i]; test with opsel & (1 << i) */
+ uint8_t omod : 2; /* 2-bit field: values above 3 are truncated on assignment */
+ bool clamp : 1;
};
/**
*
*/
struct DPP_instruction : public Instruction {
- uint16_t dpp_ctrl;
- uint8_t row_mask;
- uint8_t bank_mask;
bool abs[2];
bool neg[2];
- bool bound_ctrl;
+ uint16_t dpp_ctrl;
+ uint8_t row_mask : 4; /* 4-bit field: values above 0xF are truncated on assignment */
+ uint8_t bank_mask : 4; /* 4-bit field: values above 0xF are truncated on assignment */
+ bool bound_ctrl : 1;
};
struct Interp_instruction : public Instruction {
- unsigned attribute;
- unsigned component;
+ uint8_t attribute; /* NOTE(review): 8-bit storage; assumes every attribute index fits in 0..255 - confirm */
+ uint8_t component; /* NOTE(review): 8-bit storage; assumes every component index fits in 0..255 - confirm */
};
/**
*
*/
struct MUBUF_instruction : public Instruction {
- unsigned offset; /* Unsigned byte offset - 12 bit */
- bool offen; /* Supply an offset from VGPR (VADDR) */
- bool idxen; /* Supply an index from VGPR (VADDR) */
- bool glc; /* globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool slc; /* system level coherent */
- bool tfe; /* texture fail enable */
- bool lds; /* Return read-data to LDS instead of VGPRs */
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ uint16_t offset : 12; /* Unsigned byte offset - 12 bit. NOTE(review): whether the bool bit-fields below share this uint16_t unit is implementation-defined - confirm sizeof on every target compiler */
+ bool offen : 1; /* Supply an offset from VGPR (VADDR) */
+ bool idxen : 1; /* Supply an index from VGPR (VADDR) */
+ bool glc : 1; /* globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool slc : 1; /* system level coherent */
+ bool tfe : 1; /* texture fail enable */
+ bool lds : 1; /* Return read-data to LDS instead of VGPRs */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
*
*/
struct MTBUF_instruction : public Instruction {
+ uint16_t offset; /* Unsigned byte offset - 12 bit (held in a full uint16_t here, not a bit-field) */
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
uint8_t nfmt : 3; /* Numeric format of data in memory */
- unsigned offset; /* Unsigned byte offset - 12 bit */
- bool offen; /* Supply an offset from VGPR (VADDR) */
- bool idxen; /* Supply an index from VGPR (VADDR) */
- bool glc; /* globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool slc; /* system level coherent */
- bool tfe; /* texture fail enable */
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ bool offen : 1; /* Supply an offset from VGPR (VADDR) */
+ bool idxen : 1; /* Supply an index from VGPR (VADDR) */
+ bool glc : 1; /* globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool slc : 1; /* system level coherent */
+ bool tfe : 1; /* texture fail enable */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
*
*/
struct MIMG_instruction : public Instruction {
- unsigned dmask; /* Data VGPR enable mask */
- unsigned dim; /* NAVI: dimensionality */
- bool unrm; /* Force address to be un-normalized */
- bool dlc; /* NAVI: device level coherent */
- bool glc; /* globally coherent */
- bool slc; /* system level coherent */
- bool tfe; /* texture fail enable */
- bool da; /* declare an array */
- bool lwe; /* Force data to be un-normalized */
- bool r128; /* NAVI: Texture resource size */
- bool a16; /* VEGA, NAVI: Address components are 16-bits */
- bool d16; /* Convert 32-bit data to 16-bit data */
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ uint8_t dmask; /* Data VGPR enable mask */
+ uint8_t dim : 3; /* NAVI: dimensionality */
+ bool unrm : 1; /* Force address to be un-normalized */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool glc : 1; /* globally coherent */
+ bool slc : 1; /* system level coherent */
+ bool tfe : 1; /* texture fail enable */
+ bool da : 1; /* declare an array */
+ bool lwe : 1; /* LOD warning enable */
+ bool r128 : 1; /* NAVI: Texture resource size */
+ bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
+ bool d16 : 1; /* Convert 32-bit data to 16-bit data */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
*/
struct FLAT_instruction : public Instruction {
uint16_t offset; /* Vega/Navi only */
- bool slc; /* system level coherent */
- bool glc; /* globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool lds;
- bool nv;
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ bool slc : 1; /* system level coherent */
+ bool glc : 1; /* globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool lds : 1; /* presumably as for MUBUF: return read-data to LDS instead of VGPRs - TODO confirm */
+ bool nv : 1; /* presumably as for SMEM: Non-volatile - TODO confirm */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
struct Export_instruction : public Instruction {
- unsigned enabled_mask;
- unsigned dest;
- bool compressed;
- bool done;
- bool valid_mask;
+ uint8_t enabled_mask; /* NOTE(review): 8-bit storage; assumes the mask never needs more than 8 bits - confirm */
+ uint8_t dest; /* NOTE(review): 8-bit storage; assumes every export target fits in 0..255 - confirm */
+ bool compressed : 1;
+ bool done : 1;
+ bool valid_mask : 1;
};
struct Pseudo_instruction : public Instruction {
return false;
const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
for (unsigned i = 0; i < operands.size(); i++) {
- if (vop3->abs[i] || vop3->opsel[i] || vop3->neg[i])
+ if (vop3->abs[i] || vop3->neg[i])
return true;
}
- return vop3->opsel[3] || vop3->clamp || vop3->omod;
+ return vop3->opsel || vop3->clamp || vop3->omod;
}
constexpr bool is_phi(Instruction* instr)
}
case aco_opcode::v_med3_f32: { /* clamp */
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
- if (vop3->abs[0] || vop3->neg[0] || vop3->opsel[0] ||
- vop3->abs[1] || vop3->neg[1] || vop3->opsel[1] ||
- vop3->abs[2] || vop3->neg[2] || vop3->opsel[2] ||
- vop3->omod != 0)
+ if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
+ vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
+ vop3->omod != 0 || vop3->opsel != 0)
break;
unsigned idx = 0;
bool neg[2] = {false, false};
bool abs[2] = {false, false};
- bool opsel[2] = {false, false};
+ uint8_t opsel = 0;
Instruction *op_instr[2];
Temp op[2];
if (op_instr[i]->isVOP3()) {
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
- if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel[0] != vop3->opsel[1])
+ if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
return false;
neg[i] = vop3->neg[0];
abs[i] = vop3->abs[0];
- opsel[i] = vop3->opsel[0];
+ opsel |= (vop3->opsel & 1) << i;
}
Temp op0 = op_instr[i]->operands[0].getTemp();
aco_opcode new_op = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
Instruction *new_instr;
- if (neg[0] || neg[1] || abs[0] || abs[1] || opsel[0] || opsel[1]) {
+ if (neg[0] || neg[1] || abs[0] || abs[1] || opsel) {
VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
for (unsigned i = 0; i < 2; i++) {
vop3->neg[i] = neg[i];
vop3->abs[i] = abs[i];
- vop3->opsel[i] = opsel[i];
}
+ vop3->opsel = opsel;
new_instr = static_cast<Instruction *>(vop3);
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
- memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
+ new_vop3->opsel = cmp_vop3->opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
- memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
+ new_vop3->opsel = cmp_vop3->opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
- memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
+ new_vop3->opsel = cmp_vop3->opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
* op1(0, op2(1, 2)) if swap = true */
bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
Instruction* op1_instr, bool swap, const char *shuffle_str,
- Operand operands[3], bool neg[3], bool abs[3], bool opsel[3],
- bool *op1_clamp, unsigned *op1_omod,
+ Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel,
+ bool *op1_clamp, uint8_t *op1_omod,
bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
{
/* checks */
return false;
if (inbetween_opsel)
- *inbetween_opsel = op1_vop3 ? op1_vop3->opsel[swap] : false;
- else if (op1_vop3 && op1_vop3->opsel[swap])
+ *inbetween_opsel = op1_vop3 ? op1_vop3->opsel & (1 << swap) : false;
+ else if (op1_vop3 && op1_vop3->opsel & (1 << swap))
return false;
int shuffle[3];
operands[shuffle[0]] = op1_instr->operands[!swap];
neg[shuffle[0]] = op1_vop3 ? op1_vop3->neg[!swap] : false;
abs[shuffle[0]] = op1_vop3 ? op1_vop3->abs[!swap] : false;
- opsel[shuffle[0]] = op1_vop3 ? op1_vop3->opsel[!swap] : false;
+ if (op1_vop3 && op1_vop3->opsel & (1 << !swap))
+ *opsel |= 1 << shuffle[0];
for (unsigned i = 0; i < 2; i++) {
operands[shuffle[i + 1]] = op2_instr->operands[i];
neg[shuffle[i + 1]] = op2_vop3 ? op2_vop3->neg[i] : false;
abs[shuffle[i + 1]] = op2_vop3 ? op2_vop3->abs[i] : false;
- opsel[shuffle[i + 1]] = op2_vop3 ? op2_vop3->opsel[i] : false;
+ if (op2_vop3 && op2_vop3->opsel & (1 << i))
+ *opsel |= 1 << shuffle[i + 1];
}
/* check operands */
}
void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
- Operand operands[3], bool neg[3], bool abs[3], bool opsel[3],
+ Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel,
bool clamp, unsigned omod)
{
VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
memcpy(new_instr->abs, abs, sizeof(bool[3]));
- memcpy(new_instr->opsel, opsel, sizeof(bool[3]));
memcpy(new_instr->neg, neg, sizeof(bool[3]));
new_instr->clamp = clamp;
new_instr->omod = omod;
+ new_instr->opsel = opsel;
new_instr->operands[0] = operands[0];
new_instr->operands[1] = operands[1];
new_instr->operands[2] = operands[2];
continue;
Operand operands[3];
- bool neg[3], abs[3], opsel[3], clamp;
- unsigned omod;
+ bool neg[3], abs[3], clamp;
+ uint8_t opsel = 0, omod = 0;
if (match_op3_for_vop3(ctx, instr->opcode, op2,
instr.get(), swap, shuffle,
- operands, neg, abs, opsel,
+ operands, neg, abs, &opsel,
&clamp, &omod, NULL, NULL, NULL)) {
ctx.uses[instr->operands[swap].tempId()]--;
create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
for (unsigned swap = 0; swap < 2; swap++) {
Operand operands[3];
- bool neg[3], abs[3], opsel[3], clamp, inbetween_neg, inbetween_abs;
- unsigned omod;
+ bool neg[3], abs[3], clamp, inbetween_neg, inbetween_abs;
+ uint8_t opsel = 0, omod = 0;
if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
- "012", operands, neg, abs, opsel,
+ "012", operands, neg, abs, &opsel,
&clamp, &omod, &inbetween_neg, &inbetween_abs, NULL)) {
int const0_idx = -1, const1_idx = -1;
uint32_t const0 = 0, const1 = 0;
if (const0_idx < 0 || const1_idx < 0)
continue;
- if (opsel[const0_idx])
+ if (opsel & (1 << const0_idx))
const0 >>= 16;
- if (opsel[const1_idx])
+ if (opsel & (1 << const1_idx))
const1 >>= 16;
int lower_idx = const0_idx;