};
static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success | label_add_sub | label_bitwise | label_minmax | label_fcmp;
-static constexpr uint32_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool;
+static constexpr uint32_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_omod2 | label_omod4 | label_omod5 | label_clamp; /* NOTE(review): omod2/omod4/omod5/clamp presumably join this mask because set_omod2()/set_omod4()/set_omod5()/set_clamp() now store a Temp - confirm */
static constexpr uint32_t val_labels = label_constant | label_literal | label_mad;
struct ssa_info {
return label & label_mad;
}
- void set_omod2()
+ void set_omod2(Temp def) /* def is stored in `temp`; the labelling code passes definitions[0] of the instruction that has the 2.0 constant operand */
{
add_label(label_omod2);
+ temp = def; /* kept so later passes can look up this temp's use count */
}
bool is_omod2()
return label & label_omod2;
}
- void set_omod4()
+ void set_omod4(Temp def) /* def is stored in `temp`; the labelling code passes definitions[0] of the instruction that has the 4.0 constant operand */
{
add_label(label_omod4);
+ temp = def; /* kept so later passes can look up this temp's use count */
}
bool is_omod4()
return label & label_omod4;
}
- void set_omod5()
+ void set_omod5(Temp def) /* def is stored in `temp`; the labelling code passes definitions[0] of the instruction that has the 0.5 constant operand */
{
add_label(label_omod5);
+ temp = def; /* kept so later passes can look up this temp's use count */
}
bool is_omod5()
return label & label_omod_success;
}
- void set_clamp()
+ void set_clamp(Temp def) /* def is stored in `temp`; the labelling code passes definitions[0] of the v_med3_f32 recognised as a clamp */
{
add_label(label_clamp);
+ temp = def; /* kept so later passes can look up this temp's use count */
}
bool is_clamp()
bool can_use_VOP3(aco_ptr<Instruction>& instr)
{
+ if (instr->isVOP3()) /* an instruction that already is VOP3 trivially qualifies */
+ return true;
+
if (instr->operands.size() && instr->operands[0].isLiteral())
return false;
return instr->opcode != aco_opcode::v_madmk_f32 &&
instr->opcode != aco_opcode::v_madak_f32 &&
instr->opcode != aco_opcode::v_madmk_f16 &&
- instr->opcode != aco_opcode::v_madak_f16;
+ instr->opcode != aco_opcode::v_madak_f16 &&
+ instr->opcode != aco_opcode::v_fmamk_f32 && /* the fmamk/fmaak opcodes are rejected just like madmk/madak above */
+ instr->opcode != aco_opcode::v_fmaak_f32 &&
+ instr->opcode != aco_opcode::v_fmamk_f16 &&
+ instr->opcode != aco_opcode::v_fmaak_f16 &&
+ instr->opcode != aco_opcode::v_readlane_b32 && /* readlane/writelane/readfirstlane are rejected as well */
+ instr->opcode != aco_opcode::v_writelane_b32 &&
+ instr->opcode != aco_opcode::v_readfirstlane_b32;
}
bool can_apply_sgprs(aco_ptr<Instruction>& instr)
{
return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
instr->opcode != aco_opcode::v_readlane_b32 &&
- instr->opcode != aco_opcode::v_writelane_b32;
+ instr->opcode != aco_opcode::v_readlane_b32_e64 && /* the _e64 opcode variant is excluded too */
+ instr->opcode != aco_opcode::v_writelane_b32 &&
+ instr->opcode != aco_opcode::v_writelane_b32_e64; /* the _e64 opcode variant is excluded too */
}
void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
case aco_opcode::v_interp_p2_f32:
case aco_opcode::v_mac_f32:
case aco_opcode::v_writelane_b32:
+ case aco_opcode::v_writelane_b32_e64:
case aco_opcode::v_cndmask_b32:
return operand != 2;
case aco_opcode::s_addk_i32:
case aco_opcode::p_extract_vector:
case aco_opcode::p_split_vector:
case aco_opcode::v_readlane_b32:
+ case aco_opcode::v_readlane_b32_e64:
case aco_opcode::v_readfirstlane_b32:
return operand != 0;
default:
}
}
-bool valu_can_accept_literal(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned operand)
+bool valu_can_accept_vgpr(aco_ptr<Instruction>& instr, unsigned operand) /* returns false only for operand 1 of v_readlane/v_writelane (either opcode variant) */
{
- /* instructions like v_cndmask_b32 can't take a literal because they always
- * read SGPRs */
- if (instr->operands.size() >= 3 &&
- instr->operands[2].isTemp() && instr->operands[2].regClass().type() == RegType::sgpr)
- return false;
-
- // TODO: VOP3 can take a literal on GFX10
- return !instr->isSDWA() && !instr->isDPP() && !instr->isVOP3() &&
- operand == 0 && can_accept_constant(instr, operand);
+ if (instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_readlane_b32_e64 ||
+ instr->opcode == aco_opcode::v_writelane_b32 || instr->opcode == aco_opcode::v_writelane_b32_e64)
+ return operand != 1; /* for these opcodes every operand index except 1 is accepted */
+ return true;
}
-bool valu_can_accept_vgpr(aco_ptr<Instruction>& instr, unsigned operand)
+/* check constant bus and literal limitations
+ *
+ * Scans operands[0 .. num_operands) and returns false as soon as a literal
+ * operand is found or a second distinct SGPR temporary is read; returns true
+ * otherwise. ctx is not read by this implementation.
+ */
+bool check_vop3_operands(opt_ctx& ctx, unsigned num_operands, Operand *operands)
{
- if (instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32)
- return operand != 1;
+ int limit = 1; /* number of distinct SGPRs that may still be read */
+ unsigned num_sgprs = 0;
+ unsigned sgpr[] = {0, 0}; /* temp ids counted so far; 0 marks an empty slot, so an id of 0 is never counted */
+
+ for (unsigned i = 0; i < num_operands; i++) {
+ Operand op = operands[i];
+
+ if (op.hasRegClass() && op.regClass().type() == RegType::sgpr) {
+ /* two reads of the same SGPR count as 1 to the limit */
+ if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
+ if (num_sgprs < 2)
+ sgpr[num_sgprs++] = op.tempId();
+ limit--;
+ if (limit < 0)
+ return false;
+ }
+ } else if (op.isLiteral()) {
+ return false; /* any literal operand fails the check */
+ }
+ }
+
return true;
}
return false;
}
+Operand get_constant_op(opt_ctx &ctx, uint32_t val) /* returns Operand(val); the value 0x3e22f983 is additionally fixed to PhysReg 248 when chip_class >= GFX8 */
+{
+ // TODO: this function shouldn't be needed if we store Operand instead of value.
+ Operand op(val);
+ if (val == 0x3e22f983 && ctx.program->chip_class >= GFX8)
+ op.setFixed(PhysReg{248}); /* 0x3e22f983 is 1/(2*PI) as a float; it can be an inline constant on GFX8+ */
+ return op;
+}
+
void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
{
if (instr->isSALU() || instr->isVALU() || instr->format == Format::PSEUDO) {
}
}
if ((info.is_constant() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) {
- instr->operands[i] = Operand(info.val);
+ instr->operands[i] = get_constant_op(ctx, info.val);
continue;
}
}
}
if (info.is_constant() && can_accept_constant(instr, i)) {
perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get());
- if (i == 0) {
- instr->operands[i] = Operand(info.val);
+ if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) {
+ instr->operands[i] = get_constant_op(ctx, info.val);
continue;
} else if (!instr->isVOP3() && can_swap_operands(instr)) {
instr->operands[i] = instr->operands[0];
- instr->operands[0] = Operand(info.val);
+ instr->operands[0] = get_constant_op(ctx, info.val);
continue;
} else if (can_use_VOP3(instr)) {
to_VOP3(ctx, instr);
- instr->operands[i] = Operand(info.val);
+ instr->operands[i] = get_constant_op(ctx, info.val);
continue;
}
}
DS_instruction *ds = static_cast<DS_instruction *>(instr.get());
Temp base;
uint32_t offset;
- if (i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == instr->operands[i].regClass()) {
+ if (i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == instr->operands[i].regClass() && instr->opcode != aco_opcode::ds_swizzle_b32) {
if (instr->opcode == aco_opcode::ds_write2_b32 || instr->opcode == aco_opcode::ds_read2_b32 ||
instr->opcode == aco_opcode::ds_write2_b64 || instr->opcode == aco_opcode::ds_read2_b64) {
if (offset % 4 == 0 &&
SMEM_instruction *smem = static_cast<SMEM_instruction *>(instr.get());
Temp base;
uint32_t offset;
- if (i == 1 && info.is_constant_or_literal() && info.val <= 0xFFFFF) {
+ if (i == 1 && info.is_constant_or_literal() &&
+ (ctx.program->chip_class < GFX8 || info.val <= 0xFFFFF)) {
instr->operands[i] = Operand(info.val);
continue;
} else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
unsigned k = 0;
for (Operand& old_op : old_vec->operands) {
if (old_op.isTemp() && ctx.info[old_op.tempId()].is_vec()) {
- for (unsigned j = 0; j < ctx.info[old_op.tempId()].instr->operands.size(); j++)
- instr->operands[k++] = ctx.info[old_op.tempId()].instr->operands[j];
+ for (unsigned j = 0; j < ctx.info[old_op.tempId()].instr->operands.size(); j++) {
+ Operand op = ctx.info[old_op.tempId()].instr->operands[j];
+ if (op.isTemp() && ctx.info[op.tempId()].is_temp() &&
+ ctx.info[op.tempId()].temp.type() == instr->definitions[0].regClass().type())
+ op.setTemp(ctx.info[op.tempId()].temp);
+ instr->operands[k++] = op;
+ }
} else {
instr->operands[k++] = old_op;
}
Format format = is_vgpr ? Format::VOP1 : Format::SOP1;
instr->opcode = opcode;
instr->format = format;
- instr->operands = {instr->operands.begin(), 1 };
+ while (instr->operands.size() > 1)
+ instr->operands.pop_back();
instr->operands[0] = vec_op;
if (vec_op.isConstant()) {
for (unsigned i = 0; i < 2; i++) {
if (instr->operands[!i].isConstant() && instr->operands[i].isTemp()) {
if (instr->operands[!i].constantValue() == 0x40000000) { /* 2.0 */
- ctx.info[instr->operands[i].tempId()].set_omod2();
+ ctx.info[instr->operands[i].tempId()].set_omod2(instr->definitions[0].getTemp());
} else if (instr->operands[!i].constantValue() == 0x40800000) { /* 4.0 */
- ctx.info[instr->operands[i].tempId()].set_omod4();
+ ctx.info[instr->operands[i].tempId()].set_omod4(instr->definitions[0].getTemp());
} else if (instr->operands[!i].constantValue() == 0x3f000000) { /* 0.5 */
- ctx.info[instr->operands[i].tempId()].set_omod5();
+ ctx.info[instr->operands[i].tempId()].set_omod5(instr->definitions[0].getTemp());
} else if (instr->operands[!i].constantValue() == 0x3f800000 &&
!block.fp_mode.must_flush_denorms32) { /* 1.0 */
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[i].getTemp());
break;
}
case aco_opcode::v_and_b32: /* abs */
- if (instr->operands[0].constantEquals(0x7FFFFFFF) && instr->operands[1].isTemp())
+ if (!instr->usesModifiers() && instr->operands[0].constantEquals(0x7FFFFFFF) &&
+ instr->operands[1].isTemp() && instr->operands[1].getTemp().type() == RegType::vgpr)
ctx.info[instr->definitions[0].tempId()].set_abs(instr->operands[1].getTemp());
else
ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
break;
case aco_opcode::v_xor_b32: { /* neg */
- if (instr->operands[0].constantEquals(0x80000000u) && instr->operands[1].isTemp()) {
+ if (!instr->usesModifiers() && instr->operands[0].constantEquals(0x80000000u) && instr->operands[1].isTemp()) {
if (ctx.info[instr->operands[1].tempId()].is_neg()) {
ctx.info[instr->definitions[0].tempId()].set_temp(ctx.info[instr->operands[1].tempId()].temp);
- } else {
+ } else if (instr->operands[1].getTemp().type() == RegType::vgpr) {
if (ctx.info[instr->operands[1].tempId()].is_abs()) { /* neg(abs(x)) */
instr->operands[1].setTemp(ctx.info[instr->operands[1].tempId()].temp);
instr->opcode = aco_opcode::v_or_b32;
}
case aco_opcode::v_med3_f32: { /* clamp */
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
- if (vop3->abs[0] || vop3->neg[0] || vop3->opsel[0] ||
- vop3->abs[1] || vop3->neg[1] || vop3->opsel[1] ||
- vop3->abs[2] || vop3->neg[2] || vop3->opsel[2] ||
- vop3->omod != 0)
+ if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
+ vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
+ vop3->omod != 0 || vop3->opsel != 0)
break;
unsigned idx = 0;
idx = i;
}
if (found_zero && found_one && instr->operands[idx].isTemp()) {
- ctx.info[instr->operands[idx].tempId()].set_clamp();
+ ctx.info[instr->operands[idx].tempId()].set_clamp(instr->definitions[0].getTemp());
}
break;
}
* s_and_b64(eq(a, a), eq(b, b)) -> v_cmp_o_f32(a, b) */
bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
{
- if (instr->opcode != aco_opcode::s_or_b64 && instr->opcode != aco_opcode::s_and_b64)
+ if (instr->definitions[0].regClass() != ctx.program->lane_mask)
return false;
if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
return false;
+ bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32;
+
bool neg[2] = {false, false};
bool abs[2] = {false, false};
- bool opsel[2] = {false, false};
+ uint8_t opsel = 0;
Instruction *op_instr[2];
Temp op[2];
if (!op_instr[i])
return false;
- aco_opcode expected_cmp = instr->opcode == aco_opcode::s_or_b64 ?
- aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32;
+ aco_opcode expected_cmp = is_or ? aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32;
if (op_instr[i]->opcode != expected_cmp)
return false;
if (op_instr[i]->isVOP3()) {
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
- if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel[0] != vop3->opsel[1])
+ if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
return false;
neg[i] = vop3->neg[0];
abs[i] = vop3->abs[0];
- opsel[i] = vop3->opsel[0];
+ opsel |= (vop3->opsel & 1) << i;
}
Temp op0 = op_instr[i]->operands[0].getTemp();
Temp op1 = op_instr[i]->operands[1].getTemp();
if (original_temp_id(ctx, op0) != original_temp_id(ctx, op1))
return false;
- /* shouldn't happen yet, but best to be safe */
- if (op1.type() != RegType::vgpr)
- return false;
op[i] = op1;
}
+ if (op[1].type() == RegType::sgpr)
+ std::swap(op[0], op[1]);
+ //TODO: we can use two different SGPRs on GFX10
+ if (op[0].type() == RegType::sgpr && op[1].type() == RegType::sgpr)
+ return false;
+
ctx.uses[op[0].id()]++;
ctx.uses[op[1].id()]++;
decrease_uses(ctx, op_instr[0]);
decrease_uses(ctx, op_instr[1]);
- aco_opcode new_op = instr->opcode == aco_opcode::s_or_b64 ?
- aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
+ aco_opcode new_op = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
Instruction *new_instr;
- if (neg[0] || neg[1] || abs[0] || abs[1] || opsel[0] || opsel[1]) {
+ if (neg[0] || neg[1] || abs[0] || abs[1] || opsel) {
VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
for (unsigned i = 0; i < 2; i++) {
vop3->neg[i] = neg[i];
vop3->abs[i] = abs[i];
- vop3->opsel[i] = opsel[i];
}
+ vop3->opsel = opsel;
new_instr = static_cast<Instruction *>(vop3);
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
* s_and_b64(v_cmp_o_f32(a, b), cmp(a, b)) -> get_ordered(cmp)(a, b) */
bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
{
- if (instr->opcode != aco_opcode::s_or_b64 && instr->opcode != aco_opcode::s_and_b64)
+ if (instr->definitions[0].regClass() != ctx.program->lane_mask)
return false;
if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
return false;
- aco_opcode expected_nan_test = instr->opcode == aco_opcode::s_or_b64 ?
- aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
+ bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32;
+ aco_opcode expected_nan_test = is_or ? aco_opcode::v_cmp_u_f32 : aco_opcode::v_cmp_o_f32;
Instruction *nan_test = follow_operand(ctx, instr->operands[0], true);
Instruction *cmp = follow_operand(ctx, instr->operands[1], true);
decrease_uses(ctx, nan_test);
decrease_uses(ctx, cmp);
- aco_opcode new_op = instr->opcode == aco_opcode::s_or_b64 ?
- get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
+ aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
Instruction *new_instr;
if (cmp->isVOP3()) {
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
- memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
+ new_vop3->opsel = cmp_vop3->opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
* s_and_b64(v_cmp_eq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_ordered(cmp)(a, b) */
bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
{
- if (instr->opcode != aco_opcode::s_or_b64 && instr->opcode != aco_opcode::s_and_b64)
+ if (instr->definitions[0].regClass() != ctx.program->lane_mask)
return false;
if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
return false;
+ bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32;
+
Instruction *nan_test = follow_operand(ctx, instr->operands[0], true);
Instruction *cmp = follow_operand(ctx, instr->operands[1], true);
if (!nan_test || !cmp)
return false;
- aco_opcode expected_nan_test = instr->opcode == aco_opcode::s_or_b64 ?
- aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32;
+ aco_opcode expected_nan_test = is_or ? aco_opcode::v_cmp_neq_f32 : aco_opcode::v_cmp_eq_f32;
if (cmp->opcode == expected_nan_test)
std::swap(nan_test, cmp);
else if (nan_test->opcode != expected_nan_test)
if (prop_nan0 != prop_nan1)
return false;
+ if (nan_test->isVOP3()) {
+ VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(nan_test);
+ if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
+ return false;
+ }
+
int constant_operand = -1;
for (unsigned i = 0; i < 2; i++) {
if (cmp->operands[i].isTemp() && original_temp_id(ctx, cmp->operands[i].getTemp()) == prop_nan0) {
if (cmp->operands[constant_operand].isConstant()) {
constant = cmp->operands[constant_operand].constantValue();
} else if (cmp->operands[constant_operand].isTemp()) {
- unsigned id = cmp->operands[constant_operand].tempId();
+ Temp tmp = cmp->operands[constant_operand].getTemp();
+ unsigned id = original_temp_id(ctx, tmp);
if (!ctx.info[id].is_constant() && !ctx.info[id].is_literal())
return false;
constant = ctx.info[id].val;
decrease_uses(ctx, nan_test);
decrease_uses(ctx, cmp);
- aco_opcode new_op = instr->opcode == aco_opcode::s_or_b64 ?
- get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
+ aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
Instruction *new_instr;
if (cmp->isVOP3()) {
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
- memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
+ new_vop3->opsel = cmp_vop3->opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
- memcpy(new_vop3->opsel, cmp_vop3->opsel, sizeof(new_vop3->opsel));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
new_vop3->omod = cmp_vop3->omod;
+ new_vop3->opsel = cmp_vop3->opsel;
new_instr = new_vop3;
} else {
new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
* op1(0, op2(1, 2)) if swap = true */
bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
Instruction* op1_instr, bool swap, const char *shuffle_str,
- Operand operands[3], bool neg[3], bool abs[3], bool opsel[3],
- bool *op1_clamp, unsigned *op1_omod,
+ Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel,
+ bool *op1_clamp, uint8_t *op1_omod,
bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
{
/* checks */
return false;
if (inbetween_opsel)
- *inbetween_opsel = op1_vop3 ? op1_vop3->opsel[swap] : false;
- else if (op1_vop3 && op1_vop3->opsel[swap])
+ *inbetween_opsel = op1_vop3 ? op1_vop3->opsel & (1 << swap) : false;
+ else if (op1_vop3 && op1_vop3->opsel & (1 << swap))
return false;
int shuffle[3];
operands[shuffle[0]] = op1_instr->operands[!swap];
neg[shuffle[0]] = op1_vop3 ? op1_vop3->neg[!swap] : false;
abs[shuffle[0]] = op1_vop3 ? op1_vop3->abs[!swap] : false;
- opsel[shuffle[0]] = op1_vop3 ? op1_vop3->opsel[!swap] : false;
+ if (op1_vop3 && op1_vop3->opsel & (1 << !swap))
+ *opsel |= 1 << shuffle[0];
for (unsigned i = 0; i < 2; i++) {
operands[shuffle[i + 1]] = op2_instr->operands[i];
neg[shuffle[i + 1]] = op2_vop3 ? op2_vop3->neg[i] : false;
abs[shuffle[i + 1]] = op2_vop3 ? op2_vop3->abs[i] : false;
- opsel[shuffle[i + 1]] = op2_vop3 ? op2_vop3->opsel[i] : false;
+ if (op2_vop3 && op2_vop3->opsel & (1 << i))
+ *opsel |= 1 << shuffle[i + 1];
}
/* check operands */
- unsigned sgpr_id = 0;
- for (unsigned i = 0; i < 3; i++) {
- Operand op = operands[i];
- if (op.isLiteral()) {
- return false;
- } else if (op.isTemp() && op.getTemp().type() == RegType::sgpr) {
- if (sgpr_id && sgpr_id != op.tempId())
- return false;
- sgpr_id = op.tempId();
- }
- }
+ if (!check_vop3_operands(ctx, 3, operands))
+ return false;
return true;
}
void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
- Operand operands[3], bool neg[3], bool abs[3], bool opsel[3],
+ Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel,
bool clamp, unsigned omod)
{
VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
memcpy(new_instr->abs, abs, sizeof(bool[3]));
- memcpy(new_instr->opsel, opsel, sizeof(bool[3]));
memcpy(new_instr->neg, neg, sizeof(bool[3]));
new_instr->clamp = clamp;
new_instr->omod = omod;
+ new_instr->opsel = opsel;
new_instr->operands[0] = operands[0];
new_instr->operands[1] = operands[1];
new_instr->operands[2] = operands[2];
continue;
Operand operands[3];
- bool neg[3], abs[3], opsel[3], clamp;
- unsigned omod;
+ bool neg[3], abs[3], clamp;
+ uint8_t opsel = 0, omod = 0;
if (match_op3_for_vop3(ctx, instr->opcode, op2,
instr.get(), swap, shuffle,
- operands, neg, abs, opsel,
+ operands, neg, abs, &opsel,
&clamp, &omod, NULL, NULL, NULL)) {
ctx.uses[instr->operands[swap].tempId()]--;
create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
if (!op2_instr || (op2_instr->opcode != aco_opcode::s_not_b32 && op2_instr->opcode != aco_opcode::s_not_b64))
continue;
+ if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
+ instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
+ continue;
+
ctx.uses[instr->operands[i].tempId()]--;
instr->operands[0] = instr->operands[!i];
instr->operands[1] = op2_instr->operands[0];
if (shift < 1 || shift > 4)
continue;
+ if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
+ instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
+ continue;
+
ctx.uses[instr->operands[i].tempId()]--;
instr->operands[1] = instr->operands[!i];
instr->operands[0] = op2_instr->operands[0];
for (unsigned swap = 0; swap < 2; swap++) {
Operand operands[3];
- bool neg[3], abs[3], opsel[3], clamp, inbetween_neg, inbetween_abs;
- unsigned omod;
+ bool neg[3], abs[3], clamp, inbetween_neg, inbetween_abs;
+ uint8_t opsel = 0, omod = 0;
if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
- "012", operands, neg, abs, opsel,
+ "012", operands, neg, abs, &opsel,
&clamp, &omod, &inbetween_neg, &inbetween_abs, NULL)) {
int const0_idx = -1, const1_idx = -1;
uint32_t const0 = 0, const1 = 0;
if (const0_idx < 0 || const1_idx < 0)
continue;
- if (opsel[const0_idx])
+ if (opsel & (1 << const0_idx))
const0 >>= 16;
- if (opsel[const1_idx])
+ if (opsel & (1 << const1_idx))
const1 >>= 16;
int lower_idx = const0_idx;
+/* Replaces operands of instr whose ssa_info says they are a copy of an SGPR
+ * temporary (info.is_temp() with an sgpr-typed info.temp) by that SGPR.
+ *
+ * At most max_sgprs distinct SGPRs may be read by the instruction: 1, or 0 if
+ * it already has a literal operand. Re-using an SGPR the instruction already
+ * reads is always allowed. Candidates are tried in order of ascending use
+ * count, and ctx.uses is updated for every operand that gets replaced.
+ */
void apply_sgprs(opt_ctx &ctx, aco_ptr<Instruction>& instr)
{
- /* apply sgprs */
- uint32_t sgpr_idx = 0;
- uint32_t sgpr_info_id = 0;
- bool has_sgpr = false;
- uint32_t sgpr_ssa_id = 0;
- /* find 'best' possible sgpr */
- for (unsigned i = 0; i < instr->operands.size(); i++)
- {
- if (instr->operands[i].isLiteral()) {
- has_sgpr = true;
- break;
- }
+ /* find candidates and create the set of sgprs already read */
+ unsigned sgpr_ids[2] = {0, 0};
+ uint32_t operand_mask = 0; /* bit i set: operand i can be replaced by an sgpr */
+ bool has_literal = false;
+ for (unsigned i = 0; i < instr->operands.size(); i++) {
+ if (instr->operands[i].isLiteral())
+ has_literal = true;
if (!instr->operands[i].isTemp())
continue;
if (instr->operands[i].getTemp().type() == RegType::sgpr) {
- has_sgpr = true;
- sgpr_ssa_id = instr->operands[i].tempId();
- continue;
+ if (instr->operands[i].tempId() != sgpr_ids[0])
+ sgpr_ids[!!sgpr_ids[0]] = instr->operands[i].tempId();
}
ssa_info& info = ctx.info[instr->operands[i].tempId()];
- if (info.is_temp() && info.temp.type() == RegType::sgpr) {
+ if (info.is_temp() && info.temp.type() == RegType::sgpr)
+ operand_mask |= 1u << i;
+ }
+ unsigned max_sgprs = 1;
+ if (has_literal)
+ max_sgprs--; /* with a literal present no new sgpr may be added */
+
+ unsigned num_sgprs = !!sgpr_ids[0] + !!sgpr_ids[1];
+
+ /* keep on applying sgprs until there is nothing left to be done */
+ while (operand_mask) {
+ uint32_t sgpr_idx = 0;
+ uint32_t sgpr_info_id = 0;
+ uint32_t mask = operand_mask;
+ /* choose a sgpr: the candidate operand whose temp has the fewest uses */
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
uint16_t uses = ctx.uses[instr->operands[i].tempId()];
if (sgpr_info_id == 0 || uses < ctx.uses[sgpr_info_id]) {
sgpr_idx = i;
sgpr_info_id = instr->operands[i].tempId();
}
}
- }
- if (!has_sgpr && sgpr_info_id != 0) {
- ssa_info& info = ctx.info[sgpr_info_id];
+ operand_mask &= ~(1u << sgpr_idx);
+
+ /* Applying two sgprs requires making it VOP3, so don't do it unless it's
+ * definitely beneficial.
+ * TODO: this is too conservative because later the use count could be reduced to 1 */
+ if (num_sgprs && ctx.uses[sgpr_info_id] > 1 && !instr->isVOP3())
+ break;
+
+ Temp sgpr = ctx.info[sgpr_info_id].temp;
+ bool new_sgpr = sgpr.id() != sgpr_ids[0] && sgpr.id() != sgpr_ids[1];
+ if (new_sgpr && num_sgprs >= max_sgprs)
+ continue;
+
if (sgpr_idx == 0 || instr->isVOP3()) {
- instr->operands[sgpr_idx] = Operand(info.temp);
- ctx.uses[sgpr_info_id]--;
- ctx.uses[info.temp.id()]++;
+ instr->operands[sgpr_idx] = Operand(sgpr);
} else if (can_swap_operands(instr)) {
instr->operands[sgpr_idx] = instr->operands[0];
- instr->operands[0] = Operand(info.temp);
- ctx.uses[sgpr_info_id]--;
- ctx.uses[info.temp.id()]++;
+ instr->operands[0] = Operand(sgpr);
+ /* swap bits using a 4-entry LUT */
+ /* NOTE(review): the entries of 0x3120 are 4 bits wide but the shift below
+ * uses the raw 2-bit index, so index 1 yields 0 (the candidate bit of the
+ * old operand 0 is dropped instead of moving to bit 1) - confirm whether
+ * that is intended before changing it */
+ uint32_t swapped = (0x3120 >> (operand_mask & 0x3)) & 0xf;
+ operand_mask = (operand_mask & ~0x3) | swapped;
} else if (can_use_VOP3(instr)) {
to_VOP3(ctx, instr);
- instr->operands[sgpr_idx] = Operand(info.temp);
- ctx.uses[sgpr_info_id]--;
- ctx.uses[info.temp.id()]++;
+ instr->operands[sgpr_idx] = Operand(sgpr);
+ } else {
+ continue;
}
- /* we can have two sgprs on one instruction if it is the same sgpr! */
- } else if (sgpr_info_id != 0 &&
- sgpr_ssa_id == sgpr_info_id &&
- ctx.uses[sgpr_info_id] == 1 &&
- can_use_VOP3(instr)) {
- to_VOP3(ctx, instr);
- instr->operands[sgpr_idx] = Operand(ctx.info[sgpr_info_id].temp);
+ /* only record sgprs that are not tracked yet: sgpr_ids has two slots, so
+ * storing an already-known sgpr again would inflate num_sgprs and could
+ * write past the end of the array */
+ if (new_sgpr)
+ sgpr_ids[num_sgprs++] = sgpr.id();
ctx.uses[sgpr_info_id]--;
- ctx.uses[ctx.info[sgpr_info_id].temp.id()]++;
+ ctx.uses[sgpr.id()]++;
}
}
{
/* check if we could apply omod on predecessor */
if (instr->opcode == aco_opcode::v_mul_f32) {
- if (instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_omod_success()) {
-
+ bool op0 = instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_omod_success();
+ bool op1 = instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_omod_success();
+ if (op0 || op1) {
+ unsigned idx = op0 ? 0 : 1;
/* omod was successfully applied */
/* if the omod instruction is v_mad, we also have to change the original add */
- if (ctx.info[instr->operands[1].tempId()].is_mad()) {
- Instruction* add_instr = ctx.mad_infos[ctx.info[instr->operands[1].tempId()].val].add_instr.get();
+ if (ctx.info[instr->operands[idx].tempId()].is_mad()) {
+ Instruction* add_instr = ctx.mad_infos[ctx.info[instr->operands[idx].tempId()].val].add_instr.get();
if (ctx.info[instr->definitions[0].tempId()].is_clamp())
static_cast<VOP3A_instruction*>(add_instr)->clamp = true;
add_instr->definitions[0] = instr->definitions[0];
}
- Instruction* omod_instr = ctx.info[instr->operands[1].tempId()].instr;
+ Instruction* omod_instr = ctx.info[instr->operands[idx].tempId()].instr;
/* check if we have an additional clamp modifier */
- if (ctx.info[instr->definitions[0].tempId()].is_clamp() && ctx.uses[instr->definitions[0].tempId()] == 1) {
+ if (ctx.info[instr->definitions[0].tempId()].is_clamp() && ctx.uses[instr->definitions[0].tempId()] == 1 &&
+ ctx.uses[ctx.info[instr->definitions[0].tempId()].temp.id()]) {
static_cast<VOP3A_instruction*>(omod_instr)->clamp = true;
ctx.info[instr->definitions[0].tempId()].set_clamp_success(omod_instr);
}
omod_instr->definitions[0] = instr->definitions[0];
/* change the definition of instr to something unused, e.g. the original omod def */
- instr->definitions[0] = Definition(instr->operands[1].getTemp());
+ instr->definitions[0] = Definition(instr->operands[idx].getTemp());
ctx.uses[instr->definitions[0].tempId()] = 0;
return true;
}
/* apply omod / clamp modifiers if the def is used only once and the instruction can have modifiers */
if (!instr->definitions.empty() && ctx.uses[instr->definitions[0].tempId()] == 1 &&
can_use_VOP3(instr) && instr_info.can_use_output_modifiers[(int)instr->opcode]) {
- if (can_use_omod && ctx.info[instr->definitions[0].tempId()].is_omod2()) {
+ ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
+ if (can_use_omod && def_info.is_omod2() && ctx.uses[def_info.temp.id()]) {
to_VOP3(ctx, instr);
static_cast<VOP3A_instruction*>(instr.get())->omod = 1;
- ctx.info[instr->definitions[0].tempId()].set_omod_success(instr.get());
- } else if (can_use_omod && ctx.info[instr->definitions[0].tempId()].is_omod4()) {
+ def_info.set_omod_success(instr.get());
+ } else if (can_use_omod && def_info.is_omod4() && ctx.uses[def_info.temp.id()]) {
to_VOP3(ctx, instr);
static_cast<VOP3A_instruction*>(instr.get())->omod = 2;
- ctx.info[instr->definitions[0].tempId()].set_omod_success(instr.get());
- } else if (can_use_omod && ctx.info[instr->definitions[0].tempId()].is_omod5()) {
+ def_info.set_omod_success(instr.get());
+ } else if (can_use_omod && def_info.is_omod5() && ctx.uses[def_info.temp.id()]) {
to_VOP3(ctx, instr);
static_cast<VOP3A_instruction*>(instr.get())->omod = 3;
- ctx.info[instr->definitions[0].tempId()].set_omod_success(instr.get());
- } else if (ctx.info[instr->definitions[0].tempId()].is_clamp()) {
+ def_info.set_omod_success(instr.get());
+ } else if (def_info.is_clamp() && ctx.uses[def_info.temp.id()]) {
to_VOP3(ctx, instr);
static_cast<VOP3A_instruction*>(instr.get())->clamp = true;
- ctx.info[instr->definitions[0].tempId()].set_clamp_success(instr.get());
+ def_info.set_clamp_success(instr.get());
}
}
unsigned omod = 0;
bool clamp = false;
bool need_vop3 = false;
- int num_sgpr = 0;
op[0] = mul_instr->operands[0];
op[1] = mul_instr->operands[1];
op[2] = instr->operands[add_op_idx];
- for (unsigned i = 0; i < 3; i++)
- {
- if (op[i].isLiteral())
- return;
- if (op[i].isTemp() && op[i].getTemp().type() == RegType::sgpr)
- num_sgpr++;
+ // TODO: would be better to check this before selecting a mul instr?
+ if (!check_vop3_operands(ctx, 3, op))
+ return;
+
+ for (unsigned i = 0; i < 3; i++) {
if (!(i == 0 || (op[i].isTemp() && op[i].getTemp().type() == RegType::vgpr)))
need_vop3 = true;
}
- // TODO: would be better to check this before selecting a mul instr?
- if (num_sgpr > 1)
- return;
if (mul_instr->isVOP3()) {
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (mul_instr);
} else if (instr->opcode == aco_opcode::s_not_b64) {
if (combine_inverse_comparison(ctx, instr)) ;
else combine_salu_not_bitwise(ctx, instr);
- } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32) {
- combine_salu_n2(ctx, instr);
- } else if (instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
+ } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 ||
+ instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
if (combine_ordering_test(ctx, instr)) ;
else if (combine_comparison_ordering(ctx, instr)) ;
else if (combine_constant_comparison_ordering(ctx, instr)) ;
{
const uint32_t threshold = 4;
- /* Dead Code Elimination:
- * We remove instructions if they define temporaries which all are unused */
- const bool is_used = instr->definitions.empty() ||
- std::any_of(instr->definitions.begin(), instr->definitions.end(),
- [&ctx](const Definition& def) { return ctx.uses[def.tempId()]; });
- if (!is_used) {
+ if (is_dead(ctx.uses, instr.get())) {
instr.reset();
return;
}
/* first, check profitability */
if (ctx.uses[info->mul_temp_id]) {
ctx.uses[info->mul_temp_id]++;
+ if (instr->operands[0].isTemp())
+ ctx.uses[instr->operands[0].tempId()]--;
+ if (instr->operands[1].isTemp())
+ ctx.uses[instr->operands[1].tempId()]--;
instr.swap(info->add_instr);
/* second, check possible literals */
info->check_literal = true;
info->literal_idx = literal_idx;
}
+ return;
}
- return;
}
/* check for literals */
+ if (!instr->isSALU() && !instr->isVALU())
+ return;
+
+ if (instr->isSDWA() || instr->isDPP() || instr->isVOP3())
+ return; /* some encodings can't ever take literals */
+
/* we do not apply the literals yet as we don't know if it is profitable */
- if (instr->isSALU()) {
- uint32_t literal_idx = 0;
- uint32_t literal_uses = UINT32_MAX;
- bool has_literal = false;
- for (unsigned i = 0; i < instr->operands.size(); i++)
- {
- if (instr->operands[i].isLiteral()) {
- has_literal = true;
- break;
- }
- if (!instr->operands[i].isTemp())
- continue;
- if (ctx.info[instr->operands[i].tempId()].is_literal() &&
- ctx.uses[instr->operands[i].tempId()] < literal_uses) {
- literal_uses = ctx.uses[instr->operands[i].tempId()];
- literal_idx = i;
- }
+ Operand current_literal(s1);
+
+ unsigned literal_id = 0;
+ unsigned literal_uses = UINT32_MAX;
+ Operand literal(s1);
+ unsigned num_operands = instr->isSALU() ? instr->operands.size() : 1;
+
+ unsigned sgpr_ids[2] = {0, 0};
+ bool is_literal_sgpr = false;
+ uint32_t mask = 0;
+
+ /* choose a literal to apply */
+ for (unsigned i = 0; i < num_operands; i++) {
+ Operand op = instr->operands[i];
+ if (op.isLiteral()) {
+ current_literal = op;
+ continue;
+ } else if (!op.isTemp() || !ctx.info[op.tempId()].is_literal()) {
+ if (instr->isVALU() && op.isTemp() && op.getTemp().type() == RegType::sgpr &&
+ op.tempId() != sgpr_ids[0])
+ sgpr_ids[!!sgpr_ids[0]] = op.tempId();
+ continue;
}
- if (!has_literal && literal_uses < threshold) {
- ctx.uses[instr->operands[literal_idx].tempId()]--;
- if (ctx.uses[instr->operands[literal_idx].tempId()] == 0)
- instr->operands[literal_idx] = Operand(ctx.info[instr->operands[literal_idx].tempId()].val);
+
+ if (!can_accept_constant(instr, i))
+ continue;
+
+ if (ctx.uses[op.tempId()] < literal_uses) {
+ is_literal_sgpr = op.getTemp().type() == RegType::sgpr;
+ mask = 0;
+ literal = Operand(ctx.info[op.tempId()].val);
+ literal_uses = ctx.uses[op.tempId()];
+ literal_id = op.tempId();
}
- } else if (instr->isVALU() && valu_can_accept_literal(ctx, instr, 0) &&
- instr->operands[0].isTemp() &&
- ctx.info[instr->operands[0].tempId()].is_literal() &&
- ctx.uses[instr->operands[0].tempId()] < threshold) {
- ctx.uses[instr->operands[0].tempId()]--;
- if (ctx.uses[instr->operands[0].tempId()] == 0)
- instr->operands[0] = Operand(ctx.info[instr->operands[0].tempId()].val);
+
+ mask |= (op.tempId() == literal_id) << i;
}
+
+ /* don't go over the constant bus limit */
+ unsigned const_bus_limit = instr->isVALU() ? 1 : UINT32_MAX;
+ unsigned num_sgprs = !!sgpr_ids[0] + !!sgpr_ids[1];
+ if (num_sgprs == const_bus_limit && !is_literal_sgpr)
+ return;
+
+ if (literal_id && literal_uses < threshold &&
+ (current_literal.isUndefined() ||
+ (current_literal.size() == literal.size() &&
+ current_literal.constantValue() == literal.constantValue()))) {
+ /* mark the literal to be applied */
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+ if (instr->operands[i].isTemp() && instr->operands[i].tempId() == literal_id)
+ ctx.uses[instr->operands[i].tempId()]--;
+ }
+ }
}
if (!instr)
return;
- /* apply literals on SALU */
- if (instr->isSALU()) {
- for (Operand& op : instr->operands) {
- if (!op.isTemp())
- continue;
- if (op.isLiteral())
- break;
- if (ctx.info[op.tempId()].is_literal() &&
- ctx.uses[op.tempId()] == 0)
- op = Operand(ctx.info[op.tempId()].val);
+ /* apply literals on MAD */
+ bool literals_applied = false;
+ if (instr->opcode == aco_opcode::v_mad_f32 && ctx.info[instr->definitions[0].tempId()].is_mad()) {
+ mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].val];
+ if (!info->needs_vop3) {
+ aco_ptr<Instruction> new_mad;
+ if (info->check_literal && ctx.uses[instr->operands[info->literal_idx].tempId()] == 0) {
+ if (info->literal_idx == 2) { /* add literal -> madak */
+ new_mad.reset(create_instruction<VOP2_instruction>(aco_opcode::v_madak_f32, Format::VOP2, 3, 1));
+ new_mad->operands[0] = instr->operands[0];
+ new_mad->operands[1] = instr->operands[1];
+ } else { /* mul literal -> madmk */
+ new_mad.reset(create_instruction<VOP2_instruction>(aco_opcode::v_madmk_f32, Format::VOP2, 3, 1));
+ new_mad->operands[0] = instr->operands[1 - info->literal_idx];
+ new_mad->operands[1] = instr->operands[2];
+ }
+ new_mad->operands[2] = Operand(ctx.info[instr->operands[info->literal_idx].tempId()].val);
+ new_mad->definitions[0] = instr->definitions[0];
+ instr.swap(new_mad);
+ }
+ literals_applied = true;
}
}
- /* apply literals on VALU */
- else if (instr->isVALU() && !instr->isVOP3() &&
- instr->operands[0].isTemp() &&
- ctx.info[instr->operands[0].tempId()].is_literal() &&
- ctx.uses[instr->operands[0].tempId()] == 0) {
- instr->operands[0] = Operand(ctx.info[instr->operands[0].tempId()].val);
- }
-
- /* apply literals on MAD */
- else if (instr->opcode == aco_opcode::v_mad_f32 && ctx.info[instr->definitions[0].tempId()].is_mad()) {
- mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].val];
- aco_ptr<Instruction> new_mad;
- if (info->check_literal && ctx.uses[instr->operands[info->literal_idx].tempId()] == 0) {
- if (info->literal_idx == 2) { /* add literal -> madak */
- new_mad.reset(create_instruction<VOP2_instruction>(aco_opcode::v_madak_f32, Format::VOP2, 3, 1));
- new_mad->operands[0] = instr->operands[0];
- new_mad->operands[1] = instr->operands[1];
- } else { /* mul literal -> madmk */
- new_mad.reset(create_instruction<VOP2_instruction>(aco_opcode::v_madmk_f32, Format::VOP2, 3, 1));
- new_mad->operands[0] = instr->operands[1 - info->literal_idx];
- new_mad->operands[1] = instr->operands[2];
+ /* apply literals on SALU/VALU */
+ if (!literals_applied && (instr->isSALU() || instr->isVALU())) {
+ for (unsigned i = 0; i < instr->operands.size(); i++) {
+ Operand op = instr->operands[i];
+ if (op.isTemp() && ctx.info[op.tempId()].is_literal() && ctx.uses[op.tempId()] == 0) {
+ Operand literal(ctx.info[op.tempId()].val);
+ if (instr->isVALU() && i > 0)
+ to_VOP3(ctx, instr);
+ instr->operands[i] = literal;
}
- new_mad->operands[2] = Operand(ctx.info[instr->operands[info->literal_idx].tempId()].val);
- new_mad->definitions[0] = instr->definitions[0];
- instr.swap(new_mad);
}
}