int subvector_begin_pos = -1;
};
+/* Resolve an SDWA selection value into the bits that go into the encoding.
+ *
+ * Selections without the sdwa_isra flag are returned masked by sdwa_asuint.
+ * Selections with sdwa_isra set ("register-allocated") only carry a size in
+ * their sdwa_rasize bits (1 = byte, anything else is treated as a 2-byte
+ * word); for those, the selector is derived from reg.byte().
+ */
+static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
+{
+   /* Fixed selection: nothing to derive from the register. */
+   if (!(sel & sdwa_isra))
+      return sel & sdwa_asuint;
+
+   const unsigned ra_size = sel & sdwa_rasize;
+   /* presumably the byte offset of the register within its dword -- confirm
+    * against PhysReg::byte() */
+   const uint32_t byte_offset = reg.byte();
+
+   /* A byte selects by its byte offset directly; a word selects by its word
+    * index (byte offset / 2) with the sdwa_isword flag set. */
+   return ra_size == 1 ? byte_offset : (sdwa_isword | (byte_offset >> 1));
+}
+
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
{
uint32_t instr_offset = out.size() * 4u;
}
encoding |= (sdwa->clamp ? 1 : 0) << 13;
} else {
- encoding |= (uint32_t)(sdwa->dst_sel & sdwa_asuint) << 8;
+ encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8;
uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0;
+ if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra))
+ dst_u = 2;
encoding |= dst_u << 11;
encoding |= (sdwa->clamp ? 1 : 0) << 13;
encoding |= sdwa->omod << 14;
}
- encoding |= (uint32_t)(sdwa->sel[0] & sdwa_asuint) << 16;
+ encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16;
encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0;
encoding |= sdwa->abs[0] << 21;
encoding |= sdwa->neg[0] << 20;
if (instr->operands.size() >= 2) {
- encoding |= (uint32_t)(sdwa->sel[1] & sdwa_asuint) << 24;
+ encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24;
encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0;
encoding |= sdwa->abs[1] << 29;
encoding |= sdwa->neg[1] << 28;
/* masks */
sdwa_wordnum = 0x1,
sdwa_bytenum = 0x3,
- sdwa_asuint = 0x7,
+ sdwa_asuint = 0x7 | 0x10,
+ sdwa_rasize = 0x3,
/* flags */
sdwa_isword = 0x4,
sdwa_sext = 0x8,
+ sdwa_isra = 0x10,
/* specific values */
sdwa_ubyte0 = 0,
sdwa_sword0 = sdwa_uword0 | sdwa_sext,
sdwa_sword1 = sdwa_uword1 | sdwa_sext,
sdwa_sdword = sdwa_udword | sdwa_sext,
+
+ /* register-allocated */
+ sdwa_ubyte = 1 | sdwa_isra,
+ sdwa_uword = 2 | sdwa_isra,
+ sdwa_sbyte = sdwa_ubyte | sdwa_sext,
+ sdwa_sword = sdwa_uword | sdwa_sext,
};
/**
struct SDWA_instruction : public Instruction {
/* these destination modifiers aren't available with VOPC except for
* clamp on GFX8 */
- unsigned dst_sel:4;
+ unsigned dst_sel:8;
bool dst_preserve:1;
bool clamp:1;
unsigned omod:2; /* GFX9+ */