+/**
+ * Returns the byte granularity (1, 2 or 4) for placing operand `idx` of
+ * `instr`, which has register class `rc`, inside a register.
+ *
+ * A result of 4 means no sub-dword placement is offered for this operand.
+ * Smaller results are only returned when add_subdword_operand() has a way to
+ * encode the resulting byte offset: a v_cvt_f32_ubyteN opcode, SDWA, opsel or
+ * a d16_hi opcode variant. Pseudo instructions on GFX8+ are also given a
+ * sub-dword stride.
+ *
+ * @param chip  hardware generation being compiled for
+ * @param instr instruction that reads the operand
+ * @param idx   operand index, forwarded to can_use_opsel()
+ * @param rc    register class of the operand
+ */
+unsigned get_subdword_operand_stride(chip_class chip, const aco_ptr<Instruction>& instr, unsigned idx, RegClass rc)
+{
+   /* v_readfirstlane_b32 cannot use SDWA */
+   if (instr->opcode == aco_opcode::p_as_uniform)
+      return 4;
+   if (instr->format == Format::PSEUDO && chip >= GFX8)
+      return rc.bytes() % 2 == 0 ? 2 : 1;
+
+   if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
+      /* add_subdword_operand() can switch to v_cvt_f32_ubyte{1,2,3} */
+      return 1;
+   } else if (can_use_SDWA(chip, instr)) {
+      return rc.bytes() % 2 == 0 ? 2 : 1;
+   } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, 1)) {
+      return 2;
+   }
+
+   switch (instr->opcode) {
+   case aco_opcode::ds_write_b8:
+   case aco_opcode::ds_write_b16:
+   case aco_opcode::buffer_store_byte:
+   case aco_opcode::buffer_store_short:
+   case aco_opcode::flat_store_byte:
+   case aco_opcode::flat_store_short:
+   case aco_opcode::scratch_store_byte:
+   case aco_opcode::scratch_store_short:
+   case aco_opcode::global_store_byte:
+   case aco_opcode::global_store_short:
+      /* A stride of 2 relies on the *_d16_hi variants of these opcodes, which
+       * were introduced with GFX9. This includes the DS writes: offering a
+       * stride of 2 for them on GFX8 would allow the operand to end up in the
+       * high half, where no encodable opcode exists.
+       */
+      return chip >= GFX9 ? 2 : 4;
+   default:
+      return 4;
+   }
+}
+
+/**
+ * Adjusts `instr` so that operand `idx` is taken from byte offset `byte` of
+ * its register.
+ *
+ * Depending on the instruction this either picks a different opcode
+ * (v_cvt_f32_ubyteN, *_d16_hi), converts the instruction to SDWA, or sets
+ * an opsel bit. Pseudo instructions and operands at byte 0 are left alone.
+ *
+ * @param chip  hardware generation being compiled for
+ * @param instr instruction to modify in place
+ * @param idx   index of the operand inside `instr`
+ * @param byte  byte offset of the operand within its register
+ * @param rc    register class of the operand (asserted to be at most 2 bytes)
+ */
+void add_subdword_operand(chip_class chip, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte, RegClass rc)
+{
+   /* nothing has to be encoded for an aligned operand or a pseudo instruction */
+   if (byte == 0 || instr->format == Format::PSEUDO)
+      return;
+
+   assert(rc.bytes() <= 2);
+
+   if (!instr->usesModifiers() && instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
+      /* the byte offset directly selects the opcode variant */
+      const aco_opcode cvt_ubyte[4] = {
+         aco_opcode::v_cvt_f32_ubyte0,
+         aco_opcode::v_cvt_f32_ubyte1,
+         aco_opcode::v_cvt_f32_ubyte2,
+         aco_opcode::v_cvt_f32_ubyte3,
+      };
+      if (byte < 4)
+         instr->opcode = cvt_ubyte[byte];
+      return;
+   }
+
+   if (can_use_SDWA(chip, instr)) {
+      convert_to_SDWA(chip, instr);
+      return;
+   }
+
+   const unsigned half = byte / 2;
+   if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, half)) {
+      VOP3A_instruction *vop3 = static_cast<VOP3A_instruction *>(instr.get());
+      vop3->opsel |= half << idx;
+      return;
+   }
+
+   /* the remaining rewrites (d16_hi opcodes) only apply to byte offset 2 */
+   if (byte != 2)
+      return;
+
+   if (chip >= GFX8) {
+      switch (instr->opcode) {
+      case aco_opcode::ds_write_b8:
+         instr->opcode = aco_opcode::ds_write_b8_d16_hi;
+         return;
+      case aco_opcode::ds_write_b16:
+         instr->opcode = aco_opcode::ds_write_b16_d16_hi;
+         return;
+      default:
+         break;
+      }
+   }
+
+   if (chip < GFX9)
+      return;
+
+   switch (instr->opcode) {
+   case aco_opcode::buffer_store_byte:
+      instr->opcode = aco_opcode::buffer_store_byte_d16_hi;
+      break;
+   case aco_opcode::buffer_store_short:
+      instr->opcode = aco_opcode::buffer_store_short_d16_hi;
+      break;
+   case aco_opcode::flat_store_byte:
+      instr->opcode = aco_opcode::flat_store_byte_d16_hi;
+      break;
+   case aco_opcode::flat_store_short:
+      instr->opcode = aco_opcode::flat_store_short_d16_hi;
+      break;
+   case aco_opcode::scratch_store_byte:
+      instr->opcode = aco_opcode::scratch_store_byte_d16_hi;
+      break;
+   case aco_opcode::scratch_store_short:
+      instr->opcode = aco_opcode::scratch_store_short_d16_hi;
+      break;
+   case aco_opcode::global_store_byte:
+      instr->opcode = aco_opcode::global_store_byte_d16_hi;
+      break;
+   case aco_opcode::global_store_short:
+      instr->opcode = aco_opcode::global_store_short_d16_hi;
+      break;
+   default:
+      unreachable("Something went wrong: Impossible register assignment.");
+   }
+}
+
+/**
+ * Describes how `instr` can write a definition of register class `rc`.
+ *
+ * @param program supplies the chip class and the sram_ecc_enabled flag
+ * @param instr   instruction producing the definition
+ * @param rc      register class of the definition
+ * @return the pair (minimum_stride, bytes_written)
+ */
+std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, const aco_ptr<Instruction>& instr, RegClass rc)
+{
+   const chip_class chip = program->chip_class;
+   const unsigned rc_bytes = rc.bytes();
+
+   if (instr->format == Format::PSEUDO) {
+      if (chip >= GFX8)
+         return std::make_pair(rc_bytes % 2 == 0 ? 2u : 1u, rc_bytes);
+      return std::make_pair(4u, rc.size() * 4u);
+   }
+
+   /* The opcodes listed here write only rc.bytes() starting with GFX9, every
+    * other instruction does so starting with GFX10. Older chips are treated
+    * as writing a full dword.
+    */
+   bool partial_write_since_gfx9;
+   switch (instr->opcode) {
+   case aco_opcode::v_mad_f16:
+   case aco_opcode::v_mad_u16:
+   case aco_opcode::v_mad_i16:
+   case aco_opcode::v_fma_f16:
+   case aco_opcode::v_div_fixup_f16:
+   case aco_opcode::v_interp_p2_f16:
+      partial_write_since_gfx9 = true;
+      break;
+   default:
+      partial_write_since_gfx9 = false;
+      break;
+   }
+   const chip_class first_partial_chip = partial_write_since_gfx9 ? GFX9 : GFX10;
+
+   unsigned bytes_written = chip >= first_partial_chip ? rc_bytes : 4u;
+   /* never report less than the definition size recorded for the opcode */
+   bytes_written = MAX2(bytes_written, instr_info.definition_size[static_cast<int>(instr->opcode)] / 8u);
+
+   if (can_use_SDWA(chip, instr))
+      return std::make_pair(rc_bytes, rc_bytes);
+
+   if (rc_bytes == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
+      return std::make_pair(2u, bytes_written);
+
+   switch (instr->opcode) {
+   case aco_opcode::buffer_load_ubyte_d16:
+   case aco_opcode::buffer_load_short_d16:
+   case aco_opcode::flat_load_ubyte_d16:
+   case aco_opcode::flat_load_short_d16:
+   case aco_opcode::scratch_load_ubyte_d16:
+   case aco_opcode::scratch_load_short_d16:
+   case aco_opcode::global_load_ubyte_d16:
+   case aco_opcode::global_load_short_d16:
+   case aco_opcode::ds_read_u8_d16:
+   case aco_opcode::ds_read_u16_d16:
+      /* reported as a 2-byte write only on GFX9+ without SRAM ECC */
+      return std::make_pair(2u, (chip >= GFX9 && !program->sram_ecc_enabled) ? 2u : 4u);
+   default:
+      break;
+   }
+
+   return std::make_pair(4u, bytes_written);
+}
+
+/**
+ * Fixes definition `idx` of `instr` to `reg` and, for non-pseudo
+ * instructions, adjusts the instruction so that it writes to the byte offset
+ * given by reg.byte().
+ *
+ * The adjustment is one of: conversion to SDWA, setting the destination
+ * opsel bit, or replacing a d16 load opcode by its d16_hi variant.
+ *
+ * @param program    supplies the chip class
+ * @param instr      instruction to modify in place
+ * @param idx        index of the definition inside `instr`
+ * @param reg        physical register (including byte offset) assigned
+ * @param is_partial together with chip < GFX10, forces the SDWA conversion
+ *                   when SDWA is available even if reg.byte() is 0
+ */
+void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg, bool is_partial)
+{
+   RegClass rc = instr->definitions[idx].regClass();
+   const chip_class chip = program->chip_class;
+
+   instr->definitions[idx].setFixed(reg);
+
+   if (instr->format == Format::PSEUDO)
+      return;
+
+   if (can_use_SDWA(chip, instr)) {
+      const bool needs_sdwa = reg.byte() || (is_partial && chip < GFX10);
+      if (needs_sdwa)
+         convert_to_SDWA(chip, instr);
+      return;
+   }
+
+   const unsigned byte = reg.byte();
+
+   if (byte && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, byte / 2)) {
+      VOP3A_instruction *vop3 = static_cast<VOP3A_instruction *>(instr.get());
+      if (byte == 2)
+         vop3->opsel |= (1 << 3); /* dst in high half */
+      return;
+   }
+
+   /* only byte offset 2 has a matching d16_hi load opcode */
+   if (byte != 2)
+      return;
+
+   switch (instr->opcode) {
+   case aco_opcode::buffer_load_ubyte_d16:
+      instr->opcode = aco_opcode::buffer_load_ubyte_d16_hi;
+      break;
+   case aco_opcode::buffer_load_short_d16:
+      instr->opcode = aco_opcode::buffer_load_short_d16_hi;
+      break;
+   case aco_opcode::flat_load_ubyte_d16:
+      instr->opcode = aco_opcode::flat_load_ubyte_d16_hi;
+      break;
+   case aco_opcode::flat_load_short_d16:
+      instr->opcode = aco_opcode::flat_load_short_d16_hi;
+      break;
+   case aco_opcode::scratch_load_ubyte_d16:
+      instr->opcode = aco_opcode::scratch_load_ubyte_d16_hi;
+      break;
+   case aco_opcode::scratch_load_short_d16:
+      instr->opcode = aco_opcode::scratch_load_short_d16_hi;
+      break;
+   case aco_opcode::global_load_ubyte_d16:
+      instr->opcode = aco_opcode::global_load_ubyte_d16_hi;
+      break;
+   case aco_opcode::global_load_short_d16:
+      instr->opcode = aco_opcode::global_load_short_d16_hi;
+      break;
+   case aco_opcode::ds_read_u8_d16:
+      instr->opcode = aco_opcode::ds_read_u8_d16_hi;
+      break;
+   case aco_opcode::ds_read_u16_d16:
+      instr->opcode = aco_opcode::ds_read_u16_d16_hi;
+      break;
+   default:
+      unreachable("Something went wrong: Impossible register assignment.");
+   }
+}
+