+/**
+ * Decides whether operand `index` of `instr` is acceptable at the byte offset
+ * of the physical register currently assigned to it.
+ *
+ * Rules, evaluated in this order:
+ *  - p_as_uniform accepts byte offset 0 only.
+ *  - Pseudo instructions on GFX8 and newer accept any offset.
+ *  - SDWA instructions accept any offset when the selector stored for this
+ *    operand, masked with sdwa_asuint, equals `sdwa_isra | operand bytes`.
+ *  - Byte offset 2 is accepted when can_use_opsel() approves this
+ *    opcode/operand pair.
+ *  - A handful of opcodes accept one specific non-zero offset (see the switch).
+ *  - Everything else requires byte offset 0.
+ *
+ * \param chip   chip class the instruction is validated for
+ * \param instr  instruction that owns the operand
+ * \param index  position of the operand inside instr->operands
+ * \return true when the operand's byte offset is permitted
+ */
+bool validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, unsigned index)
+{
+   Operand operand = instr->operands[index];
+   const unsigned byte_offset = operand.physReg().byte();
+   const bool at_dword_start = byte_offset == 0;
+
+   if (instr->opcode == aco_opcode::p_as_uniform)
+      return at_dword_start;
+
+   if (instr->format == Format::PSEUDO && chip >= GFX8)
+      return true;
+
+   if (instr->isSDWA()) {
+      const auto selector = static_cast<SDWA_instruction *>(instr.get())->sel[index];
+      if ((selector & sdwa_asuint) == (sdwa_isra | operand.bytes()))
+         return true;
+   }
+
+   if (byte_offset == 2 && can_use_opsel(chip, instr->opcode, index, 1))
+      return true;
+
+   /* Per-opcode exceptions: each case accepts offset 0 plus exactly one
+    * additional placement. */
+   switch (instr->opcode) {
+   case aco_opcode::v_cvt_f32_ubyte1:
+      return at_dword_start || byte_offset == 1;
+   case aco_opcode::v_cvt_f32_ubyte2:
+      return at_dword_start || byte_offset == 2;
+   case aco_opcode::v_cvt_f32_ubyte3:
+      return at_dword_start || byte_offset == 3;
+   case aco_opcode::ds_write_b8_d16_hi:
+   case aco_opcode::ds_write_b16_d16_hi:
+      /* only operand 1 may start at byte 2 */
+      return at_dword_start || (byte_offset == 2 && index == 1);
+   case aco_opcode::buffer_store_byte_d16_hi:
+   case aco_opcode::buffer_store_short_d16_hi:
+      /* only operand 3 may start at byte 2 */
+      return at_dword_start || (byte_offset == 2 && index == 3);
+   case aco_opcode::flat_store_byte_d16_hi:
+   case aco_opcode::flat_store_short_d16_hi:
+   case aco_opcode::scratch_store_byte_d16_hi:
+   case aco_opcode::scratch_store_short_d16_hi:
+   case aco_opcode::global_store_byte_d16_hi:
+   case aco_opcode::global_store_short_d16_hi:
+      /* only operand 2 may start at byte 2 */
+      return at_dword_start || (byte_offset == 2 && index == 2);
+   default:
+      return at_dword_start;
+   }
+}
+
+/**
+ * Decides whether the first definition of `instr` is acceptable at the byte
+ * offset of the physical register currently assigned to it.
+ *
+ * Rules, evaluated in this order:
+ *  - Pseudo instructions on GFX8 and newer accept any offset.
+ *  - SDWA instructions accept any offset when dst_sel equals
+ *    `sdwa_isra | definition bytes`.
+ *  - Byte offset 2 is accepted when can_use_opsel() approves the opcode for
+ *    operand index -1 (NOTE(review): -1 appears to denote the definition;
+ *    confirm against can_use_opsel()).
+ *  - The *_d16_hi load opcodes listed below require byte offset 2.
+ *  - Everything else requires byte offset 0.
+ *
+ * \param chip   chip class the instruction is validated for
+ * \param instr  instruction whose definitions[0] is checked
+ * \return true when the definition's byte offset is permitted
+ */
+bool validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
+{
+   Definition definition = instr->definitions[0];
+   const unsigned byte_offset = definition.physReg().byte();
+
+   if (instr->format == Format::PSEUDO && chip >= GFX8)
+      return true;
+
+   if (instr->isSDWA()) {
+      SDWA_instruction *sdwa = static_cast<SDWA_instruction *>(instr.get());
+      if (sdwa->dst_sel == (sdwa_isra | definition.bytes()))
+         return true;
+   }
+
+   if (byte_offset == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
+      return true;
+
+   /* Offset the opcode demands: 2 for the d16_hi loads, 0 otherwise. */
+   unsigned required_offset = 0;
+   switch (instr->opcode) {
+   case aco_opcode::buffer_load_ubyte_d16_hi:
+   case aco_opcode::buffer_load_short_d16_hi:
+   case aco_opcode::flat_load_ubyte_d16_hi:
+   case aco_opcode::flat_load_short_d16_hi:
+   case aco_opcode::scratch_load_ubyte_d16_hi:
+   case aco_opcode::scratch_load_short_d16_hi:
+   case aco_opcode::global_load_ubyte_d16_hi:
+   case aco_opcode::global_load_short_d16_hi:
+   case aco_opcode::ds_read_u8_d16_hi:
+   case aco_opcode::ds_read_u16_d16_hi:
+      required_offset = 2;
+      break;
+   default:
+      break;
+   }
+
+   return byte_offset == required_offset;
+}
+
+/**
+ * Reports the byte count attributed to the write `instr` performs for
+ * definition `index`.
+ *
+ * Evaluation order:
+ *  - Pseudo instructions: the definition's byte size on GFX8 and newer,
+ *    otherwise its size() multiplied by 4.
+ *  - SDWA instructions whose dst_sel equals `sdwa_isra | definition bytes`:
+ *    the definition's byte size.
+ *  - d16 / d16_hi loads: 4 when program->sram_ecc_enabled is set, else 2.
+ *  - The listed 16-bit VOP3 opcodes on GFX9 and newer: 2.
+ *  - Otherwise the larger of (a) the definition's byte size on GFX10+, or 4 on
+ *    older chips, and (b) instr_info.definition_size for the opcode divided by
+ *    8 (NOTE(review): the table is presumably in bits; confirm).
+ *
+ * \param program  program supplying chip_class and sram_ecc_enabled
+ * \param instr    instruction performing the write
+ * \param index    position of the definition inside instr->definitions
+ * \return byte count as described above
+ */
+unsigned get_subdword_bytes_written(Program *program, const aco_ptr<Instruction>& instr, unsigned index)
+{
+   const chip_class chip = program->chip_class;
+   Definition definition = instr->definitions[index];
+
+   if (instr->format == Format::PSEUDO) {
+      if (chip >= GFX8)
+         return definition.bytes();
+      return definition.size() * 4u;
+   }
+
+   if (instr->isSDWA()) {
+      SDWA_instruction *sdwa = static_cast<SDWA_instruction *>(instr.get());
+      if (sdwa->dst_sel == (sdwa_isra | definition.bytes()))
+         return definition.bytes();
+   }
+
+   switch (instr->opcode) {
+   case aco_opcode::buffer_load_ubyte_d16:
+   case aco_opcode::buffer_load_short_d16:
+   case aco_opcode::flat_load_ubyte_d16:
+   case aco_opcode::flat_load_short_d16:
+   case aco_opcode::scratch_load_ubyte_d16:
+   case aco_opcode::scratch_load_short_d16:
+   case aco_opcode::global_load_ubyte_d16:
+   case aco_opcode::global_load_short_d16:
+   case aco_opcode::ds_read_u8_d16:
+   case aco_opcode::ds_read_u16_d16:
+   case aco_opcode::buffer_load_ubyte_d16_hi:
+   case aco_opcode::buffer_load_short_d16_hi:
+   case aco_opcode::flat_load_ubyte_d16_hi:
+   case aco_opcode::flat_load_short_d16_hi:
+   case aco_opcode::scratch_load_ubyte_d16_hi:
+   case aco_opcode::scratch_load_short_d16_hi:
+   case aco_opcode::global_load_ubyte_d16_hi:
+   case aco_opcode::global_load_short_d16_hi:
+   case aco_opcode::ds_read_u8_d16_hi:
+   case aco_opcode::ds_read_u16_d16_hi:
+      if (program->sram_ecc_enabled)
+         return 4;
+      return 2;
+   case aco_opcode::v_mad_f16:
+   case aco_opcode::v_mad_u16:
+   case aco_opcode::v_mad_i16:
+   case aco_opcode::v_fma_f16:
+   case aco_opcode::v_div_fixup_f16:
+   case aco_opcode::v_interp_p2_f16:
+      if (chip >= GFX9)
+         return 2;
+      /* older chips take the generic computation below */
+      break;
+   default:
+      break;
+   }
+
+   const unsigned chip_minimum = chip >= GFX10 ? definition.bytes() : 4;
+   const unsigned opcode_bytes = instr_info.definition_size[static_cast<int>(instr->opcode)] / 8u;
+   return MAX2(chip_minimum, opcode_bytes);
+}
+