definition.setFixed(reg);
}
- if (!definition.isFixed())
- definition.setFixed(get_reg(ctx, register_file, definition.getTemp(), parallelcopy, instr));
+ if (!definition.isFixed()) {
+ Temp tmp = definition.getTemp();
+ /* subdword instructions before RDNA write full registers */
+ if (tmp.regClass().is_subdword() &&
+ !instr_can_access_subdword(instr) &&
+ ctx.program->chip_class <= GFX9) {
+ assert(tmp.bytes() <= 4);
+ tmp = Temp(definition.tempId(), v1);
+ }
+ definition.setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr));
+ }
assert(definition.isFixed() && ((definition.getTemp().type() == RegType::vgpr && definition.physReg() >= 256) ||
(definition.getTemp().type() != RegType::vgpr && definition.physReg() < 256)));
}
#endif
+/* Tells whether `instr` is allowed to address sub-dword parts of a register:
+ * true for SDWA-encoded instructions and for instructions of the PSEUDO format,
+ * false for everything else. */
+bool instr_can_access_subdword(aco_ptr<Instruction>& instr)
+{
+ if (instr->isSDWA())
+ return true;
+ return instr->format == Format::PSEUDO;
+}
+
void validate(Program* program, FILE * output)
{
if (!(debug_flags & DEBUG_VALIDATE))
/* check subdword definitions */
for (unsigned i = 0; i < instr->definitions.size(); i++) {
if (instr->definitions[i].regClass().is_subdword())
- check(instr->isSDWA() || instr->format == Format::PSEUDO, "Only SDWA and Pseudo instructions can write subdword registers", instr.get());
+ check(instr_can_access_subdword(instr) || instr->definitions[i].bytes() <= 4, "Only SDWA and Pseudo instructions can write subdword registers larger than 4 bytes", instr.get());
}
if (instr->isSALU() || instr->isVALU()) {
err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i);
if (op.physReg() == vcc && !program->needs_vcc)
err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i);
- if (!(instr->isSDWA() || instr->format == Format::PSEUDO) && op.regClass().is_subdword() && op.physReg().byte())
+ if (!instr_can_access_subdword(instr) && op.regClass().is_subdword() && op.physReg().byte())
err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d must be aligned to a full register", i);
if (!assignments[op.tempId()].firstloc.block)
assignments[op.tempId()].firstloc = loc;
err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i);
if (def.physReg() == vcc && !program->needs_vcc)
err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i);
+ if (!instr_can_access_subdword(instr) && def.regClass().is_subdword() && def.physReg().byte())
+ err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d must be aligned to a full register", i);
if (!assignments[def.tempId()].firstloc.block)
assignments[def.tempId()].firstloc = loc;
assignments[def.tempId()].defloc = loc;
PhysReg reg = assignments.at(tmp.id()).reg;
for (unsigned j = 0; j < tmp.bytes(); j++) {
if (regs[reg.reg_b + j])
- err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + i]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
+ err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
regs[reg.reg_b + j] = tmp.id();
}
+ /* An instruction that cannot address sub-dword parts writes the full register,
+ * so the bytes above the definition (tmp.bytes() .. 3) must not be held by
+ * another temporary. regs[] stores the id of the temp occupying each byte,
+ * 0 meaning free: test that entry, not the byte index itself, so that free
+ * bytes are not reported and assignments.at() is only called with a real id. */
+ if (def.regClass().is_subdword() && !instr_can_access_subdword(instr)) {
+ for (unsigned j = tmp.bytes(); j < 4; j++)
+ if (regs[reg.reg_b + j])
+ err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
+ }
}
for (const Definition& def : instr->definitions) {