namespace aco {
namespace {
+struct assignment { /* one register-assignment record; ra_ctx::assignments stores these and is indexed by temp id */
+ PhysReg reg; /* physical register recorded for the temporary */
+ RegClass rc; /* register class recorded for the temporary */
+ uint8_t assigned = 0; /* zero for a placeholder entry; call sites test this field as a boolean */
+ assignment() = default; /* placeholder entry: `assigned` keeps its default of 0 */
+ assignment(PhysReg reg, RegClass rc) : reg(reg), rc(rc), assigned(-1) {} /* -1 converts to 255 in uint8_t, i.e. a non-zero "assigned" marker */
+};
+
struct ra_ctx {
std::bitset<512> war_hint;
Program* program;
- std::unordered_map<unsigned, std::pair<PhysReg, RegClass>> assignments;
+ std::vector<assignment> assignments; /* indexed by temp id; call sites append one entry after each program->allocateId() and assert size() == program->peekAllocationId() */
std::map<unsigned, Temp> orig_names;
unsigned max_used_sgpr = 0;
unsigned max_used_vgpr = 0;
std::bitset<64> defs_done; /* see MAX_ARGS in aco_instruction_selection_setup.cpp */
- ra_ctx(Program* program) : program(program) {}
+ ra_ctx(Program* program) : program(program), assignments(program->peekAllocationId()) {} /* pre-sizes the table with peekAllocationId() default-constructed (assigned == 0) entries */
};
class RegisterFile {
}
// FIXME: if a definition got moved, change the target location and remove the parallelcopy
copy.second.setTemp(Temp(ctx.program->allocateId(), copy.second.regClass()));
- ctx.assignments[copy.second.tempId()] = {copy.second.physReg(), copy.second.regClass()};
+ ctx.assignments.emplace_back(copy.second.physReg(), copy.second.regClass());
+ assert(ctx.assignments.size() == ctx.program->peekAllocationId());
reg_file.fill(copy.second);
/* check if we moved an operand */
/* collect variables from a register area and clear reg_file */
std::set<std::pair<unsigned, unsigned>> collect_vars(ra_ctx& ctx, RegisterFile& reg_file,
- PhysReg reg, unsigned size)
+ PhysReg reg, unsigned size)
{
std::set<std::pair<unsigned, unsigned>> vars;
for (unsigned j = reg; j < reg + size; j++) {
for (unsigned k = 0; k < 4; k++) {
unsigned id = reg_file.subdword_regs[j][k];
if (id) {
- std::pair<PhysReg, RegClass> assignment = ctx.assignments.at(id);
- vars.emplace(assignment.second.bytes(), id);
- reg_file.clear(assignment.first, assignment.second);
+ assignment& var = ctx.assignments[id];
+ vars.emplace(var.rc.bytes(), id);
+ reg_file.clear(var.reg, var.rc);
if (!reg_file[j])
break;
}
}
} else if (reg_file[j] != 0) {
unsigned id = reg_file[j];
- std::pair<PhysReg, RegClass> assignment = ctx.assignments.at(id);
- vars.emplace(assignment.second.bytes(), id);
- reg_file.clear(assignment.first, assignment.second);
+ assignment& var = ctx.assignments[id];
+ vars.emplace(var.rc.bytes(), id);
+ reg_file.clear(var.reg, var.rc);
}
}
return vars;
/* NOTE: variables are also sorted by ID. this only affects a very small number of shaders slightly though. */
for (std::set<std::pair<unsigned, unsigned>>::const_reverse_iterator it = vars.rbegin(); it != vars.rend(); ++it) {
unsigned id = it->second;
- std::pair<PhysReg, RegClass> var = ctx.assignments[id];
- uint32_t size = var.second.size();
+ assignment& var = ctx.assignments[id];
+ uint32_t size = var.rc.size();
uint32_t stride = 1;
- if (var.second.type() == RegType::sgpr) {
+ if (var.rc.type() == RegType::sgpr) {
if (size == 2)
stride = 2;
if (size > 3)
}
}
} else {
- res = get_reg_simple(ctx, reg_file, def_reg_lo, def_reg_hi + 1, size, stride, var.second);
+ res = get_reg_simple(ctx, reg_file, def_reg_lo, def_reg_hi + 1, size, stride, var.rc);
}
} else {
- res = get_reg_simple(ctx, reg_file, lb, def_reg_lo, size, stride, var.second);
+ res = get_reg_simple(ctx, reg_file, lb, def_reg_lo, size, stride, var.rc);
if (!res.second) {
unsigned lb = (def_reg_hi + stride) & ~(stride - 1);
- res = get_reg_simple(ctx, reg_file, lb, ub, size, stride, var.second);
+ res = get_reg_simple(ctx, reg_file, lb, ub, size, stride, var.rc);
}
}
if (res.second) {
/* mark the area as blocked */
- reg_file.block(res.first, var.second.bytes());
+ reg_file.block(res.first, var.rc.bytes());
/* create parallelcopy pair (without definition id) */
- Temp tmp = Temp(id, var.second);
+ Temp tmp = Temp(id, var.rc);
Operand pc_op = Operand(tmp);
- pc_op.setFixed(var.first);
+ pc_op.setFixed(var.reg);
Definition pc_def = Definition(res.first, pc_op.regClass());
parallelcopies.emplace_back(pc_op, pc_def);
continue;
continue;
}
/* we cannot split live ranges of linear vgprs */
- if (ctx.assignments[reg_file[j]].second & (1 << 6)) {
+ if (ctx.assignments[reg_file[j]].rc & (1 << 6)) {
found = false;
break;
}
break;
}
}
- if (!is_kill && ctx.assignments[reg_file[j]].second.size() >= size) {
+ if (!is_kill && ctx.assignments[reg_file[j]].rc.size() >= size) {
found = false;
break;
}
- k += ctx.assignments[reg_file[j]].second.size();
+ k += ctx.assignments[reg_file[j]].rc.size();
last_var = reg_file[j];
n++;
if (k > num_moves || (k == num_moves && n <= num_vars)) {
if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, lb, ub, instr, def_reg_lo, def_reg_hi))
return false;
- adjust_max_used_regs(ctx, var.second, reg_lo);
+ adjust_max_used_regs(ctx, var.rc, reg_lo);
/* create parallelcopy pair (without definition id) */
- Temp tmp = Temp(id, var.second);
+ Temp tmp = Temp(id, var.rc);
Operand pc_op = Operand(tmp);
- pc_op.setFixed(var.first);
+ pc_op.setFixed(var.reg);
Definition pc_def = Definition(PhysReg{reg_lo}, pc_op.regClass());
parallelcopies.emplace_back(pc_op, pc_def);
}
continue;
}
- if (ctx.assignments[reg_file[j]].second.size() >= size) {
+ if (ctx.assignments[reg_file[j]].rc.size() >= size) {
found = false;
break;
}
-
/* we cannot split live ranges of linear vgprs */
- if (ctx.assignments[reg_file[j]].second & (1 << 6)) {
+ if (ctx.assignments[reg_file[j]].rc & (1 << 6)) {
found = false;
break;
}
- k += ctx.assignments[reg_file[j]].second.size();
+ k += ctx.assignments[reg_file[j]].rc.size();
n++;
last_var = reg_file[j];
}
if (reg_file[j] != 0) {
k++;
/* we cannot split live ranges of linear vgprs */
- if (ctx.assignments[reg_file[j]].second & (1 << 6))
+ if (ctx.assignments[reg_file[j]].rc & (1 << 6))
linear_vgpr = true;
}
war_hint |= ctx.war_hint[j];
/* if the block is not sealed yet, we create an incomplete phi (which might later get removed again) */
new_val = Temp{program->allocateId(), val.regClass()};
+ ctx.assignments.emplace_back();
aco_opcode opcode = val.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
aco_ptr<Instruction> phi{create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
phi->definitions[0] = Definition(new_val);
phi->definitions[0] = Definition(new_val);
for (unsigned i = 0; i < preds.size(); i++) {
phi->operands[i] = Operand(ops[i]);
- phi->operands[i].setFixed(ctx.assignments[ops[i].id()].first);
+ phi->operands[i].setFixed(ctx.assignments[ops[i].id()].reg);
if (ops[i].regClass() == new_val.regClass())
affinities[new_val.id()] = ops[i].id();
}
+ ctx.assignments.emplace_back();
+ assert(ctx.assignments.size() == ctx.program->peekAllocationId());
phi_map.emplace(new_val.id(), phi_info{phi.get(), block->index});
block->instructions.insert(block->instructions.begin(), std::move(phi));
}
for (Temp t : live) {
Temp renamed = handle_live_in(t, &block);
- if (ctx.assignments.find(renamed.id()) != ctx.assignments.end())
- register_file.fill(ctx.assignments[renamed.id()].first, t.size(), renamed.id());
+ if (ctx.assignments[renamed.id()].assigned)
+ register_file.fill(ctx.assignments[renamed.id()].reg, t.size(), renamed.id());
}
std::vector<aco_ptr<Instruction>> instructions;
if (definition.isKill()) {
for (Operand& op : phi->operands) {
assert(op.isTemp());
- if (ctx.assignments.find(op.tempId()) == ctx.assignments.end() ||
- ctx.assignments[op.tempId()].first != exec) {
+ if (!ctx.assignments[op.tempId()].assigned ||
+ ctx.assignments[op.tempId()].reg != exec) {
definition.setKill(false);
break;
}
continue;
if (affinities.find(definition.tempId()) != affinities.end() &&
- ctx.assignments.find(affinities[definition.tempId()]) != ctx.assignments.end()) {
- assert(ctx.assignments[affinities[definition.tempId()]].second == definition.regClass());
- PhysReg reg = ctx.assignments[affinities[definition.tempId()]].first;
+ ctx.assignments[affinities[definition.tempId()]].assigned) {
+ assert(ctx.assignments[affinities[definition.tempId()]].rc == definition.regClass());
+ PhysReg reg = ctx.assignments[affinities[definition.tempId()]].reg;
bool try_use_special_reg = reg == scc || reg == exec;
if (try_use_special_reg) {
for (const Operand& op : phi->operands) {
- if (!op.isTemp() ||
- ctx.assignments.find(op.tempId()) == ctx.assignments.end() ||
- !(ctx.assignments[op.tempId()].first == reg)) {
+ if (!(op.isTemp() && ctx.assignments[op.tempId()].assigned &&
+ ctx.assignments[op.tempId()].reg == reg)) {
try_use_special_reg = false;
break;
}
std::vector<std::pair<Operand, Definition>> parallelcopy;
/* try to find a register that is used by at least one operand */
for (const Operand& op : phi->operands) {
- if (!op.isTemp() ||
- ctx.assignments.find(op.tempId()) == ctx.assignments.end())
+ if (!(op.isTemp() && ctx.assignments[op.tempId()].assigned))
continue;
- PhysReg reg = ctx.assignments[op.tempId()].first;
+ PhysReg reg = ctx.assignments[op.tempId()].reg;
/* we tried this already on the previous loop */
if (reg == scc || reg == exec)
continue;
phi->operands[idx].getTemp().type() == RegType::sgpr &&
phi->operands[idx].isFirstKillBeforeDef()) {
Temp phi_op = read_variable(phi->operands[idx].getTemp(), block.index);
- PhysReg reg = ctx.assignments[phi_op.id()].first;
+ PhysReg reg = ctx.assignments[phi_op.id()].reg;
assert(register_file[reg] == phi_op.id());
register_file[reg] = 0;
}
/* check if the operand is fixed */
if (operand.isFixed()) {
- if (operand.physReg() == ctx.assignments[operand.tempId()].first) {
+ if (operand.physReg() == ctx.assignments[operand.tempId()].reg) {
/* we are fine: the operand is already assigned the correct reg */
} else {
/* check if target reg is blocked, and move away the blocking var */
if (register_file[operand.physReg().reg()]) {
uint32_t blocking_id = register_file[operand.physReg().reg()];
- RegClass rc = ctx.assignments[blocking_id].second;
+ RegClass rc = ctx.assignments[blocking_id].rc;
Operand pc_op = Operand(Temp{blocking_id, rc});
pc_op.setFixed(operand.physReg());
Definition pc_def = Definition(Temp{program->allocateId(), pc_op.regClass()});
/* find free reg */
PhysReg reg = get_reg(ctx, register_file, pc_op.regClass(), parallelcopy, instr);
pc_def.setFixed(reg);
- ctx.assignments[pc_def.tempId()] = {reg, pc_def.regClass()};
+ ctx.assignments.emplace_back(reg, pc_def.regClass());
+ assert(ctx.assignments.size() == ctx.program->peekAllocationId());
register_file.clear(pc_op);
register_file.fill(pc_def);
parallelcopy.emplace_back(pc_op, pc_def);
Temp tmp = Temp{program->allocateId(), operand.regClass()};
Definition pc_def = Definition(tmp);
pc_def.setFixed(operand.physReg());
- pc_op.setFixed(ctx.assignments[operand.tempId()].first);
+ pc_op.setFixed(ctx.assignments[operand.tempId()].reg);
operand.setTemp(tmp);
- ctx.assignments[tmp.id()] = {pc_def.physReg(), pc_def.regClass()};
+ ctx.assignments.emplace_back(pc_def.physReg(), pc_def.regClass());
+ assert(ctx.assignments.size() == ctx.program->peekAllocationId());
operand.setFixed(pc_def.physReg());
register_file.clear(pc_op);
register_file.fill(pc_def);
parallelcopy.emplace_back(pc_op, pc_def);
}
} else {
- assert(ctx.assignments.find(operand.tempId()) != ctx.assignments.end());
- PhysReg reg = ctx.assignments[operand.tempId()].first;
+ assert(ctx.assignments[operand.tempId()].assigned);
+ PhysReg reg = ctx.assignments[operand.tempId()].reg;
if (operand_can_use_reg(instr, i, reg)) {
- operand.setFixed(ctx.assignments[operand.tempId()].first);
+ operand.setFixed(ctx.assignments[operand.tempId()].reg);
} else {
Operand pc_op = operand;
pc_op.setFixed(reg);
PhysReg new_reg = get_reg(ctx, register_file, operand.regClass(), parallelcopy, instr);
Definition pc_def = Definition(program->allocateId(), new_reg, pc_op.regClass());
- ctx.assignments[pc_def.tempId()] = {new_reg, pc_def.regClass()};
+ ctx.assignments.emplace_back(new_reg, pc_def.regClass());
+ assert(ctx.assignments.size() == ctx.program->peekAllocationId());
register_file.clear(pc_op);
register_file.fill(pc_def);
parallelcopy.emplace_back(pc_op, pc_def);
/* check if the target register is blocked */
if (register_file[definition.physReg().reg()] != 0) {
/* create parallelcopy pair to move blocking var */
- Temp tmp = {register_file[definition.physReg()], ctx.assignments[register_file[definition.physReg()]].second};
+ Temp tmp = {register_file[definition.physReg()], ctx.assignments[register_file[definition.physReg()]].rc};
Operand pc_op = Operand(tmp);
- pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg()]].first);
+ pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg()]].reg);
RegClass rc = pc_op.regClass();
tmp = Temp{program->allocateId(), rc};
Definition pc_def = Definition(tmp);
/* re-enable the killed operands, so that we don't move the blocking var there */
for (const Operand& op : instr->operands) {
if (op.isTemp() && op.isFirstKillBeforeDef())
- register_file.fill(op.physReg(), op.size(), 0xFFFF);
+ register_file.fill(op);
}
/* find a new register for the blocking variable */
pc_def.setFixed(reg);
/* finish assignment of parallelcopy */
- ctx.assignments[pc_def.tempId()] = {reg, pc_def.regClass()};
+ ctx.assignments.emplace_back(reg, pc_def.regClass());
+ assert(ctx.assignments.size() == ctx.program->peekAllocationId());
parallelcopy.emplace_back(pc_op, pc_def);
/* add changes to reg_file */
parallelcopy, instr);
definition.setFixed(reg);
} else if (affinities.find(definition.tempId()) != affinities.end() &&
- ctx.assignments.find(affinities[definition.tempId()]) != ctx.assignments.end()) {
- PhysReg reg = ctx.assignments[affinities[definition.tempId()]].first;
+ ctx.assignments[affinities[definition.tempId()]].assigned) {
+ PhysReg reg = ctx.assignments[affinities[definition.tempId()]].reg;
if (get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg))
definition.setFixed(reg);
else
if (op.isTemp() &&
op.tempId() != definition.tempId() &&
op.getTemp().type() == definition.getTemp().type() &&
- ctx.assignments.find(op.tempId()) != ctx.assignments.end()) {
- PhysReg reg = ctx.assignments[op.tempId()].first;
+ ctx.assignments[op.tempId()].assigned) {
+ PhysReg reg = ctx.assignments[op.tempId()].reg;
reg.reg_b += (byte_offset - k);
if (get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg)) {
definition.setFixed(reg);
break;
}
}
- aco_ptr<Instruction> mov;
- if (can_sgpr)
- mov.reset(create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32, Format::SOP1, 1, 1));
- else
- mov.reset(create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1));
- mov->operands[0] = instr->operands[0];
- Temp tmp = {program->allocateId(), can_sgpr ? s1 : v1};
- mov->definitions[0] = Definition(tmp);
/* disable definitions and re-enable operands */
for (const Definition& def : instr->definitions)
register_file.clear(def);
if (op.isTemp() && op.isFirstKill())
register_file.fill(op.physReg(), op.size(), 0xFFFF);
}
- mov->definitions[0].setFixed(get_reg(ctx, register_file, tmp.regClass(), parallelcopy, mov));
+ RegClass rc = can_sgpr ? s1 : v1;
+ PhysReg reg = get_reg(ctx, register_file, rc, parallelcopy, instr);
+ Temp tmp = {program->allocateId(), rc};
+ ctx.assignments.emplace_back(reg, rc);
+
+ aco_ptr<Instruction> mov;
+ if (can_sgpr)
+ mov.reset(create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32, Format::SOP1, 1, 1));
+ else
+ mov.reset(create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1));
+ mov->operands[0] = instr->operands[0];
+ mov->definitions[0] = Definition(tmp);
+ mov->definitions[0].setFixed(reg);
+
instr->operands[0] = Operand(tmp);
- instr->operands[0].setFixed(mov->definitions[0].physReg());
+ instr->operands[0].setFixed(reg);
instructions.emplace_back(std::move(mov));
/* re-enable live vars */
for (const Operand& op : instr->operands) {
std::vector<unsigned> preds = phi->definitions[0].getTemp().is_linear() ? succ.linear_preds : succ.logical_preds;
for (unsigned i = 0; i < phi->operands.size(); i++) {
phi->operands[i].setTemp(read_variable(phi->operands[i].getTemp(), preds[i]));
- phi->operands[i].setFixed(ctx.assignments[phi->operands[i].tempId()].first);
+ phi->operands[i].setFixed(ctx.assignments[phi->operands[i].tempId()].reg);
}
try_remove_trivial_phi(phi_map.find(phi->definitions[0].tempId()));
}
if (!operand.isTemp())
continue;
operand.setTemp(read_variable(operand.getTemp(), preds[i]));
- operand.setFixed(ctx.assignments[operand.tempId()].first);
+ operand.setFixed(ctx.assignments[operand.tempId()].reg);
std::map<unsigned, phi_info>::iterator phi = phi_map.find(operand.getTemp().id());
if (phi != phi_map.end())
phi->second.uses.emplace(instr.get());