std::stack<Block*> loop_header;
std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_start;
std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_end;
- std::vector<std::pair<RegClass, std::set<uint32_t>>> interferences;
+ std::vector<std::pair<RegClass, std::unordered_set<uint32_t>>> interferences;
std::vector<std::vector<uint32_t>> affinities;
std::vector<bool> is_reloaded;
std::map<Temp, remat_info> remat;
}
}
+ void add_interference(uint32_t first, uint32_t second)
+ { /* Record that spill ids `first` and `second` interfere with each other. */
+ if (interferences[first].first.type() != interferences[second].first.type())
+ return; /* register classes of differing type: nothing is recorded */
+ /* insert().second is true only when `second` was not yet in first's set */
+ bool inserted = interferences[first].second.insert(second).second;
+ if (inserted)
+ interferences[second].second.insert(first); /* mirror the newly added entry */
+ }
+
uint32_t allocate_spill_id(RegClass rc)
{
- interferences.emplace_back(rc, std::set<uint32_t>());
+ interferences.emplace_back(rc, std::unordered_set<uint32_t>()); /* the new id starts with an
+  * empty interference set tagged with rc; a matching false is_reloaded entry follows, and
+  * the value of next_spill_id before its increment is returned as the id */
is_reloaded.push_back(false);
return next_spill_id++;
}
}
unsigned loop_end = i;
+ /* Keep live-through variables spilled: walk the next-use entries recorded at
+  * the end of block_idx - 1 and, for each one that already has a spill id in
+  * spills_exit of that block, carry the spill over into spills_entry of
+  * block_idx and account for it in spilled_registers. */
+ for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block_idx - 1]) {
+ if (pair.second.first < loop_end)
+ continue; /* first next-use field is below loop_end: not handled by this loop */
+ /* only variables already present in spills_exit[block_idx - 1] are kept spilled */
+ Temp to_spill = pair.first;
+ auto it = ctx.spills_exit[block_idx - 1].find(to_spill);
+ if (it == ctx.spills_exit[block_idx - 1].end())
+ continue;
+
+ ctx.spills_entry[block_idx][to_spill] = it->second; /* re-use the existing spill id */
+ spilled_registers += to_spill;
+ }
+
/* select live-through vgpr variables */
while (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) {
unsigned distance = 0;
assert(idx != 0 && "loop without phis: TODO");
idx--;
RegisterDemand reg_pressure = ctx.register_demand[block_idx][idx] - spilled_registers;
+ /* Consider register pressure from linear predecessors. This can affect
+ * reg_pressure if the branch instructions define sgprs. */
+ for (unsigned pred : block->linear_preds) {
+ reg_pressure.sgpr = std::max<int16_t>(
+ reg_pressure.sgpr, ctx.register_demand[pred].back().sgpr - spilled_registers.sgpr);
+ }
+
while (reg_pressure.sgpr > ctx.target_pressure.sgpr) {
unsigned distance = 0;
Temp to_spill;
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
if (pair.first.type() == RegType::sgpr &&
ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
- ctx.next_use_distances_start[block_idx][pair.first].second > block_idx) {
+ ctx.next_use_distances_start[block_idx][pair.first].first != block_idx) {
ctx.spills_entry[block_idx].insert(pair);
spilled_registers.sgpr += pair.first.size();
}
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
if (pair.first.type() == RegType::vgpr &&
ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
- ctx.next_use_distances_start[block_idx][pair.first].second > block_idx) {
+ ctx.next_use_distances_start[block_idx][pair.first].first != block_idx) {
ctx.spills_entry[block_idx].insert(pair);
spilled_registers.vgpr += pair.first.size();
}
}
reg_pressure += ctx.register_demand[block_idx][idx] - spilled_registers;
+ /* Consider register pressure from linear predecessors. This can affect
+ * reg_pressure if the branch instructions define sgprs. */
+ for (unsigned pred : block->linear_preds) {
+ reg_pressure.sgpr = std::max<int16_t>(
+ reg_pressure.sgpr, ctx.register_demand[pred].back().sgpr - spilled_registers.sgpr);
+ }
+
while (reg_pressure.sgpr > ctx.target_pressure.sgpr) {
assert(!partial_spills.empty());
std::set<Temp>::iterator it = partial_spills.begin();
- Temp to_spill = *it;
- unsigned distance = ctx.next_use_distances_start[block_idx][*it].second;
+ Temp to_spill = Temp();
+ unsigned distance = 0;
while (it != partial_spills.end()) {
assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end());
assert(!partial_spills.empty());
std::set<Temp>::iterator it = partial_spills.begin();
- Temp to_spill = *it;
- unsigned distance = ctx.next_use_distances_start[block_idx][*it].second;
+ Temp to_spill = Temp();
+ unsigned distance = 0;
while (it != partial_spills.end()) {
assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end());
for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
if (var == pair.first)
continue;
- ctx.interferences[def_spill_id].second.emplace(pair.second);
- ctx.interferences[pair.second].second.emplace(def_spill_id);
+ ctx.add_interference(def_spill_id, pair.second);
}
/* check if variable is already spilled at predecessor */
for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx]) {
if (exit_spill.first == pair.first)
continue;
- ctx.interferences[exit_spill.second].second.emplace(pair.second);
- ctx.interferences[pair.second].second.emplace(exit_spill.second);
+ ctx.add_interference(exit_spill.second, pair.second);
}
/* variable is in register at predecessor and has to be spilled */
uint32_t spill_id = ctx.allocate_spill_id(to_spill.regClass());
/* add interferences with currently spilled variables */
- for (std::pair<Temp, uint32_t> pair : current_spills) {
- ctx.interferences[spill_id].second.emplace(pair.second);
- ctx.interferences[pair.second].second.emplace(spill_id);
- }
- for (std::pair<Temp, std::pair<Temp, uint32_t>> pair : reloads) {
- ctx.interferences[spill_id].second.emplace(pair.second.second);
- ctx.interferences[pair.second.second].second.emplace(spill_id);
- }
+ for (std::pair<Temp, uint32_t> pair : current_spills)
+ ctx.add_interference(spill_id, pair.second);
+ for (std::pair<Temp, std::pair<Temp, uint32_t>> pair : reloads)
+ ctx.add_interference(spill_id, pair.second.second);
current_spills[to_spill] = spill_id;
spilled_registers += to_spill;
RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx);
/* add interferences for spilled variables */
- for (std::pair<Temp, uint32_t> x : ctx.spills_entry[block_idx]) {
- for (std::pair<Temp, uint32_t> y : ctx.spills_entry[block_idx])
- if (x.second != y.second)
- ctx.interferences[x.second].second.emplace(y.second);
+ for (auto it = ctx.spills_entry[block_idx].begin(); it != ctx.spills_entry[block_idx].end(); ++it) {
+ for (auto it2 = std::next(it); it2 != ctx.spills_entry[block_idx].end(); ++it2)
+ ctx.add_interference(it->second, it2->second); /* it2 starts one past it, so each pair of entries is visited once */
}
bool is_loop_header = block->loop_nest_depth && ctx.loop_header.top()->index == block_idx;
std::vector<uint32_t>& slots, std::vector<bool>& slots_used,
unsigned id)
{
- RegType type = ctx.interferences[id].first.type();
-
for (unsigned other : ctx.interferences[id].second) {
if (!is_assigned[other])
continue;
RegClass other_rc = ctx.interferences[other].first;
- if (other_rc.type() == type) {
- unsigned slot = slots[other];
- std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
- }
+ unsigned slot = slots[other];
+ std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
}
}
for (unsigned i = 0; i < temp.size(); i++)
split->definitions[i] = bld.def(v1);
bld.insert(split);
- for (unsigned i = 0; i < temp.size(); i++)
- bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
+ for (unsigned i = 0; i < temp.size(); i++) {
+ Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false, true);
+ static_cast<MUBUF_instruction *>(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+ }
} else {
- bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false);
+ Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false, true);
+ static_cast<MUBUF_instruction *>(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
} else {
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
for (unsigned i = 0; i < def.size(); i++) {
Temp tmp = bld.tmp(v1);
vec->operands[i] = Operand(tmp);
- bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false);
+ Instruction *instr = bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false, true);
+ static_cast<MUBUF_instruction *>(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
bld.insert(vec);
} else {
- bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false);
+ Instruction *instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false, true);
+ static_cast<MUBUF_instruction *>(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
}
} else {
uint32_t spill_slot = slots[spill_id];