From 47d7e1e6628616f678b353d29daa23595d6a7c42 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 7 Jul 2020 13:10:38 +0100 Subject: [PATCH] aco: rewrite graph coloring in spiller MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit I don't think this is much of an optimization in the typical case, but for very complex shaders this should work much better. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_spill.cpp | 257 +++++++++++++++------------------ 1 file changed, 120 insertions(+), 137 deletions(-) diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 7db3248e20f..faba524b76d 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -1327,145 +1327,131 @@ Temp load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Operand(rsrc_conf)); } -void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { - std::map sgpr_slot; - std::map vgpr_slot; - std::vector is_assigned(ctx.interferences.size()); +void add_interferences(spill_ctx& ctx, std::vector& is_assigned, + std::vector& slots, std::vector& slots_used, + unsigned id) +{ + RegType type = ctx.interferences[id].first.type(); - /* first, handle affinities: just merge all interferences into both spill ids */ - for (std::vector& vec : ctx.affinities) { - for (unsigned i = 0; i < vec.size(); i++) { - for (unsigned j = i + 1; j < vec.size(); j++) { - assert(vec[i] != vec[j]); - for (uint32_t id : ctx.interferences[vec[i]].second) - ctx.interferences[id].second.insert(vec[j]); - for (uint32_t id : ctx.interferences[vec[j]].second) - ctx.interferences[id].second.insert(vec[i]); - ctx.interferences[vec[i]].second.insert(ctx.interferences[vec[j]].second.begin(), ctx.interferences[vec[j]].second.end()); - ctx.interferences[vec[j]].second.insert(ctx.interferences[vec[i]].second.begin(), ctx.interferences[vec[i]].second.end()); + for (unsigned other : ctx.interferences[id].second) { + if (!is_assigned[other]) + continue; - bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]]; - ctx.is_reloaded[vec[i]] = reloaded; - ctx.is_reloaded[vec[j]] = reloaded; + RegClass other_rc = ctx.interferences[other].first; + if (other_rc.type() == type) { + unsigned slot = slots[other]; + std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true); + } + } +} + +unsigned find_available_slot(std::vector& used, unsigned wave_size, + unsigned size, bool is_sgpr, unsigned *num_slots) +{ + unsigned wave_size_minus_one = wave_size - 1; + unsigned slot = 0; + + while (true) { + bool available = true; + for (unsigned i = 0; i < size; i++) { + if (slot + i < used.size() && used[slot + i]) { + available = false; + break; } } + if (!available) { + slot++; + continue; + } + + if (is_sgpr && ((slot & wave_size_minus_one) > wave_size - size)) { + slot = align(slot, wave_size); + continue; + } + + std::fill(used.begin(), used.end(), false); + + if (slot + size > used.size()) + used.resize(slot + size); + + return slot; } - for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++) - for (ASSERTED uint32_t id : ctx.interferences[i].second) - assert(i != id); +} - /* for each spill slot, assign as many spill ids as possible */ - std::vector> spill_slot_interferences; - unsigned slot_idx = 0; - bool done = false; - - /* assign sgpr spill slots */ - while (!done) { - done = true; - for (unsigned id = 0; id < ctx.interferences.size(); id++) { - if (is_assigned[id] || !ctx.is_reloaded[id]) - continue; - if (ctx.interferences[id].first.type() != RegType::sgpr) - continue; +void assign_spill_slots_helper(spill_ctx& ctx, RegType type, + std::vector& is_assigned, + std::vector& slots, + unsigned *num_slots) +{ + std::vector slots_used(*num_slots); - /* check interferences */ - bool interferes = false; - for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) { - if (i == spill_slot_interferences.size()) - spill_slot_interferences.emplace_back(std::set()); - if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end() || i / ctx.wave_size != slot_idx / ctx.wave_size) { - interferes = true; - break; - } - } - if (interferes) { - done = false; + /* assign slots for ids with affinities first */ + for (std::vector& vec : ctx.affinities) { + if (ctx.interferences[vec[0]].first.type() != type) + continue; + + for (unsigned id : vec) { + if (!ctx.is_reloaded[id]) continue; - } - /* we found a spill id which can be assigned to current spill slot */ - sgpr_slot[id] = slot_idx; - is_assigned[id] = true; - for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) - spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end()); - - /* add all affinities: there are no additional interferences */ - for (std::vector& vec : ctx.affinities) { - bool found_affinity = false; - for (uint32_t entry : vec) { - if (entry == id) { - found_affinity = true; - break; - } - } - if (!found_affinity) - continue; - for (uint32_t entry : vec) { - sgpr_slot[entry] = slot_idx; - is_assigned[entry] = true; - } + add_interferences(ctx, is_assigned, slots, slots_used, id); + } + + unsigned slot = find_available_slot(slots_used, ctx.wave_size, + ctx.interferences[vec[0]].first.size(), + type == RegType::sgpr, num_slots); + + for (unsigned id : vec) { + assert(!is_assigned[id]); + + if (ctx.is_reloaded[id]) { + slots[id] = slot; + is_assigned[id] = true; } } - slot_idx++; } - unsigned sgpr_spill_slots = spill_slot_interferences.size(); - spill_slot_interferences.clear(); - slot_idx = 0; - done = false; + /* assign slots for ids without affinities */ + for (unsigned id = 0; id < ctx.interferences.size(); id++) { + if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type) + continue; - /* assign vgpr spill slots */ - while (!done) { - done = true; - for (unsigned id = 0; id < ctx.interferences.size(); id++) { - if (is_assigned[id] || !ctx.is_reloaded[id]) - continue; - if (ctx.interferences[id].first.type() != RegType::vgpr) - continue; + add_interferences(ctx, is_assigned, slots, slots_used, id); - /* check interferences */ - bool interferes = false; - for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) { - if (i == spill_slot_interferences.size()) - spill_slot_interferences.emplace_back(std::set()); - /* check for interference and ensure that vector regs are stored next to each other */ - if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end()) { - interferes = true; - break; - } - } - if (interferes) { - done = false; - continue; - } + unsigned slot = find_available_slot(slots_used, ctx.wave_size, + ctx.interferences[id].first.size(), + type == RegType::sgpr, num_slots); - /* we found a spill id which can be assigned to current spill slot */ - vgpr_slot[id] = slot_idx; - is_assigned[id] = true; - for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) - spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end()); - - /* add all affinities: there are no additional interferences */ - for (std::vector& vec : ctx.affinities) { - bool found_affinity = false; - for (uint32_t entry : vec) { - if (entry == id) { - found_affinity = true; - break; - } - } - if (!found_affinity) - continue; - for (uint32_t entry : vec) { - vgpr_slot[entry] = slot_idx; - is_assigned[entry] = true; - } + slots[id] = slot; + is_assigned[id] = true; + } + + *num_slots = slots_used.size(); +} + +void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { + std::vector slots(ctx.interferences.size()); + std::vector is_assigned(ctx.interferences.size()); + + /* first, handle affinities: just merge all interferences into both spill ids */ + for (std::vector& vec : ctx.affinities) { + for (unsigned i = 0; i < vec.size(); i++) { + for (unsigned j = i + 1; j < vec.size(); j++) { + assert(vec[i] != vec[j]); + bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]]; + ctx.is_reloaded[vec[i]] = reloaded; + ctx.is_reloaded[vec[j]] = reloaded; } } - slot_idx++; } + for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++) + for (ASSERTED uint32_t id : ctx.interferences[i].second) + assert(i != id); - unsigned vgpr_spill_slots = spill_slot_interferences.size(); + /* for each spill slot, assign as many spill ids as possible */ + unsigned sgpr_spill_slots = 0, vgpr_spill_slots = 0; + assign_spill_slots_helper(ctx, RegType::sgpr, is_assigned, slots, &sgpr_spill_slots); + assign_spill_slots_helper(ctx, RegType::vgpr, is_assigned, slots, &vgpr_spill_slots); for (unsigned id = 0; id < is_assigned.size(); id++) assert(is_assigned[id] || !ctx.is_reloaded[id]); @@ -1478,10 +1464,7 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { continue; assert(ctx.is_reloaded[vec[i]] == ctx.is_reloaded[vec[j]]); assert(ctx.interferences[vec[i]].first.type() == ctx.interferences[vec[j]].first.type()); - if (ctx.interferences[vec[i]].first.type() == RegType::sgpr) - assert(sgpr_slot[vec[i]] == sgpr_slot[vec[j]]); - else - assert(vgpr_slot[vec[i]] == vgpr_slot[vec[j]]); + assert(slots[vec[i]] == slots[vec[j]]); } } } @@ -1531,8 +1514,8 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { bool can_destroy = true; for (std::pair pair : ctx.spills_exit[block.linear_preds[0]]) { - if (sgpr_slot.find(pair.second) != sgpr_slot.end() && - sgpr_slot[pair.second] / ctx.wave_size == i) { + if (ctx.interferences[pair.second].first.type() == RegType::sgpr && + slots[pair.second] / ctx.wave_size == i) { can_destroy = false; break; } @@ -1553,10 +1536,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { if (!ctx.is_reloaded[spill_id]) { /* never reloaded, so don't spill */ - } else if (vgpr_slot.find(spill_id) != vgpr_slot.end()) { + } else if (!is_assigned[spill_id]) { + unreachable("No spill slot assigned for spill id"); + } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) { /* spill vgpr */ ctx.program->config->spilled_vgprs += (*it)->operands[0].size(); - uint32_t spill_slot = vgpr_slot[spill_id]; + uint32_t spill_slot = slots[spill_id]; bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096; unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size; @@ -1586,10 +1571,10 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { } else { bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false); } - } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) { + } else { ctx.program->config->spilled_sgprs += (*it)->operands[0].size(); - uint32_t spill_slot = sgpr_slot[spill_id]; + uint32_t spill_slot = slots[spill_id]; /* check if the linear vgpr already exists */ if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { @@ -1615,17 +1600,17 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { spill->operands[1] = Operand(spill_slot % ctx.wave_size); spill->operands[2] = (*it)->operands[0]; instructions.emplace_back(aco_ptr(spill)); - } else { - unreachable("No spill slot assigned for spill id"); } } else if ((*it)->opcode == aco_opcode::p_reload) { uint32_t spill_id = (*it)->operands[0].constantValue(); assert(ctx.is_reloaded[spill_id]); - if (vgpr_slot.find(spill_id) != vgpr_slot.end()) { + if (!is_assigned[spill_id]) { + unreachable("No spill slot assigned for spill id"); + } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) { /* reload vgpr */ - uint32_t spill_slot = vgpr_slot[spill_id]; + uint32_t spill_slot = slots[spill_id]; bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096; unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size; @@ -1654,8 +1639,8 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { } else { bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false); } - } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) { - uint32_t spill_slot = sgpr_slot[spill_id]; + } else { + uint32_t spill_slot = slots[spill_id]; reload_in_loop[spill_slot / ctx.wave_size] = block.loop_nest_depth > 0; /* check if the linear vgpr already exists */ @@ -1682,8 +1667,6 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { reload->operands[1] = Operand(spill_slot % ctx.wave_size); reload->definitions[0] = (*it)->definitions[0]; instructions.emplace_back(aco_ptr(reload)); - } else { - unreachable("No spill slot assigned for spill id"); } } else if (!ctx.remat_used.count(it->get()) || ctx.remat_used[it->get()]) { instructions.emplace_back(std::move(*it)); -- 2.30.2