+/* Builds the 128-bit (s4) buffer resource descriptor used to access the
+ * scratch memory backing spilled registers, emitting the setup instructions
+ * into `instructions`.
+ *
+ * For the top-level block the setup code is appended at the end of
+ * `instructions`; otherwise it is inserted just before p_logical_end so it
+ * stays inside the block's logical section.
+ *
+ * scratch_offset: in/out — advanced by `offset` (bytes) when offset != 0.
+ * Returns: the descriptor Temp produced by p_create_vector. */
+Temp load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
+ std::vector<aco_ptr<Instruction>>& instructions,
+ unsigned offset, bool is_top_level)
+{
+ Builder bld(ctx.program);
+ if (is_top_level) {
+ bld.reset(&instructions);
+ } else {
+ /* find p_logical_end */
+ unsigned idx = instructions.size() - 1;
+ while (instructions[idx]->opcode != aco_opcode::p_logical_end)
+ idx--;
+ bld.reset(&instructions, std::next(instructions.begin(), idx));
+ }
+
+ Temp private_segment_buffer = ctx.program->private_segment_buffer;
+ if (ctx.program->stage != compute_cs)
+ /* NOTE(review): for non-compute stages this value appears to be a pointer
+ * to the descriptor base rather than the base itself, hence the extra
+ * s_load_dwordx2 — confirm against how private_segment_buffer is set up. */
+ private_segment_buffer = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand(0u));
+
+ if (offset)
+ scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), scratch_offset, Operand(offset));
+
+ /* Swizzled per-lane addressing: add the thread id, with index stride
+ * encoding 3 (=64) for wave64 and 2 (=32) for wave32. */
+ uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) |
+ S_008F0C_INDEX_STRIDE(ctx.program->wave_size == 64 ? 3 : 2);
+
+ if (ctx.program->chip_class >= GFX10) {
+ /* GFX10+ replaced num/data format with a single format field and added
+ * OOB select / resource level bits. */
+ rsrc_conf |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else if (ctx.program->chip_class <= GFX7) { /* dfmt modifies stride on GFX8/GFX9 when ADD_TID_EN=1 */
+ rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ /* older generations need element size = 4 bytes. element size removed in GFX9 */
+ if (ctx.program->chip_class <= GFX8)
+ rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
+
+ /* Descriptor layout: { base address (s2), num_records = 0xffffffff,
+ * configuration dword }. */
+ return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
+ private_segment_buffer, Operand(-1u),
+ Operand(rsrc_conf));
+}
+
+/* Marks in `slots_used` every spill-slot word already occupied by an
+ * assigned spill id that interferes with `id`, so a following slot search
+ * for `id` will skip those positions.
+ *
+ * ctx.interferences[i] holds (RegClass of id i, set of ids interfering with
+ * i); slots[other] is only meaningful once is_assigned[other] is true. */
+void add_interferences(spill_ctx& ctx, std::vector<bool>& is_assigned,
+ std::vector<uint32_t>& slots, std::vector<bool>& slots_used,
+ unsigned id)
+{
+ for (unsigned other : ctx.interferences[id].second) {
+ if (!is_assigned[other])
+ continue;
+
+ RegClass other_rc = ctx.interferences[other].first;
+ unsigned slot = slots[other];
+ /* an id occupies other_rc.size() consecutive slot words */
+ std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
+ }
+}
+
+/* First-fit search for `size` consecutive free slot words in `used`.
+ *
+ * For SGPR spills, a candidate slot whose range would cross a wave_size
+ * boundary is skipped ahead to the next wave_size-aligned position —
+ * presumably because the slot index modulo wave_size selects a lane of the
+ * backing storage; confirm against the SGPR spill lowering.
+ *
+ * Side effects on `used`: once a slot is found, the whole bitmap is cleared
+ * (it is per-query scratch that the caller re-populates via
+ * add_interferences before the next search) and grown so it covers the
+ * returned range [slot, slot + size).
+ *
+ * `num_slots` is neither read nor written here. */
+unsigned find_available_slot(std::vector<bool>& used, unsigned wave_size,
+ unsigned size, bool is_sgpr, unsigned *num_slots)
+{
+ unsigned wave_size_minus_one = wave_size - 1;
+ unsigned slot = 0;
+
+ while (true) {
+ /* check whether [slot, slot + size) is free; indices past the current
+ * end of `used` count as free */
+ bool available = true;
+ for (unsigned i = 0; i < size; i++) {
+ if (slot + i < used.size() && used[slot + i]) {
+ available = false;
+ break;
+ }
+ }
+ if (!available) {
+ slot++;
+ continue;
+ }
+
+ /* SGPR slots must not straddle a wave_size boundary */
+ if (is_sgpr && ((slot & wave_size_minus_one) > wave_size - size)) {
+ slot = align(slot, wave_size);
+ continue;
+ }
+
+ /* reset the scratch bitmap for the caller's next query */
+ std::fill(used.begin(), used.end(), false);
+
+ if (slot + size > used.size())
+ used.resize(slot + size);
+
+ return slot;
+ }
+}
+
+/* Assigns scratch spill slots to every reloaded spill id whose RegClass has
+ * register type `type`.
+ *
+ * Ids linked by an affinity group are handled first and all members of a
+ * group share one slot (assumes all members of a group have the same
+ * RegClass as vec[0] — TODO confirm); remaining ids get individual slots.
+ *
+ * Outputs: slots[id] and is_assigned[id] are filled in, and *num_slots is
+ * updated to the final size of the slot bitmap (the high-water mark grown
+ * by find_available_slot). */
+void assign_spill_slots_helper(spill_ctx& ctx, RegType type,
+ std::vector<bool>& is_assigned,
+ std::vector<uint32_t>& slots,
+ unsigned *num_slots)
+{
+ std::vector<bool> slots_used(*num_slots);
+
+ /* assign slots for ids with affinities first */
+ for (std::vector<uint32_t>& vec : ctx.affinities) {
+ if (ctx.interferences[vec[0]].first.type() != type)
+ continue;
+
+ /* collect the slots blocked by anything any reloaded member of the
+ * group interferes with */
+ for (unsigned id : vec) {
+ if (!ctx.is_reloaded[id])
+ continue;
+
+ add_interferences(ctx, is_assigned, slots, slots_used, id);
+ }
+
+ unsigned slot = find_available_slot(slots_used, ctx.wave_size,
+ ctx.interferences[vec[0]].first.size(),
+ type == RegType::sgpr, num_slots);
+
+ /* every reloaded member of the group shares the same slot */
+ for (unsigned id : vec) {
+ assert(!is_assigned[id]);
+
+ if (ctx.is_reloaded[id]) {
+ slots[id] = slot;
+ is_assigned[id] = true;
+ }
+ }
+ }
+
+ /* assign slots for ids without affinities */
+ for (unsigned id = 0; id < ctx.interferences.size(); id++) {
+ if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type)
+ continue;
+
+ add_interferences(ctx, is_assigned, slots, slots_used, id);
+
+ unsigned slot = find_available_slot(slots_used, ctx.wave_size,
+ ctx.interferences[id].first.size(),
+ type == RegType::sgpr, num_slots);
+
+ slots[id] = slot;
+ is_assigned[id] = true;
+ }
+
+ *num_slots = slots_used.size();
+}
+