aco: take LDS into account when calculating num_waves
[mesa.git] / src / amd / compiler / aco_spill.cpp
1 /*
2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2018 Google
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 */
25
26 #include "aco_ir.h"
27 #include <map>
28 #include <stack>
29 #include "vulkan/radv_shader.h"
30
31
32 /*
33 * Implements the spilling algorithm on SSA-form from
34 * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
35 * by Matthias Braun and Sebastian Hack.
36 */
37
38 namespace aco {
39
40 namespace {
41
42 struct remat_info {
43 Instruction *instr;
44 };
45
46 struct spill_ctx {
47 RegisterDemand target_pressure;
48 Program* program;
49 std::vector<std::vector<RegisterDemand>> register_demand;
50 std::vector<std::map<Temp, Temp>> renames;
51 std::vector<std::map<Temp, uint32_t>> spills_entry;
52 std::vector<std::map<Temp, uint32_t>> spills_exit;
53 std::vector<bool> processed;
54 std::stack<Block*> loop_header;
55 std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_start;
56 std::vector<std::map<Temp, std::pair<uint32_t, uint32_t>>> next_use_distances_end;
57 std::vector<std::pair<RegClass, std::set<uint32_t>>> interferences;
58 std::vector<std::pair<uint32_t, uint32_t>> affinities;
59 std::vector<bool> is_reloaded;
60 std::map<Temp, remat_info> remat;
61 std::map<Instruction *, bool> remat_used;
62
63 spill_ctx(const RegisterDemand target_pressure, Program* program,
64 std::vector<std::vector<RegisterDemand>> register_demand)
65 : target_pressure(target_pressure), program(program),
66 register_demand(register_demand), renames(program->blocks.size()),
67 spills_entry(program->blocks.size()), spills_exit(program->blocks.size()),
68 processed(program->blocks.size(), false) {}
69
70 uint32_t allocate_spill_id(RegClass rc)
71 {
72 interferences.emplace_back(rc, std::set<uint32_t>());
73 is_reloaded.push_back(false);
74 return next_spill_id++;
75 }
76
77 uint32_t next_spill_id = 0;
78 };
79
80 int32_t get_dominator(int idx_a, int idx_b, Program* program, bool is_linear)
81 {
82
83 if (idx_a == -1)
84 return idx_b;
85 if (idx_b == -1)
86 return idx_a;
87 if (is_linear) {
88 while (idx_a != idx_b) {
89 if (idx_a > idx_b)
90 idx_a = program->blocks[idx_a].linear_idom;
91 else
92 idx_b = program->blocks[idx_b].linear_idom;
93 }
94 } else {
95 while (idx_a != idx_b) {
96 if (idx_a > idx_b)
97 idx_a = program->blocks[idx_a].logical_idom;
98 else
99 idx_b = program->blocks[idx_b].logical_idom;
100 }
101 }
102 assert(idx_a != -1);
103 return idx_a;
104 }
105
106 void next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set<uint32_t>& worklist)
107 {
108 Block* block = &ctx.program->blocks[block_idx];
109 std::map<Temp, std::pair<uint32_t, uint32_t>> next_uses = ctx.next_use_distances_end[block_idx];
110
111 /* to compute the next use distance at the beginning of the block, we have to add the block's size */
112 for (std::map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = next_uses.begin(); it != next_uses.end();) {
113 it->second.second = it->second.second + block->instructions.size();
114
115 /* remove the live out exec mask as we really don't want to spill it */
116 if (it->first == block->live_out_exec)
117 it = next_uses.erase(it);
118 else
119 ++it;
120 }
121
122 int idx = block->instructions.size() - 1;
123 while (idx >= 0) {
124 aco_ptr<Instruction>& instr = block->instructions[idx];
125
126 if (instr->opcode == aco_opcode::p_linear_phi ||
127 instr->opcode == aco_opcode::p_phi)
128 break;
129
130 for (const Definition& def : instr->definitions) {
131 if (def.isTemp())
132 next_uses.erase(def.getTemp());
133 }
134
135 for (const Operand& op : instr->operands) {
136 /* omit exec mask */
137 if (op.isFixed() && op.physReg() == exec)
138 continue;
139 if (op.isTemp())
140 next_uses[op.getTemp()] = {block_idx, idx};
141 }
142 idx--;
143 }
144
145 assert(block_idx != 0 || next_uses.empty());
146 ctx.next_use_distances_start[block_idx] = next_uses;
147 while (idx >= 0) {
148 aco_ptr<Instruction>& instr = block->instructions[idx];
149 assert(instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi);
150
151 for (unsigned i = 0; i < instr->operands.size(); i++) {
152 unsigned pred_idx = instr->opcode == aco_opcode::p_phi ?
153 block->logical_preds[i] :
154 block->linear_preds[i];
155 if (instr->operands[i].isTemp()) {
156 if (ctx.next_use_distances_end[pred_idx].find(instr->operands[i].getTemp()) == ctx.next_use_distances_end[pred_idx].end() ||
157 ctx.next_use_distances_end[pred_idx][instr->operands[i].getTemp()] != std::pair<uint32_t, uint32_t>{block_idx, 0})
158 worklist.insert(pred_idx);
159 ctx.next_use_distances_end[pred_idx][instr->operands[i].getTemp()] = {block_idx, 0};
160 }
161 }
162 next_uses.erase(instr->definitions[0].getTemp());
163 idx--;
164 }
165
166 /* all remaining live vars must be live-out at the predecessors */
167 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : next_uses) {
168 Temp temp = pair.first;
169 uint32_t distance = pair.second.second;
170 uint32_t dom = pair.second.first;
171 std::vector<unsigned>& preds = temp.is_linear() ? block->linear_preds : block->logical_preds;
172 for (unsigned pred_idx : preds) {
173 if (ctx.program->blocks[pred_idx].loop_nest_depth > block->loop_nest_depth)
174 distance += 0xFFFF;
175 if (ctx.next_use_distances_end[pred_idx].find(temp) != ctx.next_use_distances_end[pred_idx].end()) {
176 dom = get_dominator(dom, ctx.next_use_distances_end[pred_idx][temp].first, ctx.program, temp.is_linear());
177 distance = std::min(ctx.next_use_distances_end[pred_idx][temp].second, distance);
178 }
179 if (ctx.next_use_distances_end[pred_idx][temp] != std::pair<uint32_t, uint32_t>{dom, distance})
180 worklist.insert(pred_idx);
181 ctx.next_use_distances_end[pred_idx][temp] = {dom, distance};
182 }
183 }
184
185 }
186
187 void compute_global_next_uses(spill_ctx& ctx, std::vector<std::set<Temp>>& live_out)
188 {
189 ctx.next_use_distances_start.resize(ctx.program->blocks.size());
190 ctx.next_use_distances_end.resize(ctx.program->blocks.size());
191 std::set<uint32_t> worklist;
192 for (Block& block : ctx.program->blocks)
193 worklist.insert(block.index);
194
195 while (!worklist.empty()) {
196 std::set<unsigned>::reverse_iterator b_it = worklist.rbegin();
197 unsigned block_idx = *b_it;
198 worklist.erase(block_idx);
199 next_uses_per_block(ctx, block_idx, worklist);
200 }
201 }
202
203 bool should_rematerialize(aco_ptr<Instruction>& instr)
204 {
205 /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
206 if (instr->format != Format::VOP1 && instr->format != Format::SOP1 && instr->format != Format::PSEUDO)
207 return false;
208 /* TODO: pseudo-instruction rematerialization is only supported for p_create_vector */
209 if (instr->format == Format::PSEUDO && instr->opcode != aco_opcode::p_create_vector)
210 return false;
211
212 for (const Operand& op : instr->operands) {
213 /* TODO: rematerialization using temporaries isn't yet supported */
214 if (op.isTemp())
215 return false;
216 }
217
218 /* TODO: rematerialization with multiple definitions isn't yet supported */
219 if (instr->definitions.size() > 1)
220 return false;
221
222 return true;
223 }
224
225 aco_ptr<Instruction> do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
226 {
227 std::map<Temp, remat_info>::iterator remat = ctx.remat.find(tmp);
228 if (remat != ctx.remat.end()) {
229 Instruction *instr = remat->second.instr;
230 assert((instr->format == Format::VOP1 || instr->format == Format::SOP1 || instr->format == Format::PSEUDO) && "unsupported");
231 assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector) && "unsupported");
232 assert(instr->definitions.size() == 1 && "unsupported");
233
234 aco_ptr<Instruction> res;
235 if (instr->format == Format::VOP1) {
236 res.reset(create_instruction<VOP1_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
237 } else if (instr->format == Format::SOP1) {
238 res.reset(create_instruction<SOP1_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
239 } else if (instr->format == Format::PSEUDO) {
240 res.reset(create_instruction<Instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
241 }
242 for (unsigned i = 0; i < instr->operands.size(); i++) {
243 res->operands[i] = instr->operands[i];
244 if (instr->operands[i].isTemp()) {
245 assert(false && "unsupported");
246 if (ctx.remat.count(instr->operands[i].getTemp()))
247 ctx.remat_used[ctx.remat[instr->operands[i].getTemp()].instr] = true;
248 }
249 }
250 res->definitions[0] = Definition(new_name);
251 return res;
252 } else {
253 aco_ptr<Pseudo_instruction> reload{create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
254 reload->operands[0] = Operand(spill_id);
255 reload->definitions[0] = Definition(new_name);
256 ctx.is_reloaded[spill_id] = true;
257 return reload;
258 }
259 }
260
261 void get_rematerialize_info(spill_ctx& ctx)
262 {
263 for (Block& block : ctx.program->blocks) {
264 bool logical = false;
265 for (aco_ptr<Instruction>& instr : block.instructions) {
266 if (instr->opcode == aco_opcode::p_logical_start)
267 logical = true;
268 else if (instr->opcode == aco_opcode::p_logical_end)
269 logical = false;
270 if (logical && should_rematerialize(instr)) {
271 for (const Definition& def : instr->definitions) {
272 if (def.isTemp()) {
273 ctx.remat[def.getTemp()] = (remat_info){instr.get()};
274 ctx.remat_used[instr.get()] = false;
275 }
276 }
277 }
278 }
279 }
280 }
281
282 std::vector<std::map<Temp, uint32_t>> local_next_uses(spill_ctx& ctx, Block* block)
283 {
284 std::vector<std::map<Temp, uint32_t>> local_next_uses(block->instructions.size());
285
286 std::map<Temp, uint32_t> next_uses;
287 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block->index]) {
288 /* omit live out exec mask */
289 if (pair.first == block->live_out_exec)
290 continue;
291
292 next_uses[pair.first] = pair.second.second + block->instructions.size();
293 }
294
295 for (int idx = block->instructions.size() - 1; idx >= 0; idx--) {
296 aco_ptr<Instruction>& instr = block->instructions[idx];
297 if (!instr)
298 break;
299 if (instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi)
300 break;
301
302 for (const Operand& op : instr->operands) {
303 if (op.isFixed() && op.physReg() == exec)
304 continue;
305 if (op.isTemp())
306 next_uses[op.getTemp()] = idx;
307 }
308 for (const Definition& def : instr->definitions) {
309 if (def.isTemp())
310 next_uses.erase(def.getTemp());
311 }
312 local_next_uses[idx] = next_uses;
313 }
314 return local_next_uses;
315 }
316
317
318 RegisterDemand init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
319 {
320 RegisterDemand spilled_registers;
321
322 /* first block, nothing was spilled before */
323 if (block_idx == 0)
324 return {0, 0};
325
326 /* loop header block */
327 if (block->loop_nest_depth > ctx.program->blocks[block_idx - 1].loop_nest_depth) {
328 assert(block->linear_preds[0] == block_idx - 1);
329 assert(block->logical_preds[0] == block_idx - 1);
330
331 /* create new loop_info */
332 ctx.loop_header.emplace(block);
333
334 /* check how many live-through variables should be spilled */
335 RegisterDemand new_demand;
336 unsigned i = block_idx;
337 while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth) {
338 assert(ctx.program->blocks.size() > i);
339 new_demand.update(ctx.program->blocks[i].register_demand);
340 i++;
341 }
342 unsigned loop_end = i;
343
344 /* select live-through vgpr variables */
345 while (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) {
346 unsigned distance = 0;
347 Temp to_spill;
348 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block_idx - 1]) {
349 if (pair.first.type() == RegType::vgpr &&
350 pair.second.first >= loop_end &&
351 pair.second.second > distance &&
352 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
353 to_spill = pair.first;
354 distance = pair.second.second;
355 }
356 }
357 if (distance == 0)
358 break;
359
360 uint32_t spill_id;
361 if (ctx.spills_exit[block_idx - 1].find(to_spill) == ctx.spills_exit[block_idx - 1].end()) {
362 spill_id = ctx.allocate_spill_id(to_spill.regClass());
363 } else {
364 spill_id = ctx.spills_exit[block_idx - 1][to_spill];
365 }
366
367 ctx.spills_entry[block_idx][to_spill] = spill_id;
368 spilled_registers.vgpr += to_spill.size();
369 }
370
371 /* select live-through sgpr variables */
372 while (new_demand.sgpr - spilled_registers.sgpr > ctx.target_pressure.sgpr) {
373 unsigned distance = 0;
374 Temp to_spill;
375 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_end[block_idx - 1]) {
376 if (pair.first.type() == RegType::sgpr &&
377 pair.second.first >= loop_end &&
378 pair.second.second > distance &&
379 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
380 to_spill = pair.first;
381 distance = pair.second.second;
382 }
383 }
384 if (distance == 0)
385 break;
386
387 uint32_t spill_id;
388 if (ctx.spills_exit[block_idx - 1].find(to_spill) == ctx.spills_exit[block_idx - 1].end()) {
389 spill_id = ctx.allocate_spill_id(to_spill.regClass());
390 } else {
391 spill_id = ctx.spills_exit[block_idx - 1][to_spill];
392 }
393
394 ctx.spills_entry[block_idx][to_spill] = spill_id;
395 spilled_registers.sgpr += to_spill.size();
396 }
397
398
399
400 /* shortcut */
401 if (!RegisterDemand(new_demand - spilled_registers).exceeds(ctx.target_pressure))
402 return spilled_registers;
403
404 /* if reg pressure is too high at beginning of loop, add variables with furthest use */
405 unsigned idx = 0;
406 while (block->instructions[idx]->opcode == aco_opcode::p_phi || block->instructions[idx]->opcode == aco_opcode::p_linear_phi)
407 idx++;
408
409 assert(idx != 0 && "loop without phis: TODO");
410 idx--;
411 RegisterDemand reg_pressure = ctx.register_demand[block_idx][idx] - spilled_registers;
412 while (reg_pressure.sgpr > ctx.target_pressure.sgpr) {
413 unsigned distance = 0;
414 Temp to_spill;
415 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
416 if (pair.first.type() == RegType::sgpr &&
417 pair.second.second > distance &&
418 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
419 to_spill = pair.first;
420 distance = pair.second.second;
421 }
422 }
423 assert(distance != 0);
424
425 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
426 spilled_registers.sgpr += to_spill.size();
427 reg_pressure.sgpr -= to_spill.size();
428 }
429 while (reg_pressure.vgpr > ctx.target_pressure.vgpr) {
430 unsigned distance = 0;
431 Temp to_spill;
432 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
433 if (pair.first.type() == RegType::vgpr &&
434 pair.second.second > distance &&
435 ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) {
436 to_spill = pair.first;
437 distance = pair.second.second;
438 }
439 }
440 assert(distance != 0);
441 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
442 spilled_registers.vgpr += to_spill.size();
443 reg_pressure.vgpr -= to_spill.size();
444 }
445
446 return spilled_registers;
447 }
448
449 /* branch block */
450 if (block->linear_preds.size() == 1) {
451 /* keep variables spilled if they are alive and not used in the current block */
452 unsigned pred_idx = block->linear_preds[0];
453 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
454 if (pair.first.type() == RegType::sgpr &&
455 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
456 ctx.next_use_distances_start[block_idx][pair.first].second > block_idx) {
457 ctx.spills_entry[block_idx].insert(pair);
458 spilled_registers.sgpr += pair.first.size();
459 }
460 }
461 if (block->logical_preds.size() == 1) {
462 pred_idx = block->logical_preds[0];
463 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
464 if (pair.first.type() == RegType::vgpr &&
465 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
466 ctx.next_use_distances_end[pred_idx][pair.first].second > block_idx) {
467 ctx.spills_entry[block_idx].insert(pair);
468 spilled_registers.vgpr += pair.first.size();
469 }
470 }
471 }
472
473 /* if register demand is still too high, we just keep all spilled live vars and process the block */
474 if (block->register_demand.sgpr - spilled_registers.sgpr > ctx.target_pressure.sgpr) {
475 pred_idx = block->linear_preds[0];
476 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
477 if (pair.first.type() == RegType::sgpr &&
478 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
479 ctx.spills_entry[block_idx].insert(pair).second) {
480 spilled_registers.sgpr += pair.first.size();
481 }
482 }
483 }
484 if (block->register_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr && block->logical_preds.size() == 1) {
485 pred_idx = block->logical_preds[0];
486 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
487 if (pair.first.type() == RegType::vgpr &&
488 ctx.next_use_distances_start[block_idx].find(pair.first) != ctx.next_use_distances_start[block_idx].end() &&
489 ctx.spills_entry[block_idx].insert(pair).second) {
490 spilled_registers.vgpr += pair.first.size();
491 }
492 }
493 }
494
495 return spilled_registers;
496 }
497
498 /* else: merge block */
499 std::set<Temp> partial_spills;
500
501 /* keep variables spilled on all incoming paths */
502 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
503 std::vector<unsigned>& preds = pair.first.type() == RegType::vgpr ? block->logical_preds : block->linear_preds;
504 /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload it.
505 * Otherwise, if any predecessor reloads it, ensure it's reloaded on all other predecessors.
506 * The idea is that it's better in practice to rematerialize redundantly than to create lots of phis. */
507 /* TODO: test this idea with more than Dawn of War III shaders (the current pipeline-db doesn't seem to exercise this path much) */
508 bool remat = ctx.remat.count(pair.first);
509 bool spill = !remat;
510 uint32_t spill_id = 0;
511 for (unsigned pred_idx : preds) {
512 /* variable is not even live at the predecessor: probably from a phi */
513 if (ctx.next_use_distances_end[pred_idx].find(pair.first) == ctx.next_use_distances_end[pred_idx].end()) {
514 spill = false;
515 break;
516 }
517 if (ctx.spills_exit[pred_idx].find(pair.first) == ctx.spills_exit[pred_idx].end()) {
518 if (!remat)
519 spill = false;
520 } else {
521 partial_spills.insert(pair.first);
522 /* it might be that on one incoming path, the variable has a different spill_id, but add_couple_code() will take care of that. */
523 spill_id = ctx.spills_exit[pred_idx][pair.first];
524 if (remat)
525 spill = true;
526 }
527 }
528 if (spill) {
529 ctx.spills_entry[block_idx][pair.first] = spill_id;
530 partial_spills.erase(pair.first);
531 spilled_registers += pair.first;
532 }
533 }
534
535 /* same for phis */
536 unsigned idx = 0;
537 while (block->instructions[idx]->opcode == aco_opcode::p_linear_phi ||
538 block->instructions[idx]->opcode == aco_opcode::p_phi) {
539 aco_ptr<Instruction>& phi = block->instructions[idx];
540 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
541 bool spill = true;
542
543 for (unsigned i = 0; i < phi->operands.size(); i++) {
544 if (!phi->operands[i].isTemp())
545 spill = false;
546 else if (ctx.spills_exit[preds[i]].find(phi->operands[i].getTemp()) == ctx.spills_exit[preds[i]].end())
547 spill = false;
548 else
549 partial_spills.insert(phi->definitions[0].getTemp());
550 }
551 if (spill) {
552 ctx.spills_entry[block_idx][phi->definitions[0].getTemp()] = ctx.allocate_spill_id(phi->definitions[0].regClass());
553 partial_spills.erase(phi->definitions[0].getTemp());
554 spilled_registers += phi->definitions[0].getTemp();
555 }
556
557 idx++;
558 }
559
560 /* if reg pressure at first instruction is still too high, add partially spilled variables */
561 RegisterDemand reg_pressure;
562 if (idx == 0) {
563 for (const Definition& def : block->instructions[idx]->definitions) {
564 if (def.isTemp()) {
565 reg_pressure -= def.getTemp();
566 }
567 }
568 for (const Operand& op : block->instructions[idx]->operands) {
569 if (op.isTemp() && op.isFirstKill()) {
570 reg_pressure += op.getTemp();
571 }
572 }
573 } else {
574 idx--;
575 }
576 reg_pressure += ctx.register_demand[block_idx][idx] - spilled_registers;
577
578 while (reg_pressure.sgpr > ctx.target_pressure.sgpr) {
579 assert(!partial_spills.empty());
580
581 std::set<Temp>::iterator it = partial_spills.begin();
582 Temp to_spill = *it;
583 unsigned distance = ctx.next_use_distances_start[block_idx][*it].second;
584 while (it != partial_spills.end()) {
585 assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end());
586
587 if (it->type() == RegType::sgpr && ctx.next_use_distances_start[block_idx][*it].second > distance) {
588 distance = ctx.next_use_distances_start[block_idx][*it].second;
589 to_spill = *it;
590 }
591 ++it;
592 }
593 assert(distance != 0);
594
595 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
596 partial_spills.erase(to_spill);
597 spilled_registers.sgpr += to_spill.size();
598 reg_pressure.sgpr -= to_spill.size();
599 }
600
601 while (reg_pressure.vgpr > ctx.target_pressure.vgpr) {
602 assert(!partial_spills.empty());
603
604 std::set<Temp>::iterator it = partial_spills.begin();
605 Temp to_spill = *it;
606 unsigned distance = ctx.next_use_distances_start[block_idx][*it].second;
607 while (it != partial_spills.end()) {
608 assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end());
609
610 if (it->type() == RegType::vgpr && ctx.next_use_distances_start[block_idx][*it].second > distance) {
611 distance = ctx.next_use_distances_start[block_idx][*it].second;
612 to_spill = *it;
613 }
614 ++it;
615 }
616 assert(distance != 0);
617
618 ctx.spills_entry[block_idx][to_spill] = ctx.allocate_spill_id(to_spill.regClass());
619 partial_spills.erase(to_spill);
620 spilled_registers.vgpr += to_spill.size();
621 reg_pressure.vgpr -= to_spill.size();
622 }
623
624 return spilled_registers;
625 }
626
627
628 void add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
629 {
630 /* no coupling code necessary */
631 if (block->linear_preds.size() == 0)
632 return;
633
634 std::vector<aco_ptr<Instruction>> instructions;
635 /* branch block: TODO take other branch into consideration */
636 if (block->linear_preds.size() == 1) {
637 assert(ctx.processed[block->linear_preds[0]]);
638
639 if (block->logical_preds.size() == 1) {
640 unsigned pred_idx = block->logical_preds[0];
641 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> live : ctx.next_use_distances_start[block_idx]) {
642 if (live.first.type() == RegType::sgpr)
643 continue;
644 /* still spilled */
645 if (ctx.spills_entry[block_idx].find(live.first) != ctx.spills_entry[block_idx].end())
646 continue;
647
648 /* in register at end of predecessor */
649 if (ctx.spills_exit[pred_idx].find(live.first) == ctx.spills_exit[pred_idx].end()) {
650 std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(live.first);
651 if (it != ctx.renames[pred_idx].end())
652 ctx.renames[block_idx].insert(*it);
653 continue;
654 }
655
656 /* variable is spilled at predecessor and live at current block: create reload instruction */
657 Temp new_name = {ctx.program->allocateId(), live.first.regClass()};
658 aco_ptr<Instruction> reload = do_reload(ctx, live.first, new_name, ctx.spills_exit[pred_idx][live.first]);
659 instructions.emplace_back(std::move(reload));
660 ctx.renames[block_idx][live.first] = new_name;
661 }
662 }
663
664 unsigned pred_idx = block->linear_preds[0];
665 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> live : ctx.next_use_distances_start[block_idx]) {
666 if (live.first.type() == RegType::vgpr)
667 continue;
668 /* still spilled */
669 if (ctx.spills_entry[block_idx].find(live.first) != ctx.spills_entry[block_idx].end())
670 continue;
671
672 /* in register at end of predecessor */
673 if (ctx.spills_exit[pred_idx].find(live.first) == ctx.spills_exit[pred_idx].end()) {
674 std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(live.first);
675 if (it != ctx.renames[pred_idx].end())
676 ctx.renames[block_idx].insert(*it);
677 continue;
678 }
679
680 /* variable is spilled at predecessor and live at current block: create reload instruction */
681 Temp new_name = {ctx.program->allocateId(), live.first.regClass()};
682 aco_ptr<Instruction> reload = do_reload(ctx, live.first, new_name, ctx.spills_exit[pred_idx][live.first]);
683 instructions.emplace_back(std::move(reload));
684 ctx.renames[block_idx][live.first] = new_name;
685 }
686
687 /* combine new reload instructions with original block */
688 if (!instructions.empty()) {
689 unsigned insert_idx = 0;
690 while (block->instructions[insert_idx]->opcode == aco_opcode::p_phi ||
691 block->instructions[insert_idx]->opcode == aco_opcode::p_linear_phi) {
692 insert_idx++;
693 }
694 ctx.register_demand[block->index].insert(std::next(ctx.register_demand[block->index].begin(), insert_idx),
695 instructions.size(), RegisterDemand());
696 block->instructions.insert(std::next(block->instructions.begin(), insert_idx),
697 std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(instructions.begin()),
698 std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(instructions.end()));
699 }
700 return;
701 }
702
703 /* loop header and merge blocks: check if all (linear) predecessors have been processed */
704 for (ASSERTED unsigned pred : block->linear_preds)
705 assert(ctx.processed[pred]);
706
707 /* iterate the phi nodes for which operands to spill at the predecessor */
708 for (aco_ptr<Instruction>& phi : block->instructions) {
709 if (phi->opcode != aco_opcode::p_phi &&
710 phi->opcode != aco_opcode::p_linear_phi)
711 break;
712
713 /* if the phi is not spilled, add to instructions */
714 if (ctx.spills_entry[block_idx].find(phi->definitions[0].getTemp()) == ctx.spills_entry[block_idx].end()) {
715 instructions.emplace_back(std::move(phi));
716 continue;
717 }
718
719 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
720 uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()];
721
722 for (unsigned i = 0; i < phi->operands.size(); i++) {
723 unsigned pred_idx = preds[i];
724
725 /* we have to spill constants to the same memory address */
726 if (phi->operands[i].isConstant()) {
727 uint32_t spill_id = ctx.allocate_spill_id(phi->definitions[0].regClass());
728 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
729 ctx.interferences[def_spill_id].second.emplace(pair.second);
730 ctx.interferences[pair.second].second.emplace(def_spill_id);
731 }
732 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{def_spill_id, spill_id});
733
734 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
735 spill->operands[0] = phi->operands[i];
736 spill->operands[1] = Operand(spill_id);
737 Block& pred = ctx.program->blocks[pred_idx];
738 unsigned idx = pred.instructions.size();
739 do {
740 assert(idx != 0);
741 idx--;
742 } while (phi->opcode == aco_opcode::p_phi && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
743 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
744 pred.instructions.insert(it, std::move(spill));
745 continue;
746 }
747 if (!phi->operands[i].isTemp())
748 continue;
749
750 /* build interferences between the phi def and all spilled variables at the predecessor blocks */
751 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
752 if (phi->operands[i].getTemp() == pair.first)
753 continue;
754 ctx.interferences[def_spill_id].second.emplace(pair.second);
755 ctx.interferences[pair.second].second.emplace(def_spill_id);
756 }
757
758 /* variable is already spilled at predecessor */
759 std::map<Temp, uint32_t>::iterator spilled = ctx.spills_exit[pred_idx].find(phi->operands[i].getTemp());
760 if (spilled != ctx.spills_exit[pred_idx].end()) {
761 if (spilled->second != def_spill_id)
762 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{def_spill_id, spilled->second});
763 continue;
764 }
765
766 /* rename if necessary */
767 Temp var = phi->operands[i].getTemp();
768 std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
769 if (rename_it != ctx.renames[pred_idx].end()) {
770 var = rename_it->second;
771 ctx.renames[pred_idx].erase(rename_it);
772 }
773
774 uint32_t spill_id = ctx.allocate_spill_id(phi->definitions[0].regClass());
775 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{def_spill_id, spill_id});
776 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
777 spill->operands[0] = Operand(var);
778 spill->operands[1] = Operand(spill_id);
779 Block& pred = ctx.program->blocks[pred_idx];
780 unsigned idx = pred.instructions.size();
781 do {
782 assert(idx != 0);
783 idx--;
784 } while (phi->opcode == aco_opcode::p_phi && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
785 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
786 pred.instructions.insert(it, std::move(spill));
787 ctx.spills_exit[pred_idx][phi->operands[i].getTemp()] = spill_id;
788 }
789
790 /* remove phi from instructions */
791 phi.reset();
792 }
793
794 /* iterate all (other) spilled variables for which to spill at the predecessor */
795 // TODO: would be better to have them sorted: first vgprs and first with longest distance
796 for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) {
797 std::vector<unsigned> preds = pair.first.type() == RegType::vgpr ? block->logical_preds : block->linear_preds;
798
799 for (unsigned pred_idx : preds) {
800 /* add interferences between spilled variable and predecessors exit spills */
801 for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx]) {
802 if (exit_spill.first == pair.first)
803 continue;
804 ctx.interferences[exit_spill.second].second.emplace(pair.second);
805 ctx.interferences[pair.second].second.emplace(exit_spill.second);
806 }
807
808 /* variable is already spilled at predecessor */
809 std::map<Temp, uint32_t>::iterator spilled = ctx.spills_exit[pred_idx].find(pair.first);
810 if (spilled != ctx.spills_exit[pred_idx].end()) {
811 if (spilled->second != pair.second)
812 ctx.affinities.emplace_back(std::pair<uint32_t, uint32_t>{pair.second, spilled->second});
813 continue;
814 }
815
816 /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */ // FIXME: lower_to_cssa()
817 if (ctx.next_use_distances_end[pred_idx].find(pair.first) == ctx.next_use_distances_end[pred_idx].end())
818 continue;
819
820 /* variable is in register at predecessor and has to be spilled */
821 /* rename if necessary */
822 Temp var = pair.first;
823 std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
824 if (rename_it != ctx.renames[pred_idx].end()) {
825 var = rename_it->second;
826 ctx.renames[pred_idx].erase(rename_it);
827 }
828
829 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
830 spill->operands[0] = Operand(var);
831 spill->operands[1] = Operand(pair.second);
832 Block& pred = ctx.program->blocks[pred_idx];
833 unsigned idx = pred.instructions.size();
834 do {
835 assert(idx != 0);
836 idx--;
837 } while (pair.first.type() == RegType::vgpr && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
838 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
839 pred.instructions.insert(it, std::move(spill));
840 ctx.spills_exit[pred.index][pair.first] = pair.second;
841 }
842 }
843
844 /* iterate phis for which operands to reload */
845 for (aco_ptr<Instruction>& phi : instructions) {
846 assert(phi->opcode == aco_opcode::p_phi || phi->opcode == aco_opcode::p_linear_phi);
847 assert(ctx.spills_entry[block_idx].find(phi->definitions[0].getTemp()) == ctx.spills_entry[block_idx].end());
848
849 std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
850 for (unsigned i = 0; i < phi->operands.size(); i++) {
851 if (!phi->operands[i].isTemp())
852 continue;
853 unsigned pred_idx = preds[i];
854
855 /* rename operand */
856 if (ctx.spills_exit[pred_idx].find(phi->operands[i].getTemp()) == ctx.spills_exit[pred_idx].end()) {
857 std::map<Temp, Temp>::iterator it = ctx.renames[pred_idx].find(phi->operands[i].getTemp());
858 if (it != ctx.renames[pred_idx].end())
859 phi->operands[i].setTemp(it->second);
860 continue;
861 }
862
863 Temp tmp = phi->operands[i].getTemp();
864
865 /* reload phi operand at end of predecessor block */
866 Temp new_name = {ctx.program->allocateId(), tmp.regClass()};
867 Block& pred = ctx.program->blocks[pred_idx];
868 unsigned idx = pred.instructions.size();
869 do {
870 assert(idx != 0);
871 idx--;
872 } while (phi->opcode == aco_opcode::p_phi && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
873 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
874
875 aco_ptr<Instruction> reload = do_reload(ctx, tmp, new_name, ctx.spills_exit[pred_idx][tmp]);
876 pred.instructions.insert(it, std::move(reload));
877
878 ctx.spills_exit[pred_idx].erase(tmp);
879 ctx.renames[pred_idx][tmp] = new_name;
880 phi->operands[i].setTemp(new_name);
881 }
882 }
883
884 /* iterate live variables for which to reload */
885 // TODO: reload at current block if variable is spilled on all predecessors
886 for (std::pair<Temp, std::pair<uint32_t, uint32_t>> pair : ctx.next_use_distances_start[block_idx]) {
887 /* skip spilled variables */
888 if (ctx.spills_entry[block_idx].find(pair.first) != ctx.spills_entry[block_idx].end())
889 continue;
890 std::vector<unsigned> preds = pair.first.type() == RegType::vgpr ? block->logical_preds : block->linear_preds;
891
892 /* variable is dead at predecessor, it must be from a phi */
893 bool is_dead = false;
894 for (unsigned pred_idx : preds) {
895 if (ctx.next_use_distances_end[pred_idx].find(pair.first) == ctx.next_use_distances_end[pred_idx].end())
896 is_dead = true;
897 }
898 if (is_dead)
899 continue;
900 for (unsigned pred_idx : preds) {
901 /* the variable is not spilled at the predecessor */
902 if (ctx.spills_exit[pred_idx].find(pair.first) == ctx.spills_exit[pred_idx].end())
903 continue;
904
905 /* variable is spilled at predecessor and has to be reloaded */
906 Temp new_name = {ctx.program->allocateId(), pair.first.regClass()};
907 Block& pred = ctx.program->blocks[pred_idx];
908 unsigned idx = pred.instructions.size();
909 do {
910 assert(idx != 0);
911 idx--;
912 } while (pair.first.type() == RegType::vgpr && pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
913 std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
914
915 aco_ptr<Instruction> reload = do_reload(ctx, pair.first, new_name, ctx.spills_exit[pred.index][pair.first]);
916 pred.instructions.insert(it, std::move(reload));
917
918 ctx.spills_exit[pred.index].erase(pair.first);
919 ctx.renames[pred.index][pair.first] = new_name;
920 }
921
922 /* check if we have to create a new phi for this variable */
923 Temp rename = Temp();
924 bool is_same = true;
925 for (unsigned pred_idx : preds) {
926 if (ctx.renames[pred_idx].find(pair.first) == ctx.renames[pred_idx].end()) {
927 if (rename == Temp())
928 rename = pair.first;
929 else
930 is_same = rename == pair.first;
931 } else {
932 if (rename == Temp())
933 rename = ctx.renames[pred_idx][pair.first];
934 else
935 is_same = rename == ctx.renames[pred_idx][pair.first];
936 }
937
938 if (!is_same)
939 break;
940 }
941
942 if (!is_same) {
943 /* the variable was renamed differently in the predecessors: we have to create a phi */
944 aco_opcode opcode = pair.first.type() == RegType::vgpr ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
945 aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(opcode, Format::PSEUDO, preds.size(), 1)};
946 rename = {ctx.program->allocateId(), pair.first.regClass()};
947 for (unsigned i = 0; i < phi->operands.size(); i++) {
948 Temp tmp;
949 if (ctx.renames[preds[i]].find(pair.first) != ctx.renames[preds[i]].end())
950 tmp = ctx.renames[preds[i]][pair.first];
951 else if (preds[i] >= block_idx)
952 tmp = rename;
953 else
954 tmp = pair.first;
955 phi->operands[i] = Operand(tmp);
956 }
957 phi->definitions[0] = Definition(rename);
958 instructions.emplace_back(std::move(phi));
959 }
960
961 /* the variable was renamed: add new name to renames */
962 if (!(rename == Temp() || rename == pair.first))
963 ctx.renames[block_idx][pair.first] = rename;
964 }
965
966 /* combine phis with instructions */
967 unsigned idx = 0;
968 while (!block->instructions[idx]) {
969 idx++;
970 }
971
972 ctx.register_demand[block->index].erase(ctx.register_demand[block->index].begin(), ctx.register_demand[block->index].begin() + idx);
973 ctx.register_demand[block->index].insert(ctx.register_demand[block->index].begin(), instructions.size(), RegisterDemand());
974
975 std::vector<aco_ptr<Instruction>>::iterator start = std::next(block->instructions.begin(), idx);
976 instructions.insert(instructions.end(), std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(start),
977 std::move_iterator<std::vector<aco_ptr<Instruction>>::iterator>(block->instructions.end()));
978 block->instructions = std::move(instructions);
979 }
980
981 void process_block(spill_ctx& ctx, unsigned block_idx, Block* block,
982 std::map<Temp, uint32_t> &current_spills, RegisterDemand spilled_registers)
983 {
984 std::vector<std::map<Temp, uint32_t>> local_next_use_distance;
985 std::vector<aco_ptr<Instruction>> instructions;
986 unsigned idx = 0;
987
988 /* phis are handled separetely */
989 while (block->instructions[idx]->opcode == aco_opcode::p_phi ||
990 block->instructions[idx]->opcode == aco_opcode::p_linear_phi) {
991 aco_ptr<Instruction>& instr = block->instructions[idx];
992 for (const Operand& op : instr->operands) {
993 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
994 if (op.isTemp() && ctx.remat.count(op.getTemp()))
995 ctx.remat_used[ctx.remat[op.getTemp()].instr] = true;
996 }
997 instructions.emplace_back(std::move(instr));
998 idx++;
999 }
1000
1001 if (block->register_demand.exceeds(ctx.target_pressure))
1002 local_next_use_distance = local_next_uses(ctx, block);
1003
1004 while (idx < block->instructions.size()) {
1005 aco_ptr<Instruction>& instr = block->instructions[idx];
1006
1007 std::map<Temp, std::pair<Temp, uint32_t>> reloads;
1008 std::map<Temp, uint32_t> spills;
1009 /* rename and reload operands */
1010 for (Operand& op : instr->operands) {
1011 if (!op.isTemp())
1012 continue;
1013 if (current_spills.find(op.getTemp()) == current_spills.end()) {
1014 /* the Operand is in register: check if it was renamed */
1015 if (ctx.renames[block_idx].find(op.getTemp()) != ctx.renames[block_idx].end())
1016 op.setTemp(ctx.renames[block_idx][op.getTemp()]);
1017 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
1018 if (ctx.remat.count(op.getTemp()))
1019 ctx.remat_used[ctx.remat[op.getTemp()].instr] = true;
1020 continue;
1021 }
1022 /* the Operand is spilled: add it to reloads */
1023 Temp new_tmp = {ctx.program->allocateId(), op.regClass()};
1024 ctx.renames[block_idx][op.getTemp()] = new_tmp;
1025 reloads[new_tmp] = std::make_pair(op.getTemp(), current_spills[op.getTemp()]);
1026 current_spills.erase(op.getTemp());
1027 op.setTemp(new_tmp);
1028 spilled_registers -= new_tmp;
1029 }
1030
1031 /* check if register demand is low enough before and after the current instruction */
1032 if (block->register_demand.exceeds(ctx.target_pressure)) {
1033
1034 RegisterDemand new_demand = ctx.register_demand[block_idx][idx];
1035 if (idx == 0) {
1036 for (const Definition& def : instr->definitions) {
1037 if (!def.isTemp())
1038 continue;
1039 new_demand += def.getTemp();
1040 }
1041 } else {
1042 new_demand.update(ctx.register_demand[block_idx][idx - 1]);
1043 }
1044
1045 assert(!local_next_use_distance.empty());
1046
1047 /* if reg pressure is too high, spill variable with furthest next use */
1048 while (RegisterDemand(new_demand - spilled_registers).exceeds(ctx.target_pressure)) {
1049 unsigned distance = 0;
1050 Temp to_spill;
1051 bool do_rematerialize = false;
1052 if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) {
1053 for (std::pair<Temp, uint32_t> pair : local_next_use_distance[idx]) {
1054 bool can_rematerialize = ctx.remat.count(pair.first);
1055 if (pair.first.type() == RegType::vgpr &&
1056 ((pair.second > distance && can_rematerialize == do_rematerialize) ||
1057 (can_rematerialize && !do_rematerialize && pair.second > idx)) &&
1058 current_spills.find(pair.first) == current_spills.end() &&
1059 ctx.spills_exit[block_idx].find(pair.first) == ctx.spills_exit[block_idx].end()) {
1060 to_spill = pair.first;
1061 distance = pair.second;
1062 do_rematerialize = can_rematerialize;
1063 }
1064 }
1065 } else {
1066 for (std::pair<Temp, uint32_t> pair : local_next_use_distance[idx]) {
1067 bool can_rematerialize = ctx.remat.count(pair.first);
1068 if (pair.first.type() == RegType::sgpr &&
1069 ((pair.second > distance && can_rematerialize == do_rematerialize) ||
1070 (can_rematerialize && !do_rematerialize && pair.second > idx)) &&
1071 current_spills.find(pair.first) == current_spills.end() &&
1072 ctx.spills_exit[block_idx].find(pair.first) == ctx.spills_exit[block_idx].end()) {
1073 to_spill = pair.first;
1074 distance = pair.second;
1075 do_rematerialize = can_rematerialize;
1076 }
1077 }
1078 }
1079
1080 assert(distance != 0 && distance > idx);
1081 uint32_t spill_id = ctx.allocate_spill_id(to_spill.regClass());
1082
1083 /* add interferences with currently spilled variables */
1084 for (std::pair<Temp, uint32_t> pair : current_spills) {
1085 ctx.interferences[spill_id].second.emplace(pair.second);
1086 ctx.interferences[pair.second].second.emplace(spill_id);
1087 }
1088 for (std::pair<Temp, std::pair<Temp, uint32_t>> pair : reloads) {
1089 ctx.interferences[spill_id].second.emplace(pair.second.second);
1090 ctx.interferences[pair.second.second].second.emplace(spill_id);
1091 }
1092
1093 current_spills[to_spill] = spill_id;
1094 spilled_registers += to_spill;
1095
1096 /* rename if necessary */
1097 if (ctx.renames[block_idx].find(to_spill) != ctx.renames[block_idx].end()) {
1098 to_spill = ctx.renames[block_idx][to_spill];
1099 }
1100
1101 /* add spill to new instructions */
1102 aco_ptr<Pseudo_instruction> spill{create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
1103 spill->operands[0] = Operand(to_spill);
1104 spill->operands[1] = Operand(spill_id);
1105 instructions.emplace_back(std::move(spill));
1106 }
1107 }
1108
1109 /* add reloads and instruction to new instructions */
1110 for (std::pair<Temp, std::pair<Temp, uint32_t>> pair : reloads) {
1111 aco_ptr<Instruction> reload = do_reload(ctx, pair.second.first, pair.first, pair.second.second);
1112 instructions.emplace_back(std::move(reload));
1113 }
1114 instructions.emplace_back(std::move(instr));
1115 idx++;
1116 }
1117
1118 block->instructions = std::move(instructions);
1119 ctx.spills_exit[block_idx].insert(current_spills.begin(), current_spills.end());
1120 }
1121
1122 void spill_block(spill_ctx& ctx, unsigned block_idx)
1123 {
1124 Block* block = &ctx.program->blocks[block_idx];
1125 ctx.processed[block_idx] = true;
1126
1127 /* determine set of variables which are spilled at the beginning of the block */
1128 RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx);
1129
1130 /* add interferences for spilled variables */
1131 for (std::pair<Temp, uint32_t> x : ctx.spills_entry[block_idx]) {
1132 for (std::pair<Temp, uint32_t> y : ctx.spills_entry[block_idx])
1133 if (x.second != y.second)
1134 ctx.interferences[x.second].second.emplace(y.second);
1135 }
1136
1137 bool is_loop_header = block->loop_nest_depth && ctx.loop_header.top()->index == block_idx;
1138 if (!is_loop_header) {
1139 /* add spill/reload code on incoming control flow edges */
1140 add_coupling_code(ctx, block, block_idx);
1141 }
1142
1143 std::map<Temp, uint32_t> current_spills = ctx.spills_entry[block_idx];
1144
1145 /* check conditions to process this block */
1146 bool process = RegisterDemand(block->register_demand - spilled_registers).exceeds(ctx.target_pressure) ||
1147 !ctx.renames[block_idx].empty() ||
1148 ctx.remat_used.size();
1149
1150 std::map<Temp, uint32_t>::iterator it = current_spills.begin();
1151 while (!process && it != current_spills.end()) {
1152 if (ctx.next_use_distances_start[block_idx][it->first].first == block_idx)
1153 process = true;
1154 ++it;
1155 }
1156
1157 if (process)
1158 process_block(ctx, block_idx, block, current_spills, spilled_registers);
1159 else
1160 ctx.spills_exit[block_idx].insert(current_spills.begin(), current_spills.end());
1161
1162 /* check if the next block leaves the current loop */
1163 if (block->loop_nest_depth == 0 || ctx.program->blocks[block_idx + 1].loop_nest_depth >= block->loop_nest_depth)
1164 return;
1165
1166 Block* loop_header = ctx.loop_header.top();
1167
1168 /* preserve original renames at end of loop header block */
1169 std::map<Temp, Temp> renames = std::move(ctx.renames[loop_header->index]);
1170
1171 /* add coupling code to all loop header predecessors */
1172 add_coupling_code(ctx, loop_header, loop_header->index);
1173
1174 /* update remat_used for phis added in add_coupling_code() */
1175 for (aco_ptr<Instruction>& instr : loop_header->instructions) {
1176 if (!is_phi(instr))
1177 break;
1178 for (const Operand& op : instr->operands) {
1179 if (op.isTemp() && ctx.remat.count(op.getTemp()))
1180 ctx.remat_used[ctx.remat[op.getTemp()].instr] = true;
1181 }
1182 }
1183
1184 /* propagate new renames through loop: i.e. repair the SSA */
1185 renames.swap(ctx.renames[loop_header->index]);
1186 for (std::pair<Temp, Temp> rename : renames) {
1187 for (unsigned idx = loop_header->index; idx <= block_idx; idx++) {
1188 Block& current = ctx.program->blocks[idx];
1189 std::vector<aco_ptr<Instruction>>::iterator instr_it = current.instructions.begin();
1190
1191 /* first rename phis */
1192 while (instr_it != current.instructions.end()) {
1193 aco_ptr<Instruction>& phi = *instr_it;
1194 if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
1195 break;
1196 /* no need to rename the loop header phis once again. this happened in add_coupling_code() */
1197 if (idx == loop_header->index) {
1198 instr_it++;
1199 continue;
1200 }
1201
1202 for (Operand& op : phi->operands) {
1203 if (!op.isTemp())
1204 continue;
1205 if (op.getTemp() == rename.first)
1206 op.setTemp(rename.second);
1207 }
1208 instr_it++;
1209 }
1210
1211 std::map<Temp, std::pair<uint32_t, uint32_t>>::iterator it = ctx.next_use_distances_start[idx].find(rename.first);
1212
1213 /* variable is not live at beginning of this block */
1214 if (it == ctx.next_use_distances_start[idx].end())
1215 continue;
1216
1217 /* if the variable is live at the block's exit, add rename */
1218 if (ctx.next_use_distances_end[idx].find(rename.first) != ctx.next_use_distances_end[idx].end())
1219 ctx.renames[idx].insert(rename);
1220
1221 /* rename all uses in this block */
1222 bool renamed = false;
1223 while (!renamed && instr_it != current.instructions.end()) {
1224 aco_ptr<Instruction>& instr = *instr_it;
1225 for (Operand& op : instr->operands) {
1226 if (!op.isTemp())
1227 continue;
1228 if (op.getTemp() == rename.first) {
1229 op.setTemp(rename.second);
1230 /* we can stop with this block as soon as the variable is spilled */
1231 if (instr->opcode == aco_opcode::p_spill)
1232 renamed = true;
1233 }
1234 }
1235 instr_it++;
1236 }
1237 }
1238 }
1239
1240 /* remove loop header info from stack */
1241 ctx.loop_header.pop();
1242 }
1243
1244 void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
1245 std::map<uint32_t, uint32_t> sgpr_slot;
1246 std::map<uint32_t, uint32_t> vgpr_slot;
1247 std::vector<bool> is_assigned(ctx.interferences.size());
1248
1249 /* first, handle affinities: just merge all interferences into both spill ids */
1250 for (std::pair<uint32_t, uint32_t> pair : ctx.affinities) {
1251 assert(pair.first != pair.second);
1252 for (uint32_t id : ctx.interferences[pair.first].second)
1253 ctx.interferences[id].second.insert(pair.second);
1254 for (uint32_t id : ctx.interferences[pair.second].second)
1255 ctx.interferences[id].second.insert(pair.first);
1256 ctx.interferences[pair.first].second.insert(ctx.interferences[pair.second].second.begin(), ctx.interferences[pair.second].second.end());
1257 ctx.interferences[pair.second].second.insert(ctx.interferences[pair.first].second.begin(), ctx.interferences[pair.first].second.end());
1258
1259 bool reloaded = ctx.is_reloaded[pair.first] || ctx.is_reloaded[pair.second];
1260 ctx.is_reloaded[pair.first] = ctx.is_reloaded[pair.second] = reloaded;
1261 }
1262 for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++)
1263 for (ASSERTED uint32_t id : ctx.interferences[i].second)
1264 assert(i != id);
1265
1266 /* for each spill slot, assign as many spill ids as possible */
1267 std::vector<std::set<uint32_t>> spill_slot_interferences;
1268 unsigned slot_idx = 0;
1269 bool done = false;
1270
1271 /* assign sgpr spill slots */
1272 while (!done) {
1273 done = true;
1274 for (unsigned id = 0; id < ctx.interferences.size(); id++) {
1275 if (is_assigned[id] || !ctx.is_reloaded[id])
1276 continue;
1277 if (ctx.interferences[id].first.type() != RegType::sgpr)
1278 continue;
1279
1280 /* check interferences */
1281 bool interferes = false;
1282 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) {
1283 if (i == spill_slot_interferences.size())
1284 spill_slot_interferences.emplace_back(std::set<uint32_t>());
1285 if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end() || i / 64 != slot_idx / 64) {
1286 interferes = true;
1287 break;
1288 }
1289 }
1290 if (interferes) {
1291 done = false;
1292 continue;
1293 }
1294
1295 /* we found a spill id which can be assigned to current spill slot */
1296 sgpr_slot[id] = slot_idx;
1297 is_assigned[id] = true;
1298 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++)
1299 spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end());
1300 }
1301 slot_idx++;
1302 }
1303
1304 slot_idx = 0;
1305 done = false;
1306
1307 /* assign vgpr spill slots */
1308 while (!done) {
1309 done = true;
1310 for (unsigned id = 0; id < ctx.interferences.size(); id++) {
1311 if (is_assigned[id] || !ctx.is_reloaded[id])
1312 continue;
1313 if (ctx.interferences[id].first.type() != RegType::vgpr)
1314 continue;
1315
1316 /* check interferences */
1317 bool interferes = false;
1318 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++) {
1319 if (i == spill_slot_interferences.size())
1320 spill_slot_interferences.emplace_back(std::set<uint32_t>());
1321 /* check for interference and ensure that vector regs are stored next to each other */
1322 if (spill_slot_interferences[i].find(id) != spill_slot_interferences[i].end() || i / 64 != slot_idx / 64) {
1323 interferes = true;
1324 break;
1325 }
1326 }
1327 if (interferes) {
1328 done = false;
1329 continue;
1330 }
1331
1332 /* we found a spill id which can be assigned to current spill slot */
1333 vgpr_slot[id] = slot_idx;
1334 is_assigned[id] = true;
1335 for (unsigned i = slot_idx; i < slot_idx + ctx.interferences[id].first.size(); i++)
1336 spill_slot_interferences[i].insert(ctx.interferences[id].second.begin(), ctx.interferences[id].second.end());
1337 }
1338 slot_idx++;
1339 }
1340
1341 for (unsigned id = 0; id < is_assigned.size(); id++)
1342 assert(is_assigned[id] || !ctx.is_reloaded[id]);
1343
1344 for (std::pair<uint32_t, uint32_t> pair : ctx.affinities) {
1345 assert(is_assigned[pair.first] == is_assigned[pair.second]);
1346 if (!is_assigned[pair.first])
1347 continue;
1348 assert(ctx.is_reloaded[pair.first] == ctx.is_reloaded[pair.second]);
1349 assert(ctx.interferences[pair.first].first.type() == ctx.interferences[pair.second].first.type());
1350 if (ctx.interferences[pair.first].first.type() == RegType::sgpr)
1351 assert(sgpr_slot[pair.first] == sgpr_slot[pair.second]);
1352 else
1353 assert(vgpr_slot[pair.first] == vgpr_slot[pair.second]);
1354 }
1355
1356 /* hope, we didn't mess up */
1357 std::vector<Temp> vgpr_spill_temps((spill_slot_interferences.size() + 63) / 64);
1358 assert(vgpr_spill_temps.size() <= spills_to_vgpr);
1359
1360 /* replace pseudo instructions with actual hardware instructions */
1361 unsigned last_top_level_block_idx = 0;
1362 std::vector<bool> reload_in_loop(vgpr_spill_temps.size());
1363 for (Block& block : ctx.program->blocks) {
1364
1365 /* after loops, we insert a user if there was a reload inside the loop */
1366 if (block.loop_nest_depth == 0) {
1367 int end_vgprs = 0;
1368 for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1369 if (reload_in_loop[i])
1370 end_vgprs++;
1371 }
1372
1373 if (end_vgprs > 0) {
1374 aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, end_vgprs, 0)};
1375 int k = 0;
1376 for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1377 if (reload_in_loop[i])
1378 destr->operands[k++] = Operand(vgpr_spill_temps[i]);
1379 reload_in_loop[i] = false;
1380 }
1381 /* find insertion point */
1382 std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
1383 while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
1384 ++it;
1385 block.instructions.insert(it, std::move(destr));
1386 }
1387 }
1388
1389 if (block.kind & block_kind_top_level && !block.linear_preds.empty()) {
1390 last_top_level_block_idx = block.index;
1391
1392 /* check if any spilled variables use a created linear vgpr, otherwise destroy them */
1393 for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1394 if (vgpr_spill_temps[i] == Temp())
1395 continue;
1396
1397 bool can_destroy = true;
1398 for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[block.linear_preds[0]]) {
1399
1400 if (sgpr_slot.find(pair.second) != sgpr_slot.end() &&
1401 sgpr_slot[pair.second] / 64 == i) {
1402 can_destroy = false;
1403 break;
1404 }
1405 }
1406 if (can_destroy)
1407 vgpr_spill_temps[i] = Temp();
1408 }
1409 }
1410
1411 std::vector<aco_ptr<Instruction>>::iterator it;
1412 std::vector<aco_ptr<Instruction>> instructions;
1413 instructions.reserve(block.instructions.size());
1414 for (it = block.instructions.begin(); it != block.instructions.end(); ++it) {
1415
1416 if ((*it)->opcode == aco_opcode::p_spill) {
1417 uint32_t spill_id = (*it)->operands[1].constantValue();
1418
1419 if (!ctx.is_reloaded[spill_id]) {
1420 /* never reloaded, so don't spill */
1421 } else if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
1422 /* spill vgpr */
1423 ctx.program->config->spilled_vgprs += (*it)->operands[0].size();
1424
1425 assert(false && "vgpr spilling not yet implemented.");
1426 } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
1427 ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
1428
1429 uint32_t spill_slot = sgpr_slot[spill_id];
1430
1431 /* check if the linear vgpr already exists */
1432 if (vgpr_spill_temps[spill_slot / 64] == Temp()) {
1433 Temp linear_vgpr = {ctx.program->allocateId(), v1.as_linear()};
1434 vgpr_spill_temps[spill_slot / 64] = linear_vgpr;
1435 aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1436 create->definitions[0] = Definition(linear_vgpr);
1437 /* find the right place to insert this definition */
1438 if (last_top_level_block_idx == block.index) {
1439 /* insert right before the current instruction */
1440 instructions.emplace_back(std::move(create));
1441 } else {
1442 assert(last_top_level_block_idx < block.index);
1443 /* insert before the branch at last top level block */
1444 std::vector<aco_ptr<Instruction>>& instructions = ctx.program->blocks[last_top_level_block_idx].instructions;
1445 instructions.insert(std::next(instructions.begin(), instructions.size() - 1), std::move(create));
1446 }
1447 }
1448
1449 /* spill sgpr: just add the vgpr temp to operands */
1450 Pseudo_instruction* spill = create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
1451 spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / 64]);
1452 spill->operands[1] = Operand(spill_slot % 64);
1453 spill->operands[2] = (*it)->operands[0];
1454 instructions.emplace_back(aco_ptr<Instruction>(spill));
1455 } else {
1456 unreachable("No spill slot assigned for spill id");
1457 }
1458
1459 } else if ((*it)->opcode == aco_opcode::p_reload) {
1460 uint32_t spill_id = (*it)->operands[0].constantValue();
1461 assert(ctx.is_reloaded[spill_id]);
1462
1463 if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
1464 /* reload vgpr */
1465 assert(false && "vgpr spilling not yet implemented.");
1466
1467 } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
1468 uint32_t spill_slot = sgpr_slot[spill_id];
1469 reload_in_loop[spill_slot / 64] = block.loop_nest_depth > 0;
1470
1471 /* check if the linear vgpr already exists */
1472 if (vgpr_spill_temps[spill_slot / 64] == Temp()) {
1473 Temp linear_vgpr = {ctx.program->allocateId(), v1.as_linear()};
1474 vgpr_spill_temps[spill_slot / 64] = linear_vgpr;
1475 aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1476 create->definitions[0] = Definition(linear_vgpr);
1477 /* find the right place to insert this definition */
1478 if (last_top_level_block_idx == block.index) {
1479 /* insert right before the current instruction */
1480 instructions.emplace_back(std::move(create));
1481 } else {
1482 assert(last_top_level_block_idx < block.index);
1483 /* insert before the branch at last top level block */
1484 std::vector<aco_ptr<Instruction>>& instructions = ctx.program->blocks[last_top_level_block_idx].instructions;
1485 instructions.insert(std::next(instructions.begin(), instructions.size() - 1), std::move(create));
1486 }
1487 }
1488
1489 /* reload sgpr: just add the vgpr temp to operands */
1490 Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
1491 reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / 64]);
1492 reload->operands[1] = Operand(spill_slot % 64);
1493 reload->definitions[0] = (*it)->definitions[0];
1494 instructions.emplace_back(aco_ptr<Instruction>(reload));
1495 } else {
1496 unreachable("No spill slot assigned for spill id");
1497 }
1498 } else if (!ctx.remat_used.count(it->get()) || ctx.remat_used[it->get()]) {
1499 instructions.emplace_back(std::move(*it));
1500 }
1501
1502 }
1503 block.instructions = std::move(instructions);
1504 }
1505
1506 /* SSA elimination inserts copies for logical phis right before p_logical_end
1507 * So if a linear vgpr is used between that p_logical_end and the branch,
1508 * we need to ensure logical phis don't choose a definition which aliases
1509 * the linear vgpr.
1510 * TODO: Moving the spills and reloads to before p_logical_end might produce
1511 * slightly better code. */
1512 for (Block& block : ctx.program->blocks) {
1513 /* loops exits are already handled */
1514 if (block.logical_preds.size() <= 1)
1515 continue;
1516
1517 bool has_logical_phis = false;
1518 for (aco_ptr<Instruction>& instr : block.instructions) {
1519 if (instr->opcode == aco_opcode::p_phi) {
1520 has_logical_phis = true;
1521 break;
1522 } else if (instr->opcode != aco_opcode::p_linear_phi) {
1523 break;
1524 }
1525 }
1526 if (!has_logical_phis)
1527 continue;
1528
1529 std::set<Temp> vgprs;
1530 for (unsigned pred_idx : block.logical_preds) {
1531 Block& pred = ctx.program->blocks[pred_idx];
1532 for (int i = pred.instructions.size() - 1; i >= 0; i--) {
1533 aco_ptr<Instruction>& pred_instr = pred.instructions[i];
1534 if (pred_instr->opcode == aco_opcode::p_logical_end) {
1535 break;
1536 } else if (pred_instr->opcode == aco_opcode::p_spill ||
1537 pred_instr->opcode == aco_opcode::p_reload) {
1538 vgprs.insert(pred_instr->operands[0].getTemp());
1539 }
1540 }
1541 }
1542 if (!vgprs.size())
1543 continue;
1544
1545 aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vgprs.size(), 0)};
1546 int k = 0;
1547 for (Temp tmp : vgprs) {
1548 destr->operands[k++] = Operand(tmp);
1549 }
1550 /* find insertion point */
1551 std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
1552 while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
1553 ++it;
1554 block.instructions.insert(it, std::move(destr));
1555 }
1556 }
1557
1558 } /* end namespace */
1559
1560
1561 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options)
1562 {
1563 program->config->spilled_vgprs = 0;
1564 program->config->spilled_sgprs = 0;
1565
1566 /* no spilling when wave count is already high */
1567 if (program->num_waves >= 6)
1568 return;
1569
1570 /* else, we check if we can improve things a bit */
1571
1572 /* calculate target register demand */
1573 RegisterDemand max_reg_demand;
1574 for (Block& block : program->blocks) {
1575 max_reg_demand.update(block.register_demand);
1576 }
1577
1578 RegisterDemand target_pressure = {256, int16_t(program->sgpr_limit)};
1579 unsigned num_waves = 1;
1580 int spills_to_vgpr = (max_reg_demand.sgpr - program->sgpr_limit + 63) / 64;
1581
1582 /* test if it possible to increase occupancy with little spilling */
1583 for (unsigned num_waves_next = 2; num_waves_next <= program->max_waves; num_waves_next++) {
1584 RegisterDemand target_pressure_next = {int16_t((256 / num_waves_next) & ~3),
1585 int16_t(get_addr_sgpr_from_waves(program, num_waves_next))};
1586
1587 /* Currently no vgpr spilling supported.
1588 * Spill as many sgprs as necessary to not hinder occupancy */
1589 if (max_reg_demand.vgpr > target_pressure_next.vgpr)
1590 break;
1591 /* check that we have enough free vgprs to spill sgprs to */
1592 if (max_reg_demand.sgpr > target_pressure_next.sgpr) {
1593 /* add some buffer in case graph coloring is not perfect ... */
1594 const int spills_to_vgpr_next = (max_reg_demand.sgpr - target_pressure_next.sgpr + 63 + 32) / 64;
1595 if (spills_to_vgpr_next + max_reg_demand.vgpr > target_pressure_next.vgpr)
1596 break;
1597 spills_to_vgpr = spills_to_vgpr_next;
1598 }
1599
1600 target_pressure = target_pressure_next;
1601 num_waves = num_waves_next;
1602 }
1603
1604 assert(max_reg_demand.vgpr <= target_pressure.vgpr && "VGPR spilling not yet supported.");
1605 /* nothing to do */
1606 if (num_waves == program->num_waves)
1607 return;
1608
1609 /* initialize ctx */
1610 spill_ctx ctx(target_pressure, program, live_vars.register_demand);
1611 compute_global_next_uses(ctx, live_vars.live_out);
1612 get_rematerialize_info(ctx);
1613
1614 /* create spills and reloads */
1615 for (unsigned i = 0; i < program->blocks.size(); i++)
1616 spill_block(ctx, i);
1617
1618 /* assign spill slots and DCE rematerialized code */
1619 assign_spill_slots(ctx, spills_to_vgpr);
1620
1621 /* update live variable information */
1622 live_vars = live_var_analysis(program, options);
1623
1624 assert(program->num_waves >= num_waves);
1625 }
1626
1627 }
1628