2 * Copyright © 2018 Valve Corporation
3 * Copyright © 2018 Google
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 #include "vulkan/radv_shader.h"
33 * Implements the spilling algorithm on SSA-form from
34 * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
35 * by Matthias Braun and Sebastian Hack.
47 RegisterDemand target_pressure
;
49 std::vector
<std::vector
<RegisterDemand
>> register_demand
;
50 std::vector
<std::map
<Temp
, Temp
>> renames
;
51 std::vector
<std::map
<Temp
, uint32_t>> spills_entry
;
52 std::vector
<std::map
<Temp
, uint32_t>> spills_exit
;
53 std::vector
<bool> processed
;
54 std::stack
<Block
*> loop_header
;
55 std::vector
<std::map
<Temp
, std::pair
<uint32_t, uint32_t>>> next_use_distances_start
;
56 std::vector
<std::map
<Temp
, std::pair
<uint32_t, uint32_t>>> next_use_distances_end
;
57 std::vector
<std::pair
<RegClass
, std::set
<uint32_t>>> interferences
;
58 std::vector
<std::pair
<uint32_t, uint32_t>> affinities
;
59 std::vector
<bool> is_reloaded
;
60 std::map
<Temp
, remat_info
> remat
;
61 std::map
<Instruction
*, bool> remat_used
;
63 spill_ctx(const RegisterDemand target_pressure
, Program
* program
,
64 std::vector
<std::vector
<RegisterDemand
>> register_demand
)
65 : target_pressure(target_pressure
), program(program
),
66 register_demand(register_demand
), renames(program
->blocks
.size()),
67 spills_entry(program
->blocks
.size()), spills_exit(program
->blocks
.size()),
68 processed(program
->blocks
.size(), false) {}
70 uint32_t allocate_spill_id(RegClass rc
)
72 interferences
.emplace_back(rc
, std::set
<uint32_t>());
73 is_reloaded
.push_back(false);
74 return next_spill_id
++;
77 uint32_t next_spill_id
= 0;
80 int32_t get_dominator(int idx_a
, int idx_b
, Program
* program
, bool is_linear
)
88 while (idx_a
!= idx_b
) {
90 idx_a
= program
->blocks
[idx_a
].linear_idom
;
92 idx_b
= program
->blocks
[idx_b
].linear_idom
;
95 while (idx_a
!= idx_b
) {
97 idx_a
= program
->blocks
[idx_a
].logical_idom
;
99 idx_b
= program
->blocks
[idx_b
].logical_idom
;
106 void next_uses_per_block(spill_ctx
& ctx
, unsigned block_idx
, std::set
<uint32_t>& worklist
)
108 Block
* block
= &ctx
.program
->blocks
[block_idx
];
109 std::map
<Temp
, std::pair
<uint32_t, uint32_t>> next_uses
= ctx
.next_use_distances_end
[block_idx
];
111 /* to compute the next use distance at the beginning of the block, we have to add the block's size */
112 for (std::map
<Temp
, std::pair
<uint32_t, uint32_t>>::iterator it
= next_uses
.begin(); it
!= next_uses
.end();) {
113 it
->second
.second
= it
->second
.second
+ block
->instructions
.size();
115 /* remove the live out exec mask as we really don't want to spill it */
116 if (it
->first
== block
->live_out_exec
)
117 it
= next_uses
.erase(it
);
122 int idx
= block
->instructions
.size() - 1;
124 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
126 if (instr
->opcode
== aco_opcode::p_linear_phi
||
127 instr
->opcode
== aco_opcode::p_phi
)
130 for (const Definition
& def
: instr
->definitions
) {
132 next_uses
.erase(def
.getTemp());
135 for (const Operand
& op
: instr
->operands
) {
137 if (op
.isFixed() && op
.physReg() == exec
)
140 next_uses
[op
.getTemp()] = {block_idx
, idx
};
145 assert(block_idx
!= 0 || next_uses
.empty());
146 ctx
.next_use_distances_start
[block_idx
] = next_uses
;
148 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
149 assert(instr
->opcode
== aco_opcode::p_linear_phi
|| instr
->opcode
== aco_opcode::p_phi
);
151 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
152 unsigned pred_idx
= instr
->opcode
== aco_opcode::p_phi
?
153 block
->logical_preds
[i
] :
154 block
->linear_preds
[i
];
155 if (instr
->operands
[i
].isTemp()) {
156 if (ctx
.next_use_distances_end
[pred_idx
].find(instr
->operands
[i
].getTemp()) == ctx
.next_use_distances_end
[pred_idx
].end() ||
157 ctx
.next_use_distances_end
[pred_idx
][instr
->operands
[i
].getTemp()] != std::pair
<uint32_t, uint32_t>{block_idx
, 0})
158 worklist
.insert(pred_idx
);
159 ctx
.next_use_distances_end
[pred_idx
][instr
->operands
[i
].getTemp()] = {block_idx
, 0};
162 next_uses
.erase(instr
->definitions
[0].getTemp());
166 /* all remaining live vars must be live-out at the predecessors */
167 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: next_uses
) {
168 Temp temp
= pair
.first
;
169 uint32_t distance
= pair
.second
.second
;
170 uint32_t dom
= pair
.second
.first
;
171 std::vector
<unsigned>& preds
= temp
.is_linear() ? block
->linear_preds
: block
->logical_preds
;
172 for (unsigned pred_idx
: preds
) {
173 if (ctx
.program
->blocks
[pred_idx
].loop_nest_depth
> block
->loop_nest_depth
)
175 if (ctx
.next_use_distances_end
[pred_idx
].find(temp
) != ctx
.next_use_distances_end
[pred_idx
].end()) {
176 dom
= get_dominator(dom
, ctx
.next_use_distances_end
[pred_idx
][temp
].first
, ctx
.program
, temp
.is_linear());
177 distance
= std::min(ctx
.next_use_distances_end
[pred_idx
][temp
].second
, distance
);
179 if (ctx
.next_use_distances_end
[pred_idx
][temp
] != std::pair
<uint32_t, uint32_t>{dom
, distance
})
180 worklist
.insert(pred_idx
);
181 ctx
.next_use_distances_end
[pred_idx
][temp
] = {dom
, distance
};
187 void compute_global_next_uses(spill_ctx
& ctx
, std::vector
<std::set
<Temp
>>& live_out
)
189 ctx
.next_use_distances_start
.resize(ctx
.program
->blocks
.size());
190 ctx
.next_use_distances_end
.resize(ctx
.program
->blocks
.size());
191 std::set
<uint32_t> worklist
;
192 for (Block
& block
: ctx
.program
->blocks
)
193 worklist
.insert(block
.index
);
195 while (!worklist
.empty()) {
196 std::set
<unsigned>::reverse_iterator b_it
= worklist
.rbegin();
197 unsigned block_idx
= *b_it
;
198 worklist
.erase(block_idx
);
199 next_uses_per_block(ctx
, block_idx
, worklist
);
203 bool should_rematerialize(aco_ptr
<Instruction
>& instr
)
205 /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
206 if (instr
->format
!= Format::VOP1
&& instr
->format
!= Format::SOP1
&& instr
->format
!= Format::PSEUDO
)
208 /* TODO: pseudo-instruction rematerialization is only supported for p_create_vector */
209 if (instr
->format
== Format::PSEUDO
&& instr
->opcode
!= aco_opcode::p_create_vector
)
212 for (const Operand
& op
: instr
->operands
) {
213 /* TODO: rematerialization using temporaries isn't yet supported */
218 /* TODO: rematerialization with multiple definitions isn't yet supported */
219 if (instr
->definitions
.size() > 1)
225 aco_ptr
<Instruction
> do_reload(spill_ctx
& ctx
, Temp tmp
, Temp new_name
, uint32_t spill_id
)
227 std::map
<Temp
, remat_info
>::iterator remat
= ctx
.remat
.find(tmp
);
228 if (remat
!= ctx
.remat
.end()) {
229 Instruction
*instr
= remat
->second
.instr
;
230 assert((instr
->format
== Format::VOP1
|| instr
->format
== Format::SOP1
|| instr
->format
== Format::PSEUDO
) && "unsupported");
231 assert((instr
->format
!= Format::PSEUDO
|| instr
->opcode
== aco_opcode::p_create_vector
) && "unsupported");
232 assert(instr
->definitions
.size() == 1 && "unsupported");
234 aco_ptr
<Instruction
> res
;
235 if (instr
->format
== Format::VOP1
) {
236 res
.reset(create_instruction
<VOP1_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
237 } else if (instr
->format
== Format::SOP1
) {
238 res
.reset(create_instruction
<SOP1_instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
239 } else if (instr
->format
== Format::PSEUDO
) {
240 res
.reset(create_instruction
<Instruction
>(instr
->opcode
, instr
->format
, instr
->operands
.size(), instr
->definitions
.size()));
242 for (unsigned i
= 0; i
< instr
->operands
.size(); i
++) {
243 res
->operands
[i
] = instr
->operands
[i
];
244 if (instr
->operands
[i
].isTemp()) {
245 assert(false && "unsupported");
246 if (ctx
.remat
.count(instr
->operands
[i
].getTemp()))
247 ctx
.remat_used
[ctx
.remat
[instr
->operands
[i
].getTemp()].instr
] = true;
250 res
->definitions
[0] = Definition(new_name
);
253 aco_ptr
<Pseudo_instruction
> reload
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_reload
, Format::PSEUDO
, 1, 1)};
254 reload
->operands
[0] = Operand(spill_id
);
255 reload
->definitions
[0] = Definition(new_name
);
256 ctx
.is_reloaded
[spill_id
] = true;
261 void get_rematerialize_info(spill_ctx
& ctx
)
263 for (Block
& block
: ctx
.program
->blocks
) {
264 bool logical
= false;
265 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
266 if (instr
->opcode
== aco_opcode::p_logical_start
)
268 else if (instr
->opcode
== aco_opcode::p_logical_end
)
270 if (logical
&& should_rematerialize(instr
)) {
271 for (const Definition
& def
: instr
->definitions
) {
273 ctx
.remat
[def
.getTemp()] = (remat_info
){instr
.get()};
274 ctx
.remat_used
[instr
.get()] = false;
282 std::vector
<std::map
<Temp
, uint32_t>> local_next_uses(spill_ctx
& ctx
, Block
* block
)
284 std::vector
<std::map
<Temp
, uint32_t>> local_next_uses(block
->instructions
.size());
286 std::map
<Temp
, uint32_t> next_uses
;
287 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block
->index
]) {
288 /* omit live out exec mask */
289 if (pair
.first
== block
->live_out_exec
)
292 next_uses
[pair
.first
] = pair
.second
.second
+ block
->instructions
.size();
295 for (int idx
= block
->instructions
.size() - 1; idx
>= 0; idx
--) {
296 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
299 if (instr
->opcode
== aco_opcode::p_phi
|| instr
->opcode
== aco_opcode::p_linear_phi
)
302 for (const Operand
& op
: instr
->operands
) {
303 if (op
.isFixed() && op
.physReg() == exec
)
306 next_uses
[op
.getTemp()] = idx
;
308 for (const Definition
& def
: instr
->definitions
) {
310 next_uses
.erase(def
.getTemp());
312 local_next_uses
[idx
] = next_uses
;
314 return local_next_uses
;
318 RegisterDemand
init_live_in_vars(spill_ctx
& ctx
, Block
* block
, unsigned block_idx
)
320 RegisterDemand spilled_registers
;
322 /* first block, nothing was spilled before */
326 /* loop header block */
327 if (block
->loop_nest_depth
> ctx
.program
->blocks
[block_idx
- 1].loop_nest_depth
) {
328 assert(block
->linear_preds
[0] == block_idx
- 1);
329 assert(block
->logical_preds
[0] == block_idx
- 1);
331 /* create new loop_info */
332 ctx
.loop_header
.emplace(block
);
334 /* check how many live-through variables should be spilled */
335 RegisterDemand new_demand
;
336 unsigned i
= block_idx
;
337 while (ctx
.program
->blocks
[i
].loop_nest_depth
>= block
->loop_nest_depth
) {
338 assert(ctx
.program
->blocks
.size() > i
);
339 new_demand
.update(ctx
.program
->blocks
[i
].register_demand
);
342 unsigned loop_end
= i
;
344 /* select live-through vgpr variables */
345 while (new_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
) {
346 unsigned distance
= 0;
348 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block_idx
- 1]) {
349 if (pair
.first
.type() == RegType::vgpr
&&
350 pair
.second
.first
>= loop_end
&&
351 pair
.second
.second
> distance
&&
352 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
353 to_spill
= pair
.first
;
354 distance
= pair
.second
.second
;
361 if (ctx
.spills_exit
[block_idx
- 1].find(to_spill
) == ctx
.spills_exit
[block_idx
- 1].end()) {
362 spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
364 spill_id
= ctx
.spills_exit
[block_idx
- 1][to_spill
];
367 ctx
.spills_entry
[block_idx
][to_spill
] = spill_id
;
368 spilled_registers
.vgpr
+= to_spill
.size();
371 /* select live-through sgpr variables */
372 while (new_demand
.sgpr
- spilled_registers
.sgpr
> ctx
.target_pressure
.sgpr
) {
373 unsigned distance
= 0;
375 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_end
[block_idx
- 1]) {
376 if (pair
.first
.type() == RegType::sgpr
&&
377 pair
.second
.first
>= loop_end
&&
378 pair
.second
.second
> distance
&&
379 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
380 to_spill
= pair
.first
;
381 distance
= pair
.second
.second
;
388 if (ctx
.spills_exit
[block_idx
- 1].find(to_spill
) == ctx
.spills_exit
[block_idx
- 1].end()) {
389 spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
391 spill_id
= ctx
.spills_exit
[block_idx
- 1][to_spill
];
394 ctx
.spills_entry
[block_idx
][to_spill
] = spill_id
;
395 spilled_registers
.sgpr
+= to_spill
.size();
401 if (!RegisterDemand(new_demand
- spilled_registers
).exceeds(ctx
.target_pressure
))
402 return spilled_registers
;
404 /* if reg pressure is too high at beginning of loop, add variables with furthest use */
406 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
|| block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
)
409 assert(idx
!= 0 && "loop without phis: TODO");
411 RegisterDemand reg_pressure
= ctx
.register_demand
[block_idx
][idx
] - spilled_registers
;
412 while (reg_pressure
.sgpr
> ctx
.target_pressure
.sgpr
) {
413 unsigned distance
= 0;
415 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
416 if (pair
.first
.type() == RegType::sgpr
&&
417 pair
.second
.second
> distance
&&
418 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
419 to_spill
= pair
.first
;
420 distance
= pair
.second
.second
;
423 assert(distance
!= 0);
425 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
426 spilled_registers
.sgpr
+= to_spill
.size();
427 reg_pressure
.sgpr
-= to_spill
.size();
429 while (reg_pressure
.vgpr
> ctx
.target_pressure
.vgpr
) {
430 unsigned distance
= 0;
432 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
433 if (pair
.first
.type() == RegType::vgpr
&&
434 pair
.second
.second
> distance
&&
435 ctx
.spills_entry
[block_idx
].find(pair
.first
) == ctx
.spills_entry
[block_idx
].end()) {
436 to_spill
= pair
.first
;
437 distance
= pair
.second
.second
;
440 assert(distance
!= 0);
441 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
442 spilled_registers
.vgpr
+= to_spill
.size();
443 reg_pressure
.vgpr
-= to_spill
.size();
446 return spilled_registers
;
450 if (block
->linear_preds
.size() == 1) {
451 /* keep variables spilled if they are alive and not used in the current block */
452 unsigned pred_idx
= block
->linear_preds
[0];
453 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
454 if (pair
.first
.type() == RegType::sgpr
&&
455 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
456 ctx
.next_use_distances_start
[block_idx
][pair
.first
].second
> block_idx
) {
457 ctx
.spills_entry
[block_idx
].insert(pair
);
458 spilled_registers
.sgpr
+= pair
.first
.size();
461 if (block
->logical_preds
.size() == 1) {
462 pred_idx
= block
->logical_preds
[0];
463 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
464 if (pair
.first
.type() == RegType::vgpr
&&
465 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
466 ctx
.next_use_distances_end
[pred_idx
][pair
.first
].second
> block_idx
) {
467 ctx
.spills_entry
[block_idx
].insert(pair
);
468 spilled_registers
.vgpr
+= pair
.first
.size();
473 /* if register demand is still too high, we just keep all spilled live vars and process the block */
474 if (block
->register_demand
.sgpr
- spilled_registers
.sgpr
> ctx
.target_pressure
.sgpr
) {
475 pred_idx
= block
->linear_preds
[0];
476 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
477 if (pair
.first
.type() == RegType::sgpr
&&
478 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
479 ctx
.spills_entry
[block_idx
].insert(pair
).second
) {
480 spilled_registers
.sgpr
+= pair
.first
.size();
484 if (block
->register_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
&& block
->logical_preds
.size() == 1) {
485 pred_idx
= block
->logical_preds
[0];
486 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
487 if (pair
.first
.type() == RegType::vgpr
&&
488 ctx
.next_use_distances_start
[block_idx
].find(pair
.first
) != ctx
.next_use_distances_start
[block_idx
].end() &&
489 ctx
.spills_entry
[block_idx
].insert(pair
).second
) {
490 spilled_registers
.vgpr
+= pair
.first
.size();
495 return spilled_registers
;
498 /* else: merge block */
499 std::set
<Temp
> partial_spills
;
501 /* keep variables spilled on all incoming paths */
502 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
503 std::vector
<unsigned>& preds
= pair
.first
.type() == RegType::vgpr
? block
->logical_preds
: block
->linear_preds
;
504 /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload it.
505 * Otherwise, if any predecessor reloads it, ensure it's reloaded on all other predecessors.
506 * The idea is that it's better in practice to rematerialize redundantly than to create lots of phis. */
507 /* TODO: test this idea with more than Dawn of War III shaders (the current pipeline-db doesn't seem to exercise this path much) */
508 bool remat
= ctx
.remat
.count(pair
.first
);
510 uint32_t spill_id
= 0;
511 for (unsigned pred_idx
: preds
) {
512 /* variable is not even live at the predecessor: probably from a phi */
513 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end()) {
517 if (ctx
.spills_exit
[pred_idx
].find(pair
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
521 partial_spills
.insert(pair
.first
);
522 /* it might be that on one incoming path, the variable has a different spill_id, but add_couple_code() will take care of that. */
523 spill_id
= ctx
.spills_exit
[pred_idx
][pair
.first
];
529 ctx
.spills_entry
[block_idx
][pair
.first
] = spill_id
;
530 partial_spills
.erase(pair
.first
);
531 spilled_registers
+= pair
.first
;
537 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
||
538 block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
) {
539 aco_ptr
<Instruction
>& phi
= block
->instructions
[idx
];
540 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
543 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
544 if (phi
->operands
[i
].isUndefined())
546 assert(phi
->operands
[i
].isTemp());
547 if (ctx
.spills_exit
[preds
[i
]].find(phi
->operands
[i
].getTemp()) == ctx
.spills_exit
[preds
[i
]].end())
550 partial_spills
.insert(phi
->definitions
[0].getTemp());
553 ctx
.spills_entry
[block_idx
][phi
->definitions
[0].getTemp()] = ctx
.allocate_spill_id(phi
->definitions
[0].regClass());
554 partial_spills
.erase(phi
->definitions
[0].getTemp());
555 spilled_registers
+= phi
->definitions
[0].getTemp();
561 /* if reg pressure at first instruction is still too high, add partially spilled variables */
562 RegisterDemand reg_pressure
;
564 for (const Definition
& def
: block
->instructions
[idx
]->definitions
) {
566 reg_pressure
-= def
.getTemp();
569 for (const Operand
& op
: block
->instructions
[idx
]->operands
) {
570 if (op
.isTemp() && op
.isFirstKill()) {
571 reg_pressure
+= op
.getTemp();
577 reg_pressure
+= ctx
.register_demand
[block_idx
][idx
] - spilled_registers
;
579 while (reg_pressure
.sgpr
> ctx
.target_pressure
.sgpr
) {
580 assert(!partial_spills
.empty());
582 std::set
<Temp
>::iterator it
= partial_spills
.begin();
584 unsigned distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
585 while (it
!= partial_spills
.end()) {
586 assert(ctx
.spills_entry
[block_idx
].find(*it
) == ctx
.spills_entry
[block_idx
].end());
588 if (it
->type() == RegType::sgpr
&& ctx
.next_use_distances_start
[block_idx
][*it
].second
> distance
) {
589 distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
594 assert(distance
!= 0);
596 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
597 partial_spills
.erase(to_spill
);
598 spilled_registers
.sgpr
+= to_spill
.size();
599 reg_pressure
.sgpr
-= to_spill
.size();
602 while (reg_pressure
.vgpr
> ctx
.target_pressure
.vgpr
) {
603 assert(!partial_spills
.empty());
605 std::set
<Temp
>::iterator it
= partial_spills
.begin();
607 unsigned distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
608 while (it
!= partial_spills
.end()) {
609 assert(ctx
.spills_entry
[block_idx
].find(*it
) == ctx
.spills_entry
[block_idx
].end());
611 if (it
->type() == RegType::vgpr
&& ctx
.next_use_distances_start
[block_idx
][*it
].second
> distance
) {
612 distance
= ctx
.next_use_distances_start
[block_idx
][*it
].second
;
617 assert(distance
!= 0);
619 ctx
.spills_entry
[block_idx
][to_spill
] = ctx
.allocate_spill_id(to_spill
.regClass());
620 partial_spills
.erase(to_spill
);
621 spilled_registers
.vgpr
+= to_spill
.size();
622 reg_pressure
.vgpr
-= to_spill
.size();
625 return spilled_registers
;
629 void add_coupling_code(spill_ctx
& ctx
, Block
* block
, unsigned block_idx
)
631 /* no coupling code necessary */
632 if (block
->linear_preds
.size() == 0)
635 std::vector
<aco_ptr
<Instruction
>> instructions
;
636 /* branch block: TODO take other branch into consideration */
637 if (block
->linear_preds
.size() == 1) {
638 assert(ctx
.processed
[block
->linear_preds
[0]]);
640 if (block
->logical_preds
.size() == 1) {
641 unsigned pred_idx
= block
->logical_preds
[0];
642 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> live
: ctx
.next_use_distances_start
[block_idx
]) {
643 if (live
.first
.type() == RegType::sgpr
)
646 if (ctx
.spills_entry
[block_idx
].find(live
.first
) != ctx
.spills_entry
[block_idx
].end())
649 /* in register at end of predecessor */
650 if (ctx
.spills_exit
[pred_idx
].find(live
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
651 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(live
.first
);
652 if (it
!= ctx
.renames
[pred_idx
].end())
653 ctx
.renames
[block_idx
].insert(*it
);
657 /* variable is spilled at predecessor and live at current block: create reload instruction */
658 Temp new_name
= {ctx
.program
->allocateId(), live
.first
.regClass()};
659 aco_ptr
<Instruction
> reload
= do_reload(ctx
, live
.first
, new_name
, ctx
.spills_exit
[pred_idx
][live
.first
]);
660 instructions
.emplace_back(std::move(reload
));
661 ctx
.renames
[block_idx
][live
.first
] = new_name
;
665 unsigned pred_idx
= block
->linear_preds
[0];
666 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> live
: ctx
.next_use_distances_start
[block_idx
]) {
667 if (live
.first
.type() == RegType::vgpr
)
670 if (ctx
.spills_entry
[block_idx
].find(live
.first
) != ctx
.spills_entry
[block_idx
].end())
673 /* in register at end of predecessor */
674 if (ctx
.spills_exit
[pred_idx
].find(live
.first
) == ctx
.spills_exit
[pred_idx
].end()) {
675 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(live
.first
);
676 if (it
!= ctx
.renames
[pred_idx
].end())
677 ctx
.renames
[block_idx
].insert(*it
);
681 /* variable is spilled at predecessor and live at current block: create reload instruction */
682 Temp new_name
= {ctx
.program
->allocateId(), live
.first
.regClass()};
683 aco_ptr
<Instruction
> reload
= do_reload(ctx
, live
.first
, new_name
, ctx
.spills_exit
[pred_idx
][live
.first
]);
684 instructions
.emplace_back(std::move(reload
));
685 ctx
.renames
[block_idx
][live
.first
] = new_name
;
688 /* combine new reload instructions with original block */
689 if (!instructions
.empty()) {
690 unsigned insert_idx
= 0;
691 while (block
->instructions
[insert_idx
]->opcode
== aco_opcode::p_phi
||
692 block
->instructions
[insert_idx
]->opcode
== aco_opcode::p_linear_phi
) {
695 ctx
.register_demand
[block
->index
].insert(std::next(ctx
.register_demand
[block
->index
].begin(), insert_idx
),
696 instructions
.size(), RegisterDemand());
697 block
->instructions
.insert(std::next(block
->instructions
.begin(), insert_idx
),
698 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(instructions
.begin()),
699 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(instructions
.end()));
704 /* loop header and merge blocks: check if all (linear) predecessors have been processed */
705 for (ASSERTED
unsigned pred
: block
->linear_preds
)
706 assert(ctx
.processed
[pred
]);
708 /* iterate the phi nodes for which operands to spill at the predecessor */
709 for (aco_ptr
<Instruction
>& phi
: block
->instructions
) {
710 if (phi
->opcode
!= aco_opcode::p_phi
&&
711 phi
->opcode
!= aco_opcode::p_linear_phi
)
714 /* if the phi is not spilled, add to instructions */
715 if (ctx
.spills_entry
[block_idx
].find(phi
->definitions
[0].getTemp()) == ctx
.spills_entry
[block_idx
].end()) {
716 instructions
.emplace_back(std::move(phi
));
720 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
721 uint32_t def_spill_id
= ctx
.spills_entry
[block_idx
][phi
->definitions
[0].getTemp()];
723 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
724 if (phi
->operands
[i
].isUndefined())
727 unsigned pred_idx
= preds
[i
];
728 assert(phi
->operands
[i
].isTemp() && phi
->operands
[i
].isKill());
729 Temp var
= phi
->operands
[i
].getTemp();
731 /* build interferences between the phi def and all spilled variables at the predecessor blocks */
732 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[pred_idx
]) {
733 if (var
== pair
.first
)
735 ctx
.interferences
[def_spill_id
].second
.emplace(pair
.second
);
736 ctx
.interferences
[pair
.second
].second
.emplace(def_spill_id
);
739 /* check if variable is already spilled at predecessor */
740 std::map
<Temp
, uint32_t>::iterator spilled
= ctx
.spills_exit
[pred_idx
].find(var
);
741 if (spilled
!= ctx
.spills_exit
[pred_idx
].end()) {
742 if (spilled
->second
!= def_spill_id
)
743 ctx
.affinities
.emplace_back(std::pair
<uint32_t, uint32_t>{def_spill_id
, spilled
->second
});
747 /* rename if necessary */
748 std::map
<Temp
, Temp
>::iterator rename_it
= ctx
.renames
[pred_idx
].find(var
);
749 if (rename_it
!= ctx
.renames
[pred_idx
].end()) {
750 var
= rename_it
->second
;
751 ctx
.renames
[pred_idx
].erase(rename_it
);
754 uint32_t spill_id
= ctx
.allocate_spill_id(phi
->definitions
[0].regClass());
755 ctx
.affinities
.emplace_back(std::pair
<uint32_t, uint32_t>{def_spill_id
, spill_id
});
756 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
757 spill
->operands
[0] = Operand(var
);
758 spill
->operands
[1] = Operand(spill_id
);
759 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
760 unsigned idx
= pred
.instructions
.size();
764 } while (phi
->opcode
== aco_opcode::p_phi
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
765 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
766 pred
.instructions
.insert(it
, std::move(spill
));
767 ctx
.spills_exit
[pred_idx
][phi
->operands
[i
].getTemp()] = spill_id
;
770 /* remove phi from instructions */
774 /* iterate all (other) spilled variables for which to spill at the predecessor */
775 // TODO: would be better to have them sorted: first vgprs and first with longest distance
776 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_entry
[block_idx
]) {
777 std::vector
<unsigned> preds
= pair
.first
.type() == RegType::vgpr
? block
->logical_preds
: block
->linear_preds
;
779 for (unsigned pred_idx
: preds
) {
780 /* add interferences between spilled variable and predecessors exit spills */
781 for (std::pair
<Temp
, uint32_t> exit_spill
: ctx
.spills_exit
[pred_idx
]) {
782 if (exit_spill
.first
== pair
.first
)
784 ctx
.interferences
[exit_spill
.second
].second
.emplace(pair
.second
);
785 ctx
.interferences
[pair
.second
].second
.emplace(exit_spill
.second
);
788 /* variable is already spilled at predecessor */
789 std::map
<Temp
, uint32_t>::iterator spilled
= ctx
.spills_exit
[pred_idx
].find(pair
.first
);
790 if (spilled
!= ctx
.spills_exit
[pred_idx
].end()) {
791 if (spilled
->second
!= pair
.second
)
792 ctx
.affinities
.emplace_back(std::pair
<uint32_t, uint32_t>{pair
.second
, spilled
->second
});
796 /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */
797 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end())
800 /* variable is in register at predecessor and has to be spilled */
801 /* rename if necessary */
802 Temp var
= pair
.first
;
803 std::map
<Temp
, Temp
>::iterator rename_it
= ctx
.renames
[pred_idx
].find(var
);
804 if (rename_it
!= ctx
.renames
[pred_idx
].end()) {
805 var
= rename_it
->second
;
806 ctx
.renames
[pred_idx
].erase(rename_it
);
809 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
810 spill
->operands
[0] = Operand(var
);
811 spill
->operands
[1] = Operand(pair
.second
);
812 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
813 unsigned idx
= pred
.instructions
.size();
817 } while (pair
.first
.type() == RegType::vgpr
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
818 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
819 pred
.instructions
.insert(it
, std::move(spill
));
820 ctx
.spills_exit
[pred
.index
][pair
.first
] = pair
.second
;
824 /* iterate phis for which operands to reload */
825 for (aco_ptr
<Instruction
>& phi
: instructions
) {
826 assert(phi
->opcode
== aco_opcode::p_phi
|| phi
->opcode
== aco_opcode::p_linear_phi
);
827 assert(ctx
.spills_entry
[block_idx
].find(phi
->definitions
[0].getTemp()) == ctx
.spills_entry
[block_idx
].end());
829 std::vector
<unsigned>& preds
= phi
->opcode
== aco_opcode::p_phi
? block
->logical_preds
: block
->linear_preds
;
830 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
831 if (!phi
->operands
[i
].isTemp())
833 unsigned pred_idx
= preds
[i
];
836 if (ctx
.spills_exit
[pred_idx
].find(phi
->operands
[i
].getTemp()) == ctx
.spills_exit
[pred_idx
].end()) {
837 std::map
<Temp
, Temp
>::iterator it
= ctx
.renames
[pred_idx
].find(phi
->operands
[i
].getTemp());
838 if (it
!= ctx
.renames
[pred_idx
].end())
839 phi
->operands
[i
].setTemp(it
->second
);
843 Temp tmp
= phi
->operands
[i
].getTemp();
845 /* reload phi operand at end of predecessor block */
846 Temp new_name
= {ctx
.program
->allocateId(), tmp
.regClass()};
847 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
848 unsigned idx
= pred
.instructions
.size();
852 } while (phi
->opcode
== aco_opcode::p_phi
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
853 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
855 aco_ptr
<Instruction
> reload
= do_reload(ctx
, tmp
, new_name
, ctx
.spills_exit
[pred_idx
][tmp
]);
856 pred
.instructions
.insert(it
, std::move(reload
));
858 ctx
.spills_exit
[pred_idx
].erase(tmp
);
859 ctx
.renames
[pred_idx
][tmp
] = new_name
;
860 phi
->operands
[i
].setTemp(new_name
);
864 /* iterate live variables for which to reload */
865 // TODO: reload at current block if variable is spilled on all predecessors
866 for (std::pair
<Temp
, std::pair
<uint32_t, uint32_t>> pair
: ctx
.next_use_distances_start
[block_idx
]) {
867 /* skip spilled variables */
868 if (ctx
.spills_entry
[block_idx
].find(pair
.first
) != ctx
.spills_entry
[block_idx
].end())
870 std::vector
<unsigned> preds
= pair
.first
.type() == RegType::vgpr
? block
->logical_preds
: block
->linear_preds
;
872 /* variable is dead at predecessor, it must be from a phi */
873 bool is_dead
= false;
874 for (unsigned pred_idx
: preds
) {
875 if (ctx
.next_use_distances_end
[pred_idx
].find(pair
.first
) == ctx
.next_use_distances_end
[pred_idx
].end())
880 for (unsigned pred_idx
: preds
) {
881 /* the variable is not spilled at the predecessor */
882 if (ctx
.spills_exit
[pred_idx
].find(pair
.first
) == ctx
.spills_exit
[pred_idx
].end())
885 /* variable is spilled at predecessor and has to be reloaded */
886 Temp new_name
= {ctx
.program
->allocateId(), pair
.first
.regClass()};
887 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
888 unsigned idx
= pred
.instructions
.size();
892 } while (pair
.first
.type() == RegType::vgpr
&& pred
.instructions
[idx
]->opcode
!= aco_opcode::p_logical_end
);
893 std::vector
<aco_ptr
<Instruction
>>::iterator it
= std::next(pred
.instructions
.begin(), idx
);
895 aco_ptr
<Instruction
> reload
= do_reload(ctx
, pair
.first
, new_name
, ctx
.spills_exit
[pred
.index
][pair
.first
]);
896 pred
.instructions
.insert(it
, std::move(reload
));
898 ctx
.spills_exit
[pred
.index
].erase(pair
.first
);
899 ctx
.renames
[pred
.index
][pair
.first
] = new_name
;
902 /* check if we have to create a new phi for this variable */
903 Temp rename
= Temp();
905 for (unsigned pred_idx
: preds
) {
906 if (ctx
.renames
[pred_idx
].find(pair
.first
) == ctx
.renames
[pred_idx
].end()) {
907 if (rename
== Temp())
910 is_same
= rename
== pair
.first
;
912 if (rename
== Temp())
913 rename
= ctx
.renames
[pred_idx
][pair
.first
];
915 is_same
= rename
== ctx
.renames
[pred_idx
][pair
.first
];
923 /* the variable was renamed differently in the predecessors: we have to create a phi */
924 aco_opcode opcode
= pair
.first
.type() == RegType::vgpr
? aco_opcode::p_phi
: aco_opcode::p_linear_phi
;
925 aco_ptr
<Pseudo_instruction
> phi
{create_instruction
<Pseudo_instruction
>(opcode
, Format::PSEUDO
, preds
.size(), 1)};
926 rename
= {ctx
.program
->allocateId(), pair
.first
.regClass()};
927 for (unsigned i
= 0; i
< phi
->operands
.size(); i
++) {
929 if (ctx
.renames
[preds
[i
]].find(pair
.first
) != ctx
.renames
[preds
[i
]].end())
930 tmp
= ctx
.renames
[preds
[i
]][pair
.first
];
931 else if (preds
[i
] >= block_idx
)
935 phi
->operands
[i
] = Operand(tmp
);
937 phi
->definitions
[0] = Definition(rename
);
938 instructions
.emplace_back(std::move(phi
));
941 /* the variable was renamed: add new name to renames */
942 if (!(rename
== Temp() || rename
== pair
.first
))
943 ctx
.renames
[block_idx
][pair
.first
] = rename
;
946 /* combine phis with instructions */
948 while (!block
->instructions
[idx
]) {
952 ctx
.register_demand
[block
->index
].erase(ctx
.register_demand
[block
->index
].begin(), ctx
.register_demand
[block
->index
].begin() + idx
);
953 ctx
.register_demand
[block
->index
].insert(ctx
.register_demand
[block
->index
].begin(), instructions
.size(), RegisterDemand());
955 std::vector
<aco_ptr
<Instruction
>>::iterator start
= std::next(block
->instructions
.begin(), idx
);
956 instructions
.insert(instructions
.end(), std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(start
),
957 std::move_iterator
<std::vector
<aco_ptr
<Instruction
>>::iterator
>(block
->instructions
.end()));
958 block
->instructions
= std::move(instructions
);
961 void process_block(spill_ctx
& ctx
, unsigned block_idx
, Block
* block
,
962 std::map
<Temp
, uint32_t> ¤t_spills
, RegisterDemand spilled_registers
)
964 std::vector
<std::map
<Temp
, uint32_t>> local_next_use_distance
;
965 std::vector
<aco_ptr
<Instruction
>> instructions
;
968 /* phis are handled separetely */
969 while (block
->instructions
[idx
]->opcode
== aco_opcode::p_phi
||
970 block
->instructions
[idx
]->opcode
== aco_opcode::p_linear_phi
) {
971 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
972 for (const Operand
& op
: instr
->operands
) {
973 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
974 if (op
.isTemp() && ctx
.remat
.count(op
.getTemp()))
975 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
977 instructions
.emplace_back(std::move(instr
));
981 if (block
->register_demand
.exceeds(ctx
.target_pressure
))
982 local_next_use_distance
= local_next_uses(ctx
, block
);
984 while (idx
< block
->instructions
.size()) {
985 aco_ptr
<Instruction
>& instr
= block
->instructions
[idx
];
987 std::map
<Temp
, std::pair
<Temp
, uint32_t>> reloads
;
988 std::map
<Temp
, uint32_t> spills
;
989 /* rename and reload operands */
990 for (Operand
& op
: instr
->operands
) {
993 if (current_spills
.find(op
.getTemp()) == current_spills
.end()) {
994 /* the Operand is in register: check if it was renamed */
995 if (ctx
.renames
[block_idx
].find(op
.getTemp()) != ctx
.renames
[block_idx
].end())
996 op
.setTemp(ctx
.renames
[block_idx
][op
.getTemp()]);
997 /* prevent it's definining instruction from being DCE'd if it could be rematerialized */
998 if (ctx
.remat
.count(op
.getTemp()))
999 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1002 /* the Operand is spilled: add it to reloads */
1003 Temp new_tmp
= {ctx
.program
->allocateId(), op
.regClass()};
1004 ctx
.renames
[block_idx
][op
.getTemp()] = new_tmp
;
1005 reloads
[new_tmp
] = std::make_pair(op
.getTemp(), current_spills
[op
.getTemp()]);
1006 current_spills
.erase(op
.getTemp());
1007 op
.setTemp(new_tmp
);
1008 spilled_registers
-= new_tmp
;
1011 /* check if register demand is low enough before and after the current instruction */
1012 if (block
->register_demand
.exceeds(ctx
.target_pressure
)) {
1014 RegisterDemand new_demand
= ctx
.register_demand
[block_idx
][idx
];
1016 for (const Definition
& def
: instr
->definitions
) {
1019 new_demand
+= def
.getTemp();
1022 new_demand
.update(ctx
.register_demand
[block_idx
][idx
- 1]);
1025 assert(!local_next_use_distance
.empty());
1027 /* if reg pressure is too high, spill variable with furthest next use */
1028 while (RegisterDemand(new_demand
- spilled_registers
).exceeds(ctx
.target_pressure
)) {
1029 unsigned distance
= 0;
1031 bool do_rematerialize
= false;
1032 if (new_demand
.vgpr
- spilled_registers
.vgpr
> ctx
.target_pressure
.vgpr
) {
1033 for (std::pair
<Temp
, uint32_t> pair
: local_next_use_distance
[idx
]) {
1034 bool can_rematerialize
= ctx
.remat
.count(pair
.first
);
1035 if (pair
.first
.type() == RegType::vgpr
&&
1036 ((pair
.second
> distance
&& can_rematerialize
== do_rematerialize
) ||
1037 (can_rematerialize
&& !do_rematerialize
&& pair
.second
> idx
)) &&
1038 current_spills
.find(pair
.first
) == current_spills
.end() &&
1039 ctx
.spills_exit
[block_idx
].find(pair
.first
) == ctx
.spills_exit
[block_idx
].end()) {
1040 to_spill
= pair
.first
;
1041 distance
= pair
.second
;
1042 do_rematerialize
= can_rematerialize
;
1046 for (std::pair
<Temp
, uint32_t> pair
: local_next_use_distance
[idx
]) {
1047 bool can_rematerialize
= ctx
.remat
.count(pair
.first
);
1048 if (pair
.first
.type() == RegType::sgpr
&&
1049 ((pair
.second
> distance
&& can_rematerialize
== do_rematerialize
) ||
1050 (can_rematerialize
&& !do_rematerialize
&& pair
.second
> idx
)) &&
1051 current_spills
.find(pair
.first
) == current_spills
.end() &&
1052 ctx
.spills_exit
[block_idx
].find(pair
.first
) == ctx
.spills_exit
[block_idx
].end()) {
1053 to_spill
= pair
.first
;
1054 distance
= pair
.second
;
1055 do_rematerialize
= can_rematerialize
;
1060 assert(distance
!= 0 && distance
> idx
);
1061 uint32_t spill_id
= ctx
.allocate_spill_id(to_spill
.regClass());
1063 /* add interferences with currently spilled variables */
1064 for (std::pair
<Temp
, uint32_t> pair
: current_spills
) {
1065 ctx
.interferences
[spill_id
].second
.emplace(pair
.second
);
1066 ctx
.interferences
[pair
.second
].second
.emplace(spill_id
);
1068 for (std::pair
<Temp
, std::pair
<Temp
, uint32_t>> pair
: reloads
) {
1069 ctx
.interferences
[spill_id
].second
.emplace(pair
.second
.second
);
1070 ctx
.interferences
[pair
.second
.second
].second
.emplace(spill_id
);
1073 current_spills
[to_spill
] = spill_id
;
1074 spilled_registers
+= to_spill
;
1076 /* rename if necessary */
1077 if (ctx
.renames
[block_idx
].find(to_spill
) != ctx
.renames
[block_idx
].end()) {
1078 to_spill
= ctx
.renames
[block_idx
][to_spill
];
1081 /* add spill to new instructions */
1082 aco_ptr
<Pseudo_instruction
> spill
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 2, 0)};
1083 spill
->operands
[0] = Operand(to_spill
);
1084 spill
->operands
[1] = Operand(spill_id
);
1085 instructions
.emplace_back(std::move(spill
));
1089 /* add reloads and instruction to new instructions */
1090 for (std::pair
<Temp
, std::pair
<Temp
, uint32_t>> pair
: reloads
) {
1091 aco_ptr
<Instruction
> reload
= do_reload(ctx
, pair
.second
.first
, pair
.first
, pair
.second
.second
);
1092 instructions
.emplace_back(std::move(reload
));
1094 instructions
.emplace_back(std::move(instr
));
1098 block
->instructions
= std::move(instructions
);
1099 ctx
.spills_exit
[block_idx
].insert(current_spills
.begin(), current_spills
.end());
1102 void spill_block(spill_ctx
& ctx
, unsigned block_idx
)
1104 Block
* block
= &ctx
.program
->blocks
[block_idx
];
1105 ctx
.processed
[block_idx
] = true;
1107 /* determine set of variables which are spilled at the beginning of the block */
1108 RegisterDemand spilled_registers
= init_live_in_vars(ctx
, block
, block_idx
);
1110 /* add interferences for spilled variables */
1111 for (std::pair
<Temp
, uint32_t> x
: ctx
.spills_entry
[block_idx
]) {
1112 for (std::pair
<Temp
, uint32_t> y
: ctx
.spills_entry
[block_idx
])
1113 if (x
.second
!= y
.second
)
1114 ctx
.interferences
[x
.second
].second
.emplace(y
.second
);
1117 bool is_loop_header
= block
->loop_nest_depth
&& ctx
.loop_header
.top()->index
== block_idx
;
1118 if (!is_loop_header
) {
1119 /* add spill/reload code on incoming control flow edges */
1120 add_coupling_code(ctx
, block
, block_idx
);
1123 std::map
<Temp
, uint32_t> current_spills
= ctx
.spills_entry
[block_idx
];
1125 /* check conditions to process this block */
1126 bool process
= RegisterDemand(block
->register_demand
- spilled_registers
).exceeds(ctx
.target_pressure
) ||
1127 !ctx
.renames
[block_idx
].empty() ||
1128 ctx
.remat_used
.size();
1130 std::map
<Temp
, uint32_t>::iterator it
= current_spills
.begin();
1131 while (!process
&& it
!= current_spills
.end()) {
1132 if (ctx
.next_use_distances_start
[block_idx
][it
->first
].first
== block_idx
)
1138 process_block(ctx
, block_idx
, block
, current_spills
, spilled_registers
);
1140 ctx
.spills_exit
[block_idx
].insert(current_spills
.begin(), current_spills
.end());
1142 /* check if the next block leaves the current loop */
1143 if (block
->loop_nest_depth
== 0 || ctx
.program
->blocks
[block_idx
+ 1].loop_nest_depth
>= block
->loop_nest_depth
)
1146 Block
* loop_header
= ctx
.loop_header
.top();
1148 /* preserve original renames at end of loop header block */
1149 std::map
<Temp
, Temp
> renames
= std::move(ctx
.renames
[loop_header
->index
]);
1151 /* add coupling code to all loop header predecessors */
1152 add_coupling_code(ctx
, loop_header
, loop_header
->index
);
1154 /* update remat_used for phis added in add_coupling_code() */
1155 for (aco_ptr
<Instruction
>& instr
: loop_header
->instructions
) {
1158 for (const Operand
& op
: instr
->operands
) {
1159 if (op
.isTemp() && ctx
.remat
.count(op
.getTemp()))
1160 ctx
.remat_used
[ctx
.remat
[op
.getTemp()].instr
] = true;
1164 /* propagate new renames through loop: i.e. repair the SSA */
1165 renames
.swap(ctx
.renames
[loop_header
->index
]);
1166 for (std::pair
<Temp
, Temp
> rename
: renames
) {
1167 for (unsigned idx
= loop_header
->index
; idx
<= block_idx
; idx
++) {
1168 Block
& current
= ctx
.program
->blocks
[idx
];
1169 std::vector
<aco_ptr
<Instruction
>>::iterator instr_it
= current
.instructions
.begin();
1171 /* first rename phis */
1172 while (instr_it
!= current
.instructions
.end()) {
1173 aco_ptr
<Instruction
>& phi
= *instr_it
;
1174 if (phi
->opcode
!= aco_opcode::p_phi
&& phi
->opcode
!= aco_opcode::p_linear_phi
)
1176 /* no need to rename the loop header phis once again. this happened in add_coupling_code() */
1177 if (idx
== loop_header
->index
) {
1182 for (Operand
& op
: phi
->operands
) {
1185 if (op
.getTemp() == rename
.first
)
1186 op
.setTemp(rename
.second
);
1191 std::map
<Temp
, std::pair
<uint32_t, uint32_t>>::iterator it
= ctx
.next_use_distances_start
[idx
].find(rename
.first
);
1193 /* variable is not live at beginning of this block */
1194 if (it
== ctx
.next_use_distances_start
[idx
].end())
1197 /* if the variable is live at the block's exit, add rename */
1198 if (ctx
.next_use_distances_end
[idx
].find(rename
.first
) != ctx
.next_use_distances_end
[idx
].end())
1199 ctx
.renames
[idx
].insert(rename
);
1201 /* rename all uses in this block */
1202 bool renamed
= false;
1203 while (!renamed
&& instr_it
!= current
.instructions
.end()) {
1204 aco_ptr
<Instruction
>& instr
= *instr_it
;
1205 for (Operand
& op
: instr
->operands
) {
1208 if (op
.getTemp() == rename
.first
) {
1209 op
.setTemp(rename
.second
);
1210 /* we can stop with this block as soon as the variable is spilled */
1211 if (instr
->opcode
== aco_opcode::p_spill
)
1220 /* remove loop header info from stack */
1221 ctx
.loop_header
.pop();
1224 void assign_spill_slots(spill_ctx
& ctx
, unsigned spills_to_vgpr
) {
1225 std::map
<uint32_t, uint32_t> sgpr_slot
;
1226 std::map
<uint32_t, uint32_t> vgpr_slot
;
1227 std::vector
<bool> is_assigned(ctx
.interferences
.size());
1229 /* first, handle affinities: just merge all interferences into both spill ids */
1230 for (std::pair
<uint32_t, uint32_t> pair
: ctx
.affinities
) {
1231 assert(pair
.first
!= pair
.second
);
1232 for (uint32_t id
: ctx
.interferences
[pair
.first
].second
)
1233 ctx
.interferences
[id
].second
.insert(pair
.second
);
1234 for (uint32_t id
: ctx
.interferences
[pair
.second
].second
)
1235 ctx
.interferences
[id
].second
.insert(pair
.first
);
1236 ctx
.interferences
[pair
.first
].second
.insert(ctx
.interferences
[pair
.second
].second
.begin(), ctx
.interferences
[pair
.second
].second
.end());
1237 ctx
.interferences
[pair
.second
].second
.insert(ctx
.interferences
[pair
.first
].second
.begin(), ctx
.interferences
[pair
.first
].second
.end());
1239 bool reloaded
= ctx
.is_reloaded
[pair
.first
] || ctx
.is_reloaded
[pair
.second
];
1240 ctx
.is_reloaded
[pair
.first
] = ctx
.is_reloaded
[pair
.second
] = reloaded
;
1242 for (ASSERTED
uint32_t i
= 0; i
< ctx
.interferences
.size(); i
++)
1243 for (ASSERTED
uint32_t id
: ctx
.interferences
[i
].second
)
1246 /* for each spill slot, assign as many spill ids as possible */
1247 std::vector
<std::set
<uint32_t>> spill_slot_interferences
;
1248 unsigned slot_idx
= 0;
1251 /* assign sgpr spill slots */
1254 for (unsigned id
= 0; id
< ctx
.interferences
.size(); id
++) {
1255 if (is_assigned
[id
] || !ctx
.is_reloaded
[id
])
1257 if (ctx
.interferences
[id
].first
.type() != RegType::sgpr
)
1260 /* check interferences */
1261 bool interferes
= false;
1262 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++) {
1263 if (i
== spill_slot_interferences
.size())
1264 spill_slot_interferences
.emplace_back(std::set
<uint32_t>());
1265 if (spill_slot_interferences
[i
].find(id
) != spill_slot_interferences
[i
].end() || i
/ 64 != slot_idx
/ 64) {
1275 /* we found a spill id which can be assigned to current spill slot */
1276 sgpr_slot
[id
] = slot_idx
;
1277 is_assigned
[id
] = true;
1278 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++)
1279 spill_slot_interferences
[i
].insert(ctx
.interferences
[id
].second
.begin(), ctx
.interferences
[id
].second
.end());
1287 /* assign vgpr spill slots */
1290 for (unsigned id
= 0; id
< ctx
.interferences
.size(); id
++) {
1291 if (is_assigned
[id
] || !ctx
.is_reloaded
[id
])
1293 if (ctx
.interferences
[id
].first
.type() != RegType::vgpr
)
1296 /* check interferences */
1297 bool interferes
= false;
1298 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++) {
1299 if (i
== spill_slot_interferences
.size())
1300 spill_slot_interferences
.emplace_back(std::set
<uint32_t>());
1301 /* check for interference and ensure that vector regs are stored next to each other */
1302 if (spill_slot_interferences
[i
].find(id
) != spill_slot_interferences
[i
].end() || i
/ 64 != slot_idx
/ 64) {
1312 /* we found a spill id which can be assigned to current spill slot */
1313 vgpr_slot
[id
] = slot_idx
;
1314 is_assigned
[id
] = true;
1315 for (unsigned i
= slot_idx
; i
< slot_idx
+ ctx
.interferences
[id
].first
.size(); i
++)
1316 spill_slot_interferences
[i
].insert(ctx
.interferences
[id
].second
.begin(), ctx
.interferences
[id
].second
.end());
1321 for (unsigned id
= 0; id
< is_assigned
.size(); id
++)
1322 assert(is_assigned
[id
] || !ctx
.is_reloaded
[id
]);
1324 for (std::pair
<uint32_t, uint32_t> pair
: ctx
.affinities
) {
1325 assert(is_assigned
[pair
.first
] == is_assigned
[pair
.second
]);
1326 if (!is_assigned
[pair
.first
])
1328 assert(ctx
.is_reloaded
[pair
.first
] == ctx
.is_reloaded
[pair
.second
]);
1329 assert(ctx
.interferences
[pair
.first
].first
.type() == ctx
.interferences
[pair
.second
].first
.type());
1330 if (ctx
.interferences
[pair
.first
].first
.type() == RegType::sgpr
)
1331 assert(sgpr_slot
[pair
.first
] == sgpr_slot
[pair
.second
]);
1333 assert(vgpr_slot
[pair
.first
] == vgpr_slot
[pair
.second
]);
1336 /* hope, we didn't mess up */
1337 std::vector
<Temp
> vgpr_spill_temps((spill_slot_interferences
.size() + 63) / 64);
1338 assert(vgpr_spill_temps
.size() <= spills_to_vgpr
);
1340 /* replace pseudo instructions with actual hardware instructions */
1341 unsigned last_top_level_block_idx
= 0;
1342 std::vector
<bool> reload_in_loop(vgpr_spill_temps
.size());
1343 for (Block
& block
: ctx
.program
->blocks
) {
1345 /* after loops, we insert a user if there was a reload inside the loop */
1346 if (block
.loop_nest_depth
== 0) {
1348 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1349 if (reload_in_loop
[i
])
1353 if (end_vgprs
> 0) {
1354 aco_ptr
<Instruction
> destr
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_end_linear_vgpr
, Format::PSEUDO
, end_vgprs
, 0)};
1356 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1357 if (reload_in_loop
[i
])
1358 destr
->operands
[k
++] = Operand(vgpr_spill_temps
[i
]);
1359 reload_in_loop
[i
] = false;
1361 /* find insertion point */
1362 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
1363 while ((*it
)->opcode
== aco_opcode::p_linear_phi
|| (*it
)->opcode
== aco_opcode::p_phi
)
1365 block
.instructions
.insert(it
, std::move(destr
));
1369 if (block
.kind
& block_kind_top_level
&& !block
.linear_preds
.empty()) {
1370 last_top_level_block_idx
= block
.index
;
1372 /* check if any spilled variables use a created linear vgpr, otherwise destroy them */
1373 for (unsigned i
= 0; i
< vgpr_spill_temps
.size(); i
++) {
1374 if (vgpr_spill_temps
[i
] == Temp())
1377 bool can_destroy
= true;
1378 for (std::pair
<Temp
, uint32_t> pair
: ctx
.spills_exit
[block
.linear_preds
[0]]) {
1380 if (sgpr_slot
.find(pair
.second
) != sgpr_slot
.end() &&
1381 sgpr_slot
[pair
.second
] / 64 == i
) {
1382 can_destroy
= false;
1387 vgpr_spill_temps
[i
] = Temp();
1391 std::vector
<aco_ptr
<Instruction
>>::iterator it
;
1392 std::vector
<aco_ptr
<Instruction
>> instructions
;
1393 instructions
.reserve(block
.instructions
.size());
1394 for (it
= block
.instructions
.begin(); it
!= block
.instructions
.end(); ++it
) {
1396 if ((*it
)->opcode
== aco_opcode::p_spill
) {
1397 uint32_t spill_id
= (*it
)->operands
[1].constantValue();
1399 if (!ctx
.is_reloaded
[spill_id
]) {
1400 /* never reloaded, so don't spill */
1401 } else if (vgpr_slot
.find(spill_id
) != vgpr_slot
.end()) {
1403 ctx
.program
->config
->spilled_vgprs
+= (*it
)->operands
[0].size();
1405 assert(false && "vgpr spilling not yet implemented.");
1406 } else if (sgpr_slot
.find(spill_id
) != sgpr_slot
.end()) {
1407 ctx
.program
->config
->spilled_sgprs
+= (*it
)->operands
[0].size();
1409 uint32_t spill_slot
= sgpr_slot
[spill_id
];
1411 /* check if the linear vgpr already exists */
1412 if (vgpr_spill_temps
[spill_slot
/ 64] == Temp()) {
1413 Temp linear_vgpr
= {ctx
.program
->allocateId(), v1
.as_linear()};
1414 vgpr_spill_temps
[spill_slot
/ 64] = linear_vgpr
;
1415 aco_ptr
<Pseudo_instruction
> create
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_start_linear_vgpr
, Format::PSEUDO
, 0, 1)};
1416 create
->definitions
[0] = Definition(linear_vgpr
);
1417 /* find the right place to insert this definition */
1418 if (last_top_level_block_idx
== block
.index
) {
1419 /* insert right before the current instruction */
1420 instructions
.emplace_back(std::move(create
));
1422 assert(last_top_level_block_idx
< block
.index
);
1423 /* insert before the branch at last top level block */
1424 std::vector
<aco_ptr
<Instruction
>>& instructions
= ctx
.program
->blocks
[last_top_level_block_idx
].instructions
;
1425 instructions
.insert(std::next(instructions
.begin(), instructions
.size() - 1), std::move(create
));
1429 /* spill sgpr: just add the vgpr temp to operands */
1430 Pseudo_instruction
* spill
= create_instruction
<Pseudo_instruction
>(aco_opcode::p_spill
, Format::PSEUDO
, 3, 0);
1431 spill
->operands
[0] = Operand(vgpr_spill_temps
[spill_slot
/ 64]);
1432 spill
->operands
[1] = Operand(spill_slot
% 64);
1433 spill
->operands
[2] = (*it
)->operands
[0];
1434 instructions
.emplace_back(aco_ptr
<Instruction
>(spill
));
1436 unreachable("No spill slot assigned for spill id");
1439 } else if ((*it
)->opcode
== aco_opcode::p_reload
) {
1440 uint32_t spill_id
= (*it
)->operands
[0].constantValue();
1441 assert(ctx
.is_reloaded
[spill_id
]);
1443 if (vgpr_slot
.find(spill_id
) != vgpr_slot
.end()) {
1445 assert(false && "vgpr spilling not yet implemented.");
1447 } else if (sgpr_slot
.find(spill_id
) != sgpr_slot
.end()) {
1448 uint32_t spill_slot
= sgpr_slot
[spill_id
];
1449 reload_in_loop
[spill_slot
/ 64] = block
.loop_nest_depth
> 0;
1451 /* check if the linear vgpr already exists */
1452 if (vgpr_spill_temps
[spill_slot
/ 64] == Temp()) {
1453 Temp linear_vgpr
= {ctx
.program
->allocateId(), v1
.as_linear()};
1454 vgpr_spill_temps
[spill_slot
/ 64] = linear_vgpr
;
1455 aco_ptr
<Pseudo_instruction
> create
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_start_linear_vgpr
, Format::PSEUDO
, 0, 1)};
1456 create
->definitions
[0] = Definition(linear_vgpr
);
1457 /* find the right place to insert this definition */
1458 if (last_top_level_block_idx
== block
.index
) {
1459 /* insert right before the current instruction */
1460 instructions
.emplace_back(std::move(create
));
1462 assert(last_top_level_block_idx
< block
.index
);
1463 /* insert before the branch at last top level block */
1464 std::vector
<aco_ptr
<Instruction
>>& instructions
= ctx
.program
->blocks
[last_top_level_block_idx
].instructions
;
1465 instructions
.insert(std::next(instructions
.begin(), instructions
.size() - 1), std::move(create
));
1469 /* reload sgpr: just add the vgpr temp to operands */
1470 Pseudo_instruction
* reload
= create_instruction
<Pseudo_instruction
>(aco_opcode::p_reload
, Format::PSEUDO
, 2, 1);
1471 reload
->operands
[0] = Operand(vgpr_spill_temps
[spill_slot
/ 64]);
1472 reload
->operands
[1] = Operand(spill_slot
% 64);
1473 reload
->definitions
[0] = (*it
)->definitions
[0];
1474 instructions
.emplace_back(aco_ptr
<Instruction
>(reload
));
1476 unreachable("No spill slot assigned for spill id");
1478 } else if (!ctx
.remat_used
.count(it
->get()) || ctx
.remat_used
[it
->get()]) {
1479 instructions
.emplace_back(std::move(*it
));
1483 block
.instructions
= std::move(instructions
);
1486 /* SSA elimination inserts copies for logical phis right before p_logical_end
1487 * So if a linear vgpr is used between that p_logical_end and the branch,
1488 * we need to ensure logical phis don't choose a definition which aliases
1490 * TODO: Moving the spills and reloads to before p_logical_end might produce
1491 * slightly better code. */
1492 for (Block
& block
: ctx
.program
->blocks
) {
1493 /* loops exits are already handled */
1494 if (block
.logical_preds
.size() <= 1)
1497 bool has_logical_phis
= false;
1498 for (aco_ptr
<Instruction
>& instr
: block
.instructions
) {
1499 if (instr
->opcode
== aco_opcode::p_phi
) {
1500 has_logical_phis
= true;
1502 } else if (instr
->opcode
!= aco_opcode::p_linear_phi
) {
1506 if (!has_logical_phis
)
1509 std::set
<Temp
> vgprs
;
1510 for (unsigned pred_idx
: block
.logical_preds
) {
1511 Block
& pred
= ctx
.program
->blocks
[pred_idx
];
1512 for (int i
= pred
.instructions
.size() - 1; i
>= 0; i
--) {
1513 aco_ptr
<Instruction
>& pred_instr
= pred
.instructions
[i
];
1514 if (pred_instr
->opcode
== aco_opcode::p_logical_end
) {
1516 } else if (pred_instr
->opcode
== aco_opcode::p_spill
||
1517 pred_instr
->opcode
== aco_opcode::p_reload
) {
1518 vgprs
.insert(pred_instr
->operands
[0].getTemp());
1525 aco_ptr
<Instruction
> destr
{create_instruction
<Pseudo_instruction
>(aco_opcode::p_end_linear_vgpr
, Format::PSEUDO
, vgprs
.size(), 0)};
1527 for (Temp tmp
: vgprs
) {
1528 destr
->operands
[k
++] = Operand(tmp
);
1530 /* find insertion point */
1531 std::vector
<aco_ptr
<Instruction
>>::iterator it
= block
.instructions
.begin();
1532 while ((*it
)->opcode
== aco_opcode::p_linear_phi
|| (*it
)->opcode
== aco_opcode::p_phi
)
1534 block
.instructions
.insert(it
, std::move(destr
));
1538 } /* end namespace */
1541 void spill(Program
* program
, live
& live_vars
, const struct radv_nir_compiler_options
*options
)
1543 program
->config
->spilled_vgprs
= 0;
1544 program
->config
->spilled_sgprs
= 0;
1546 /* no spilling when wave count is already high */
1547 if (program
->num_waves
>= 6)
1550 /* lower to CSSA before spilling to ensure correctness w.r.t. phis */
1551 lower_to_cssa(program
, live_vars
, options
);
1553 /* else, we check if we can improve things a bit */
1555 /* calculate target register demand */
1556 RegisterDemand max_reg_demand
;
1557 for (Block
& block
: program
->blocks
) {
1558 max_reg_demand
.update(block
.register_demand
);
1561 RegisterDemand target_pressure
= {256, int16_t(program
->sgpr_limit
)};
1562 unsigned num_waves
= 1;
1563 int spills_to_vgpr
= (max_reg_demand
.sgpr
- program
->sgpr_limit
+ 63) / 64;
1565 /* test if it possible to increase occupancy with little spilling */
1566 for (unsigned num_waves_next
= 2; num_waves_next
<= program
->max_waves
; num_waves_next
++) {
1567 RegisterDemand target_pressure_next
= {int16_t((256 / num_waves_next
) & ~3),
1568 int16_t(get_addr_sgpr_from_waves(program
, num_waves_next
))};
1570 /* Currently no vgpr spilling supported.
1571 * Spill as many sgprs as necessary to not hinder occupancy */
1572 if (max_reg_demand
.vgpr
> target_pressure_next
.vgpr
)
1574 /* check that we have enough free vgprs to spill sgprs to */
1575 if (max_reg_demand
.sgpr
> target_pressure_next
.sgpr
) {
1576 /* add some buffer in case graph coloring is not perfect ... */
1577 const int spills_to_vgpr_next
= (max_reg_demand
.sgpr
- target_pressure_next
.sgpr
+ 63 + 32) / 64;
1578 if (spills_to_vgpr_next
+ max_reg_demand
.vgpr
> target_pressure_next
.vgpr
)
1580 spills_to_vgpr
= spills_to_vgpr_next
;
1583 target_pressure
= target_pressure_next
;
1584 num_waves
= num_waves_next
;
1587 assert(max_reg_demand
.vgpr
<= target_pressure
.vgpr
&& "VGPR spilling not yet supported.");
1589 if (num_waves
== program
->num_waves
)
1592 /* initialize ctx */
1593 spill_ctx
ctx(target_pressure
, program
, live_vars
.register_demand
);
1594 compute_global_next_uses(ctx
, live_vars
.live_out
);
1595 get_rematerialize_info(ctx
);
1597 /* create spills and reloads */
1598 for (unsigned i
= 0; i
< program
->blocks
.size(); i
++)
1599 spill_block(ctx
, i
);
1601 /* assign spill slots and DCE rematerialized code */
1602 assign_spill_slots(ctx
, spills_to_vgpr
);
1604 /* update live variable information */
1605 live_vars
= live_var_analysis(program
, options
);
1607 assert(program
->num_waves
>= num_waves
);